PROGRAM JACOBI_MAIN * * Jacobi iterations: MPI code using persistent communications * INCLUDE 'jacobi2.cmm' PARAMETER ( NSIZE = 1024*1024*15/8 ) DIMENSION F(NSIZE), U(NSIZE), UNEW(NSIZE), UA(NSIZE) DOUBLE PRECISION T0, T1 * CALL INIT0(A, B, NIT) N = NLOCA M = MLOCA IF ( (N+1)*(M+1) .GT. NSIZE ) THEN PRINT *, 'Error: insufficient buffer size (increase NSIZE).' CALL MPI_ABORT(MPI_COMM_WORLD, 1, IERR) ENDIF * CALL INIT(A, B, N, M, U, UA, F) T0=MPI_WTIME() CALL JACOBI(NIT, A, B, N, M, U, UNEW, F) T1=MPI_WTIME() IF ( MYRANK .EQ. 0 ) PRINT *, 'Wall time = ', T1-T0 CALL CHKSOL(N, M, U, UA) * CALL MPI_FINALIZE(IERR) * STOP END * *----------------------------------------------------------------------- * SUBROUTINE INIT0(A, B, NIT) INCLUDE 'jacobi2.cmm' DOUBLE PRECISION W(2) INTEGER IWK(2) * CALL MPI_INIT(IERR) CALL MPI_COMM_SIZE(MPI_COMM_WORLD, NPROCS, IERR) CALL MPI_COMM_RANK(MPI_COMM_WORLD, MYRANK, IERR) * IF ( MYRANK .EQ. 0 ) THEN PRINT *, 'Enter A, B, N, M, NIT, NP, NQ: ' READ(*,*) A, B, NGLOB, MGLOB, NIT, NP, NQ * * Evaluate processor grid NPxNQ = NPROCS IF ( NP*NQ .NE. 0 ) THEN IF ( NP*NQ .NE. NPROCS ) THEN PRINT *, 'Error: NP*NQ not equal to NPROCS!' CALL MPI_ABORT(MPI_COMM_WORLD, 1, IERR) ENDIF ELSE W(1) = 1.D0/NGLOB W(2) = 1.D0/MGLOB CALL PARTITION(NPROCS, IWK, W, 2) NP = IWK(1) NQ = IWK(2) ENDIF IF ( NGLOB .LT. NP .OR. MGLOB .LT. NQ ) THEN PRINT *, 'Error: # of grid points < # of processors!' CALL MPI_ABORT(MPI_COMM_WORLD, 1, IERR) ENDIF ENDIF CALL MPI_BCAST(A, 1, RTYPE, 0, MPI_COMM_WORLD, IERR) CALL MPI_BCAST(B, 1, RTYPE, 0, MPI_COMM_WORLD, IERR) CALL MPI_BCAST(NGLOB, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, IERR) CALL MPI_BCAST(MGLOB, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, IERR) CALL MPI_BCAST(NIT, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, IERR) CALL MPI_BCAST(NP, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, IERR) CALL MPI_BCAST(NQ, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, IERR) * * Compute 2D processor coordinates RANK_X = MOD( MYRANK, NP ) RANK_Y = MYRANK / NP * * Compute local N: block distribution of interior grid points. I = (NGLOB-1) / NP IF ( RANK_X .LT. MOD(NGLOB-1,NP) ) THEN NLOCA = I + 2 ELSE NLOCA = I + 1 ENDIF J = (MGLOB-1) / NQ IF ( RANK_Y .LT. MOD(MGLOB-1,NQ) ) THEN MLOCA = J + 2 ELSE MLOCA = J + 1 ENDIF * * Compute coordinates of lower-left corner of the subdomain IOFFSET=I*RANK_X IF ( RANK_X .LT. MOD(NGLOB-1,NP) ) THEN IOFFSET = IOFFSET + RANK_X ELSE IOFFSET = IOFFSET + MOD(NGLOB-1,NP) ENDIF JOFFSET=J*RANK_Y IF ( RANK_Y .LT. MOD(MGLOB-1,NQ) ) THEN JOFFSET = JOFFSET + RANK_Y ELSE JOFFSET = JOFFSET + MOD(MGLOB-1,NQ) ENDIF * if (.false.) then PRINT *, MYRANK, ' NP=', NP, ' NQ=', NQ, + ' NLOCA=', NLOCA, ' MLOCA=', MLOCA, + ' IOFFSET=', IOFFSET, ' JOFFSET=', JOFFSET endif * RETURN END * * *----------------------------------------------------------------------- * SUBROUTINE INIT(A, B, N, M, U, UA, F) INCLUDE 'jacobi2.cmm' DIMENSION F(0:N,0:M), U(0:N,0:M), UA(0:N,0:M) * HX=A/DBLE(NGLOB) HY=B/DBLE(MGLOB) * DX=1.D0/(HX*HX) DY=1.D0/(HY*HY) D=1.D0/(DX+DX+DY+DY) * * Analytical solution DO J=0,M DO I=0,N X = (I+IOFFSET)*HX Y = (J+JOFFSET)*HY UA(I,J) = -0.25D0*( X**2 + Y**2 ) ENDDO ENDDO * * Initial approximation and RHS DO J=0,M DO I=0,N F(I,J)=1.D0 * D U(I,J)=0.D0 ENDDO ENDDO * * Boundary conditions IF (RANK_Y .EQ. 0) THEN DO I=0,N U(I,0)=UA(I,0) ENDDO ENDIF * IF (RANK_Y .EQ. NQ-1 ) THEN DO I=0,N U(I,M)=UA(I,M) ENDDO ENDIF * IF (RANK_X .EQ. 0) THEN DO J=0,M U(0,J)=UA(0,J) ENDDO ENDIF * IF (RANK_X .EQ. NP-1) THEN DO J=0,M U(N,J)=UA(N,J) ENDDO ENDIF * RETURN END * *----------------------------------------------------------------------- * SUBROUTINE CHKSOL(N, M, U, UA) INCLUDE 'jacobi2.cmm' DIMENSION U(0:N,0:M), UA(0:N,0:M) * ERR0=0.D0 DO J=1,M-1 DO I=1,N-1 ERR0 = MAX( ERR0, ABS( U(I,J)-UA(I,J) ) ) ENDDO ENDDO CALL MPI_REDUCE(ERR0, ERR, 1, RTYPE, MPI_MAX, 0, + MPI_COMM_WORLD, IERR) IF (MYRANK .EQ. 0) PRINT 1000, ERR 1000 FORMAT(' Error = ', 1PG23.16) * RETURN END * *----------------------------------------------------------------------- * SUBROUTINE JACOBI(NIT, A, B, N, M, U, UNEW, F) INCLUDE 'jacobi2.cmm' DIMENSION F(0:N,0:M), U(0:N,0:M), UNEW(0:N,0:M) INTEGER LEFT, RIGHT, UPPER, LOWER, T1, T2 INTEGER STATUS(MPI_STATUS_SIZE,8), REQUEST(8) * HX=A/DBLE(NGLOB) HY=B/DBLE(MGLOB) * DX=1.D0/(HX*HX) DY=1.D0/(HY*HY) D=1.D0/(DX+DX+DY+DY) DX=DX*D DY=DY*D * LEFT = RANK_X-1 IF (LEFT .GE. 0) THEN LEFT = LEFT + RANK_Y * NP ELSE LEFT = MPI_PROC_NULL ENDIF * RIGHT = RANK_X+1 IF (RIGHT .LT. NP) THEN RIGHT = RIGHT + RANK_Y * NP ELSE RIGHT = MPI_PROC_NULL ENDIF * LOWER = RANK_Y-1 IF (LOWER .GE. 0) THEN LOWER = RANK_X + LOWER * NP ELSE LOWER = MPI_PROC_NULL ENDIF * UPPER = RANK_Y+1 IF (UPPER .LT. NQ) THEN UPPER = RANK_X + UPPER * NP ELSE UPPER = MPI_PROC_NULL ENDIF * * Create datatypes: T1=(1:N-1,.), T2=(.,1:M-1) CALL MPI_TYPE_CONTIGUOUS(N-1, RTYPE, T1, IERR) CALL MPI_TYPE_COMMIT(T1, IERR) CALL MPI_TYPE_VECTOR(M-1, 1, N+1, RTYPE, T2, IERR) CALL MPI_TYPE_COMMIT(T2, IERR) * * Init. persistent communications * * U(1:N-1,1) -> LOWER, U(1:N-1,0) <- LOWER CALL MPI_SEND_INIT(U(1,1), 1, T1, LOWER, 111, + MPI_COMM_WORLD, REQUEST(1), IERR) CALL MPI_RECV_INIT(U(1,0), 1, T1, LOWER, 111, + MPI_COMM_WORLD, REQUEST(5), IERR) * * U(1:N-1,M-1) -> UPPER, U(1:N-1,M) <- UPPER CALL MPI_SEND_INIT(U(1,M-1), 1, T1, UPPER, 111, + MPI_COMM_WORLD, REQUEST(2), IERR) CALL MPI_RECV_INIT(U(1,M), 1, T1, UPPER, 111, + MPI_COMM_WORLD, REQUEST(6), IERR) * * U(1,1:M-1) -> LEFT, U(0,1:M-1) <- LEFT CALL MPI_SEND_INIT(U(1,1), 1, T2, LEFT, 111, + MPI_COMM_WORLD, REQUEST(3), IERR) CALL MPI_RECV_INIT(U(0,1), 1, T2, LEFT, 111, + MPI_COMM_WORLD, REQUEST(7), IERR) * * U(N-1,1:M-1) -> RIGHT, U(N,1:M-1) <- RIGHT CALL MPI_SEND_INIT(U(N-1,1), 1, T2, RIGHT, 111, + MPI_COMM_WORLD, REQUEST(4), IERR) CALL MPI_RECV_INIT(U(N,1), 1, T2, RIGHT, 111, + MPI_COMM_WORLD, REQUEST(8), IERR) * * Jacobi iteration loop DO IT=1, NIT DO J=1,M-1 DO I=1,N-1 UNEW(I,J) = F(I,J) + DX*( U(I-1,J) + U(I+1,J) ) + + DY*( U(I,J-1) + U(I,J+1) ) ENDDO ENDDO * DO J=1,M-1 DO I=1,N-1 U(I,J)=UNEW(I,J) ENDDO ENDDO * * Update boundary values CALL MPI_STARTALL(8, REQUEST, IERR) CALL MPI_WAITALL(8, REQUEST, STATUS, IERR) * * Compute residual if (.true.) then RES0=0.D0 DO J=1,M-1 DO I=1,N-1 RES = F(I,J) - U(I,J) + DX*( U(I+1,J) + U(I-1,J) ) + + DY*( U(I,J+1) + U(I,J-1) ) RES0 = MAX( RES0, ABS(RES) ) ENDDO ENDDO CALL MPI_REDUCE(RES0, RESIDU, 1, RTYPE, MPI_MAX, 0, + MPI_COMM_WORLD, IERR) IF (MYRANK .EQ. 0) PRINT 1000, IT, RESIDU/D 1000 FORMAT(' It = ', I5, ' Residual = ', 1PG23.16) endif * ENDDO * DO I=1, 8 CALL MPI_REQUEST_FREE(REQUEST(I), IERR) ENDDO * CALL MPI_TYPE_FREE(T1, IERR) CALL MPI_TYPE_FREE(T2, IERR) * RETURN END