      PROGRAM JACOBI_MAIN
C
C     Jacobi iterations: MPI code using MPI-IO functions for
C     checkpoint/restart.
C
C     Note: because of a small bug, some versions of MPICH do not
C     export the mpi_file_* entry points (a patch for this issue
C     exists for MPICH 1.2.5).  On such systems use pmpi_file_*
C     instead:
C     (sed -e 's/[Mm][Pp][Ii]_[Ff][Ii][Ll][Ee]_/PMPI_FILE_/g')
C
      INCLUDE 'jacobi5.cmm'
*     NSIZE = number of 8-byte reals in 15 MB
      PARAMETER ( NSIZE = 1024*1024*15/8 )
      DIMENSION F(NSIZE), U(NSIZE), UNEW(NSIZE), UA(NSIZE)
      DOUBLE PRECISION T0, T1
      LOGICAL RST
*
      CALL INIT0(A, B, NIT, RST)
      N = NLOCA
      M = MLOCA
      IF ( (N+1)*(M+1) .GT. NSIZE ) THEN
         PRINT *, 'Error: insufficient buffer size (increase NSIZE).'
         CALL MPI_ABORT(MPI_COMM_WORLD, 1, IERR)
      ENDIF
*
*     Restart from the checkpoint file or start from scratch
      IF ( RST ) THEN
         CALL READ_DATA(IT0, A, B, N, M, U, UA, F)
      ELSE
         CALL INIT(A, B, N, M, U, UA, F)
         IT0=0
      ENDIF
      T0=MPI_WTIME()
      IF ( IT0 .LT. NIT ) THEN
         CALL JACOBI(IT0, NIT, A, B, N, M, U, UNEW, F, UA)
      ENDIF
      T1=MPI_WTIME()
      IF ( MYRANK .EQ. 0 ) PRINT *, 'Wall time = ', T1-T0
      CALL CHKSOL(N, M, U, UA)
*
      CALL MPI_FINALIZE(IERR)
*
      STOP
      END
*
*-----------------------------------------------------------------------
*
      SUBROUTINE INIT0(A, B, NIT, RST)
      INCLUDE 'jacobi5.cmm'
      LOGICAL RST
      DOUBLE PRECISION W(2)
      INTEGER DIMS(2), COMM_X, COMM_Y
      LOGICAL PERIODS(2), REORDER
      DATA PERIODS/.FALSE.,.FALSE./, REORDER/.TRUE./
*
      CALL MPI_INIT(IERR)
      CALL MPI_COMM_SIZE(MPI_COMM_WORLD, NPROCS, IERR)
      CALL MPI_COMM_RANK(MPI_COMM_WORLD, MYRANK, IERR)
*
      IF ( MYRANK .EQ. 0 ) THEN
         PRINT *, 'Enter A, B, N, M, NIT, NP, NQ, NSAVE, RST: '
         READ(*,*) A, B, NGLOB, MGLOB, NIT, NP, NQ, NSAVE, RST
*
*        Evaluate processor grid NP x NQ = NPROCS; if NP*NQ = 0 the
*        grid is chosen automatically by PARTITION (provided elsewhere)
         IF ( NP*NQ .NE. 0 ) THEN
            IF ( NP*NQ .NE. NPROCS ) THEN
               PRINT *, 'Error: NP*NQ not equal to NPROCS!'
               CALL MPI_ABORT(MPI_COMM_WORLD, 1, IERR)
            ENDIF
            DIMS(1)=NP
            DIMS(2)=NQ
         ELSE
            W(1) = 1.D0/NGLOB
            W(2) = 1.D0/MGLOB
            CALL PARTITION(NPROCS, DIMS, W, 2)
            NP = DIMS(1)
            NQ = DIMS(2)
         ENDIF
         IF ( NGLOB .LT. NP .OR. MGLOB .LT. NQ ) THEN
            PRINT *, 'Error: # of grid points < # of processors!'
            CALL MPI_ABORT(MPI_COMM_WORLD, 1, IERR)
         ENDIF
      ENDIF
      CALL MPI_BCAST(A, 1, RTYPE, 0, MPI_COMM_WORLD, IERR)
      CALL MPI_BCAST(B, 1, RTYPE, 0, MPI_COMM_WORLD, IERR)
      CALL MPI_BCAST(NGLOB, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, IERR)
      CALL MPI_BCAST(MGLOB, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, IERR)
      CALL MPI_BCAST(NIT, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, IERR)
      CALL MPI_BCAST(DIMS, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, IERR)
      CALL MPI_BCAST(NSAVE, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, IERR)
      CALL MPI_BCAST(RST, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, IERR)
      NP=DIMS(1)
      NQ=DIMS(2)
*
*     Construct communicator with 2D Cartesian topology
      CALL MPI_CART_CREATE(MPI_COMM_WORLD, 2, DIMS, PERIODS, REORDER,
     +     COMM, IERR)
*
*     Construct sub-communicators (PERIODS is reused as REMAIN_DIMS)
      PERIODS(1)=.TRUE.
      PERIODS(2)=.FALSE.
      CALL MPI_CART_SUB(COMM, PERIODS, COMM_X, IERR)
      CALL MPI_COMM_RANK(COMM_X, RANK_X, IERR)
      PERIODS(1)=.FALSE.
      PERIODS(2)=.TRUE.
      CALL MPI_CART_SUB(COMM, PERIODS, COMM_Y, IERR)
      CALL MPI_COMM_RANK(COMM_Y, RANK_Y, IERR)
*
*     Compute local N and M: block distribution of the interior
*     grid points
      I = (NGLOB-1) / NP
      IF ( RANK_X .LT. MOD(NGLOB-1,NP) ) THEN
         NLOCA = I + 2
      ELSE
         NLOCA = I + 1
      ENDIF
      J = (MGLOB-1) / NQ
      IF ( RANK_Y .LT. MOD(MGLOB-1,NQ) ) THEN
         MLOCA = J + 2
      ELSE
         MLOCA = J + 1
      ENDIF
*
*     Compute coordinates of the lower-left corner of the subdomain
*     with MPI_SCAN (the equivalent hand-computed offsets are kept
*     in the inactive branch below)
      if ( .true. ) then
         CALL MPI_SCAN( NLOCA-1, IOFFSET, 1, MPI_INTEGER, MPI_SUM,
     +        COMM_X, IERR )
         IOFFSET=IOFFSET-(NLOCA-1)
         CALL MPI_SCAN( MLOCA-1, JOFFSET, 1, MPI_INTEGER, MPI_SUM,
     +        COMM_Y, IERR )
         JOFFSET=JOFFSET-(MLOCA-1)
      else
         IOFFSET=I*RANK_X
         IF ( RANK_X .LT. MOD(NGLOB-1,NP) ) THEN
            IOFFSET = IOFFSET + RANK_X
         ELSE
            IOFFSET = IOFFSET + MOD(NGLOB-1,NP)
         ENDIF
         JOFFSET=J*RANK_Y
         IF ( RANK_Y .LT. MOD(MGLOB-1,NQ) ) THEN
            JOFFSET = JOFFSET + RANK_Y
         ELSE
            JOFFSET = JOFFSET + MOD(MGLOB-1,NQ)
         ENDIF
      endif
*
      CALL MPI_COMM_FREE(COMM_X, IERR)
      CALL MPI_COMM_FREE(COMM_Y, IERR)
*
*     Create and commit datatypes: XTYPE=(1:N-1,.), YTYPE=(.,1:M-1)
      N=NLOCA
      M=MLOCA
      CALL MPI_TYPE_CONTIGUOUS(N-1, RTYPE, XTYPE, IERR)
      CALL MPI_TYPE_COMMIT(XTYPE, IERR)
      CALL MPI_TYPE_VECTOR(M-1, 1, N+1, RTYPE, YTYPE, IERR)
      CALL MPI_TYPE_COMMIT(YTYPE, IERR)
*
      CALL MPI_CART_SHIFT(COMM, 0, 1, LEFT, RIGHT, IERR)
      CALL MPI_CART_SHIFT(COMM, 1, 1, LOWER, UPPER, IERR)
*
      RETURN
      END
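*
* Worked example of the block distribution above (illustrative
* numbers only, not used by the code): NGLOB=10 and NP=3 give
* I=(NGLOB-1)/NP=3 and MOD(NGLOB-1,NP)=0, hence NLOCA=4 on every
* rank; the exclusive prefix sums of NLOCA-1 yield IOFFSET=0,3,6,
* so the local grids 0:4 cover global columns 0-4, 3-7 and 6-10,
* adjacent subdomains sharing two columns of overlap.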
*
*-----------------------------------------------------------------------
*
      SUBROUTINE INIT(A, B, N, M, U, UA, F)
      INCLUDE 'jacobi5.cmm'
      DIMENSION F(0:N,0:M), U(0:N,0:M), UA(0:N,0:M)
*
      HX=A/DBLE(NGLOB)
      HY=B/DBLE(MGLOB)
*
      DX=1.D0/(HX*HX)
      DY=1.D0/(HY*HY)
      D=1.D0/(DX+DX+DY+DY)
*
*     Analytical solution
      DO J=0,M
         DO I=0,N
            X = (I+IOFFSET)*HX
            Y = (J+JOFFSET)*HY
            UA(I,J) = -0.25D0*( X**2 + Y**2 )
         ENDDO
      ENDDO
*
*     Initial approximation and RHS (pre-scaled by D)
      DO J=0,M
         DO I=0,N
            F(I,J)=1.D0 * D
            U(I,J)=0.D0
         ENDDO
      ENDDO
*
*     Boundary conditions on the physical boundaries only
      IF (RANK_Y .EQ. 0) THEN
         DO I=0,N
            U(I,0)=UA(I,0)
         ENDDO
      ENDIF
*
      IF (RANK_Y .EQ. NQ-1) THEN
         DO I=0,N
            U(I,M)=UA(I,M)
         ENDDO
      ENDIF
*
      IF (RANK_X .EQ. 0) THEN
         DO J=0,M
            U(0,J)=UA(0,J)
         ENDDO
      ENDIF
*
      IF (RANK_X .EQ. NP-1) THEN
         DO J=0,M
            U(N,J)=UA(N,J)
         ENDDO
      ENDIF
*
      RETURN
      END
*
*-----------------------------------------------------------------------
*
      SUBROUTINE CHKSOL(N, M, U, UA)
      INCLUDE 'jacobi5.cmm'
      DIMENSION U(0:N,0:M), UA(0:N,0:M)
*
*     Maximum pointwise error against the analytical solution
      ERR0=0.D0
      DO J=1,M-1
         DO I=1,N-1
            ERR0 = MAX( ERR0, ABS( U(I,J)-UA(I,J) ) )
         ENDDO
      ENDDO
      CALL MPI_REDUCE(ERR0, ERR, 1, RTYPE, MPI_MAX, 0,
     +     MPI_COMM_WORLD, IERR)
      IF (MYRANK .EQ. 0) PRINT 1000, ERR
 1000 FORMAT(' Error = ', 1PG23.16)
*
      RETURN
      END
*
*-----------------------------------------------------------------------
*
      SUBROUTINE UPDTBD(N, M, U)
      INCLUDE 'jacobi5.cmm'
      DIMENSION U(0:N,0:M)
      INTEGER STATUS(MPI_STATUS_SIZE)
*
*     Fill the ghost layers with the neighbours' inner boundary values
*
*     U(1:N-1,1) -> LOWER,  U(1:N-1,M) <- UPPER
      CALL MPI_SENDRECV(U(1,1), 1, XTYPE, LOWER, 111,
     +     U(1,M), 1, XTYPE, UPPER, 111, COMM, STATUS, IERR)
*
*     U(1:N-1,M-1) -> UPPER,  U(1:N-1,0) <- LOWER
      CALL MPI_SENDRECV(U(1,M-1), 1, XTYPE, UPPER, 111,
     +     U(1,0), 1, XTYPE, LOWER, 111, COMM, STATUS, IERR)
*
*     U(1,1:M-1) -> LEFT,  U(N,1:M-1) <- RIGHT
      CALL MPI_SENDRECV(U(1,1), 1, YTYPE, LEFT, 111,
     +     U(N,1), 1, YTYPE, RIGHT, 111, COMM, STATUS, IERR)
*
*     U(N-1,1:M-1) -> RIGHT,  U(0,1:M-1) <- LEFT
      CALL MPI_SENDRECV(U(N-1,1), 1, YTYPE, RIGHT, 111,
     +     U(0,1), 1, YTYPE, LEFT, 111, COMM, STATUS, IERR)
*
      RETURN
      END
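*
* Note on UPDTBD above: each rank stores U(0:N,0:M), whose outermost
* rows and columns are ghost layers unless they lie on the physical
* boundary.  XTYPE transfers a contiguous row segment of N-1 reals,
* YTYPE a column segment of M-1 reals with stride N+1.  Pairing each
* send with the matching receive in a single MPI_SENDRECV lets MPI
* schedule the exchange without deadlock for any process-grid shape.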
*
*-----------------------------------------------------------------------
*
      SUBROUTINE JACOBI(IT0, NIT, A, B, N, M, U, UNEW, F, UA)
      INCLUDE 'jacobi5.cmm'
      DIMENSION F(0:N,0:M), U(0:N,0:M), UNEW(0:N,0:M), UA(*)
*
      HX=A/DBLE(NGLOB)
      HY=B/DBLE(MGLOB)
*
      DX=1.D0/(HX*HX)
      DY=1.D0/(HY*HY)
      D=1.D0/(DX+DX+DY+DY)
      DX=DX*D
      DY=DY*D
*
*     Jacobi iteration loop
      DO IT=IT0+1, NIT
         DO J=1,M-1
            DO I=1,N-1
               UNEW(I,J) = F(I,J) + DX*( U(I-1,J) + U(I+1,J) )
     +                            + DY*( U(I,J-1) + U(I,J+1) )
            ENDDO
         ENDDO
*
         DO J=1,M-1
            DO I=1,N-1
               U(I,J)=UNEW(I,J)
            ENDDO
         ENDDO
*
         CALL UPDTBD(N, M, U)
*
*        Compute residual (toggle: change to .false. to skip the
*        per-iteration check)
         if (.true.) then
            RES0=0.D0
            DO J=1,M-1
               DO I=1,N-1
                  RES = F(I,J) - U(I,J) + DX*( U(I+1,J) + U(I-1,J) )
     +                                  + DY*( U(I,J+1) + U(I,J-1) )
                  RES0 = MAX( RES0, ABS(RES) )
               ENDDO
            ENDDO
            CALL MPI_REDUCE(RES0, RESIDU, 1, RTYPE, MPI_MAX, 0,
     +           MPI_COMM_WORLD, IERR)
            IF (MYRANK .EQ. 0) PRINT 1000, IT, RESIDU/D
 1000       FORMAT(' It = ', I5, ' Residual = ', 1PG23.16)
         endif
*
*        Save current solution
         IF ( NSAVE .GT. 0 .AND. MOD(IT, NSAVE) .EQ. 0 ) THEN
            CALL WRITE_DATA(IT, A, B, N, M, U, UA, F)
         ENDIF
*
      ENDDO
*
      IF ( NSAVE .GT. 0 .AND. MOD(NIT, NSAVE) .NE. 0 ) THEN
         CALL WRITE_DATA(NIT, A, B, N, M, U, UA, F)
      ENDIF
*
      RETURN
      END
*
*-----------------------------------------------------------------------
*
      SUBROUTINE WRITE_DATA(IT, A, B, N, M, U, UA, F)
      INCLUDE 'jacobi5.cmm'
      DIMENSION U(*), UA(*), F(*)
      INTEGER FH, STATUS(MPI_STATUS_SIZE), MODE, RANK
      INTEGER FILETYPE, MEMTYPE
* --- Fortran 90/95
*     INTEGER(KIND=MPI_OFFSET_KIND) OFFSET
* --- Most 64-bit architectures
      INTEGER*8 OFFSET
* --- 32-bit file offsets (Pentium/Linux, MPICH <= 1.2.1)
*     INTEGER*4 OFFSET
*
      IF ( MYRANK .EQ. 0 ) THEN
         PRINT *, 'Saving current solution to ''jacobi.dat''.'
         CALL FLUSH(6)
      ENDIF
*
      CALL MPI_COMM_RANK(COMM, RANK, IERR)
      MODE = MPI_MODE_WRONLY + MPI_MODE_CREATE
      CALL MPI_FILE_OPEN(COMM, 'jacobi.tmp', MODE, MPI_INFO_NULL,
     +     FH, IERR)
      IF ( IERR .NE. MPI_SUCCESS ) THEN
         PRINT *, MYRANK, 'Cannot open output file "jacobi.tmp".'
         CALL MPI_ABORT(COMM, 2, IERR)
      ENDIF
      OFFSET=0
      CALL MPI_FILE_SET_VIEW(FH, OFFSET, MPI_BYTE, MPI_BYTE,
     +     'native', MPI_INFO_NULL, IERR)
*
*     Rank 0 writes the header and records the resulting offset
      IF ( RANK .EQ. 0 ) THEN
         CALL MPI_FILE_WRITE(FH, A, 1, RTYPE, STATUS, IERR)
         CALL MPI_FILE_WRITE(FH, B, 1, RTYPE, STATUS, IERR)
         CALL MPI_FILE_WRITE(FH, NGLOB, 1, MPI_INTEGER, STATUS, IERR)
         CALL MPI_FILE_WRITE(FH, MGLOB, 1, MPI_INTEGER, STATUS, IERR)
         CALL MPI_FILE_WRITE(FH, IT, 1, MPI_INTEGER, STATUS, IERR)
         CALL MPI_FILE_GET_POSITION(FH, OFFSET, IERR)
      ENDIF
*
*     Sync-barrier-sync makes rank 0's header visible to all ranks;
*     the header length is then broadcast through a default INTEGER
      CALL MPI_FILE_SYNC(FH, IERR)
      CALL MPI_BARRIER(COMM, IERR)
      CALL MPI_FILE_SYNC(FH, IERR)
      IF ( RANK .EQ. 0 ) I=OFFSET
      CALL MPI_BCAST(I, 1, MPI_INTEGER, 0, COMM, IERR)
      OFFSET=I
*
      CALL ARRAY_TYPES(N, M, FILETYPE, MEMTYPE)
      CALL MPI_FILE_SET_VIEW(FH, OFFSET, RTYPE, FILETYPE, 'native',
     +     MPI_INFO_NULL, IERR)
      CALL MPI_FILE_WRITE_ALL(FH, U, 1, MEMTYPE, STATUS, IERR)
      CALL MPI_FILE_WRITE_ALL(FH, UA, 1, MEMTYPE, STATUS, IERR)
      CALL MPI_FILE_WRITE_ALL(FH, F, 1, MEMTYPE, STATUS, IERR)
      CALL MPI_TYPE_FREE(FILETYPE, IERR)
      CALL MPI_TYPE_FREE(MEMTYPE, IERR)
      CALL MPI_FILE_CLOSE(FH, IERR)
*
*     Atomically replace the previous checkpoint
      CALL MPI_BARRIER(COMM, IERR)
      IF ( MYRANK .EQ. 0 ) THEN
         CALL SYSTEM('mv -f jacobi.tmp jacobi.dat')
      ENDIF
      CALL MPI_BARRIER(COMM, IERR)
*
      RETURN
      END
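*
* Layout of the checkpoint file produced by WRITE_DATA above and
* consumed by READ_DATA below:
*
*     A, B               two reals (RTYPE)    domain size
*     NGLOB, MGLOB, IT   three integers       grid size, iteration count
*     U, UA, F           three (NGLOB+1)x(MGLOB+1) arrays of reals,
*                        assembled collectively through FILETYPE
*
* Writing to 'jacobi.tmp' and renaming to 'jacobi.dat' only after a
* successful close means an interrupted save cannot clobber the last
* good checkpoint.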
*
*-----------------------------------------------------------------------
*
      SUBROUTINE READ_DATA(IT, A, B, N, M, U, UA, F)
      INCLUDE 'jacobi5.cmm'
      DIMENSION U(*), UA(*), F(*)
      INTEGER FH, STATUS(MPI_STATUS_SIZE), MODE, RANK
      INTEGER FILETYPE, MEMTYPE
* --- Fortran 90/95
*     INTEGER(KIND=MPI_OFFSET_KIND) OFFSET
* --- Most 64-bit architectures (must match WRITE_DATA)
      INTEGER*8 OFFSET
* --- 32-bit file offsets (Pentium/Linux, MPICH <= 1.2.1)
*     INTEGER*4 OFFSET
*
      IF ( MYRANK .EQ. 0 ) THEN
         PRINT *, 'Reading initial solution from ''jacobi.dat''.'
         CALL FLUSH(6)
      ENDIF
*
      CALL MPI_COMM_RANK(COMM, RANK, IERR)
      MODE = MPI_MODE_RDONLY
      CALL MPI_FILE_OPEN(COMM, 'jacobi.dat', MODE, MPI_INFO_NULL,
     +     FH, IERR)
      IF ( IERR .NE. MPI_SUCCESS ) THEN
         PRINT *, MYRANK, 'Cannot open input file "jacobi.dat".'
         CALL MPI_ABORT(COMM, 2, IERR)
      ENDIF
      OFFSET=0
      CALL MPI_FILE_SET_VIEW(FH, OFFSET, MPI_BYTE, MPI_BYTE,
     +     'native', MPI_INFO_NULL, IERR)
*
*     Rank 0 reads the header and checks it against the current run
      IF ( RANK .EQ. 0 ) THEN
         CALL MPI_FILE_READ(FH, A0, 1, RTYPE, STATUS, IERR)
         CALL MPI_FILE_READ(FH, B0, 1, RTYPE, STATUS, IERR)
         CALL MPI_FILE_READ(FH, NGLOB0, 1, MPI_INTEGER, STATUS, IERR)
         CALL MPI_FILE_READ(FH, MGLOB0, 1, MPI_INTEGER, STATUS, IERR)
         CALL MPI_FILE_READ(FH, IT, 1, MPI_INTEGER, STATUS, IERR)
         CALL MPI_FILE_GET_POSITION(FH, OFFSET, IERR)
*        An exact comparison of the reals A0, B0 would be unsafe:
*        IF ( NGLOB0 .NE. NGLOB .OR. MGLOB0 .NE. MGLOB .OR.
*    +        A0 .NE. A .OR. B0 .NE. B ) THEN
         IF ( NGLOB0.NE.NGLOB .OR. MGLOB0.NE.MGLOB .OR.
     +        ABS(A0-A).GT.1E-6 .OR. ABS(B0-B).GT.1E-6 ) THEN
            PRINT *, 'Input file does not match current geometry!'
            CALL MPI_ABORT(COMM, 3, IERR)
         ENDIF
      ENDIF
*
      CALL MPI_BCAST(IT, 1, MPI_INTEGER, 0, COMM, IERR)
      IF ( RANK .EQ. 0 ) I=OFFSET
      CALL MPI_BCAST(I, 1, MPI_INTEGER, 0, COMM, IERR)
      OFFSET=I
      CALL ARRAY_TYPES(N, M, FILETYPE, MEMTYPE)
      CALL MPI_FILE_SET_VIEW(FH, OFFSET, RTYPE, FILETYPE, 'native',
     +     MPI_INFO_NULL, IERR)
      CALL MPI_FILE_READ_ALL(FH, U, 1, MEMTYPE, STATUS, IERR)
      CALL MPI_FILE_READ_ALL(FH, UA, 1, MEMTYPE, STATUS, IERR)
      CALL MPI_FILE_READ_ALL(FH, F, 1, MEMTYPE, STATUS, IERR)
*
*     Fill the ghost layers, which are not stored in the file
      CALL UPDTBD(N, M, U)
      CALL UPDTBD(N, M, F)
      CALL UPDTBD(N, M, UA)
*
      CALL MPI_TYPE_FREE(FILETYPE, IERR)
      CALL MPI_TYPE_FREE(MEMTYPE, IERR)
      CALL MPI_FILE_CLOSE(FH, IERR)
*
      RETURN
      END
*
*-----------------------------------------------------------------------
*
      SUBROUTINE ARRAY_TYPES(N, M, FILETYPE, MEMTYPE)
      INCLUDE 'jacobi5.cmm'
      INTEGER FILETYPE, MEMTYPE
      INTEGER I1, I2, J1, J2, SIZES(2), SUBSIZES(2), STARTS(2)
*
* This subroutine creates (and commits) two datatypes:
*   FILETYPE = subarray (IOFFSET+I1:IOFFSET+N-I2,
*              JOFFSET+J1:JOFFSET+M-J2) of the global array
*              (0:NGLOB, 0:MGLOB)
*   MEMTYPE  = subarray (I1:N-I2, J1:M-J2) of the local array (0:N, 0:M)
* where:
*   if (RANK_X.eq.0)    then I1=0 else I1=1
*   if (RANK_X.eq.NP-1) then I2=0 else I2=1
*   if (RANK_Y.eq.0)    then J1=0 else J1=1
*   if (RANK_Y.eq.NQ-1) then J2=0 else J2=1
* so that overlapping subdomain points are written exactly once.
*
      I1 = (RANK_X+NP-1)/NP
      I2 = 1-(RANK_X+1)/NP
      J1 = (RANK_Y+NQ-1)/NQ
      J2 = 1-(RANK_Y+1)/NQ
*
      SIZES(1)    = NGLOB+1
      SIZES(2)    = MGLOB+1
      STARTS(1)   = IOFFSET + I1
      STARTS(2)   = JOFFSET + J1
      SUBSIZES(1) = N - I2 - I1 + 1
      SUBSIZES(2) = M - J2 - J1 + 1
      CALL MPI_TYPE_CREATE_SUBARRAY(2, SIZES, SUBSIZES, STARTS,
     +     MPI_ORDER_FORTRAN, RTYPE, FILETYPE, IERR)
      CALL MPI_TYPE_COMMIT(FILETYPE, IERR)
*
      SIZES(1)  = N+1
      SIZES(2)  = M+1
      STARTS(1) = I1
      STARTS(2) = J1
      CALL MPI_TYPE_CREATE_SUBARRAY(2, SIZES, SUBSIZES, STARTS,
     +     MPI_ORDER_FORTRAN, RTYPE, MEMTYPE, IERR)
      CALL MPI_TYPE_COMMIT(MEMTYPE, IERR)
*
      RETURN
      END
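*
*-----------------------------------------------------------------------
*
* The include file 'jacobi5.cmm' is not part of this listing.  A
* minimal sketch of what it has to declare, inferred from the names
* used above; the actual file may differ in layout, and RTYPE may be
* defined differently:
*
*       INCLUDE 'mpif.h'
*       IMPLICIT REAL*8 (A-H,O-Z)
*       INTEGER RTYPE, COMM, XTYPE, YTYPE, RANK_X, RANK_Y,
*      +        LEFT, RIGHT, LOWER, UPPER
*       PARAMETER ( RTYPE = MPI_DOUBLE_PRECISION )
*       COMMON /JAC5/ NGLOB, MGLOB, NP, NQ, NLOCA, MLOCA,
*      +        IOFFSET, JOFFSET, NSAVE, NPROCS, MYRANK, COMM,
*      +        RANK_X, RANK_Y, LEFT, RIGHT, LOWER, UPPER,
*      +        XTYPE, YTYPE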