PROGRAM RBGS_MAIN * * Red-black GS iterations: MPI code using sendrecv * INCLUDE 'rbgs1.cmm' PARAMETER ( NSIZE = 1024*1024*15/8 ) DIMENSION F(NSIZE), U(NSIZE), UA(NSIZE) DOUBLE PRECISION T0, T1 * CALL INIT0(A, B, NIT) N = NLOCA M = MLOCA IF ( (N+1)*(M+1) .GT. NSIZE ) THEN PRINT *, 'Error: insufficient buffer size (increase NSIZE).' CALL MPI_ABORT(MPI_COMM_WORLD, 1, IERR) ENDIF * CALL INIT(A, B, N, M, U, UA, F) T0=MPI_WTIME() CALL RBGS(NIT, A, B, N, M, U, F) T1=MPI_WTIME() IF ( MYRANK .EQ. 0 ) PRINT *, 'Wall time = ', T1-T0 CALL CHKSOL(N, M, U, UA) * CALL MPI_FINALIZE(IERR) * STOP END * *----------------------------------------------------------------------- * SUBROUTINE INIT0(A, B, NIT) INCLUDE 'rbgs1.cmm' DOUBLE PRECISION W(2) INTEGER IWK(2) * CALL MPI_INIT(IERR) CALL MPI_COMM_SIZE(MPI_COMM_WORLD, NPROCS, IERR) CALL MPI_COMM_RANK(MPI_COMM_WORLD, MYRANK, IERR) * IF ( MYRANK .EQ. 0 ) THEN PRINT *, 'Enter A, B, N, M, NIT, NP, NQ: ' READ(*,*) A, B, NGLOB, MGLOB, NIT, NP, NQ * * Evaluate processor grid NPxNQ = NPROCS IF ( NP*NQ .NE. 0 ) THEN IF ( NP*NQ .NE. NPROCS ) THEN PRINT *, 'Error: NP*NQ not equal to NPROCS!' CALL MPI_ABORT(MPI_COMM_WORLD, 1, IERR) ENDIF ELSE W(1) = 1.D0/NGLOB W(2) = 1.D0/MGLOB CALL PARTITION(NPROCS, IWK, W, 2) NP = IWK(1) NQ = IWK(2) ENDIF IF ( NGLOB .LT. NP .OR. MGLOB .LT. NQ ) THEN PRINT *, 'Error: # of grid points < # of processors!' CALL MPI_ABORT(MPI_COMM_WORLD, 1, IERR) ENDIF ENDIF CALL MPI_BCAST(A, 1, RTYPE, 0, MPI_COMM_WORLD, IERR) CALL MPI_BCAST(B, 1, RTYPE, 0, MPI_COMM_WORLD, IERR) CALL MPI_BCAST(NGLOB, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, IERR) CALL MPI_BCAST(MGLOB, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, IERR) CALL MPI_BCAST(NIT, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, IERR) CALL MPI_BCAST(NP, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, IERR) CALL MPI_BCAST(NQ, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, IERR) * * Compute 2D processor coordinates RANK_X = MOD( MYRANK, NP ) RANK_Y = MYRANK / NP * * Compute local N: block distribution of interior grid points. I = (NGLOB-1) / NP IF ( RANK_X .LT. MOD(NGLOB-1,NP) ) THEN NLOCA = I + 2 ELSE NLOCA = I + 1 ENDIF J = (MGLOB-1) / NQ IF ( RANK_Y .LT. MOD(MGLOB-1,NQ) ) THEN MLOCA = J + 2 ELSE MLOCA = J + 1 ENDIF * * Compute coordinates of lower-left corner of the subdomain IOFFSET=I*RANK_X IF ( RANK_X .LT. MOD(NGLOB-1,NP) ) THEN IOFFSET = IOFFSET + RANK_X ELSE IOFFSET = IOFFSET + MOD(NGLOB-1,NP) ENDIF JOFFSET=J*RANK_Y IF ( RANK_Y .LT. MOD(MGLOB-1,NQ) ) THEN JOFFSET = JOFFSET + RANK_Y ELSE JOFFSET = JOFFSET + MOD(MGLOB-1,NQ) ENDIF * if (.false.) then PRINT *, MYRANK, ' NP=', NP, ' NQ=', NQ, + ' NLOCA=', NLOCA, ' MLOCA=', MLOCA, + ' IOFFSET=', IOFFSET, ' JOFFSET=', JOFFSET endif * RETURN END * * *----------------------------------------------------------------------- * SUBROUTINE INIT(A, B, N, M, U, UA, F) INCLUDE 'rbgs1.cmm' DIMENSION F(0:N,0:M), U(0:N,0:M), UA(0:N,0:M) * HX=A/DBLE(NGLOB) HY=B/DBLE(MGLOB) * DX=1.D0/(HX*HX) DY=1.D0/(HY*HY) D=1.D0/(DX+DX+DY+DY) * * Analytical solution DO J=0,M DO I=0,N X = (I+IOFFSET)*HX Y = (J+JOFFSET)*HY UA(I,J) = -0.25D0*( X**2 + Y**2 ) ENDDO ENDDO * * Initial approximation and RHS DO J=0,M DO I=0,N F(I,J)=1.D0 * D U(I,J)=0.D0 ENDDO ENDDO * * Boundary conditions IF (RANK_Y .EQ. 0) THEN DO I=0,N U(I,0)=UA(I,0) ENDDO ENDIF * IF (RANK_Y .EQ. NQ-1 ) THEN DO I=0,N U(I,M)=UA(I,M) ENDDO ENDIF * IF (RANK_X .EQ. 0) THEN DO J=0,M U(0,J)=UA(0,J) ENDDO ENDIF * IF (RANK_X .EQ. NP-1) THEN DO J=0,M U(N,J)=UA(N,J) ENDDO ENDIF * RETURN END * *----------------------------------------------------------------------- * SUBROUTINE CHKSOL(N, M, U, UA) INCLUDE 'rbgs1.cmm' DIMENSION U(0:N,0:M), UA(0:N,0:M) * ERR0=0.D0 DO J=1,M-1 DO I=1,N-1 ERR0 = MAX( ERR0, ABS( U(I,J)-UA(I,J) ) ) ENDDO ENDDO CALL MPI_REDUCE(ERR0, ERR, 1, RTYPE, MPI_MAX, 0, MPI_COMM_WORLD, + IERR) IF (MYRANK .EQ. 0) PRINT 1000, ERR 1000 FORMAT(' Error = ', 1PG23.16) * RETURN END * *----------------------------------------------------------------------- * SUBROUTINE RBGS(NIT, A, B, N, M, U, F) INCLUDE 'rbgs1.cmm' DIMENSION F(0:N,0:M), U(0:N,0:M) INTEGER LEFT, RIGHT, UPPER, LOWER, T1, T2 INTEGER STATUS(MPI_STATUS_SIZE) INTEGER LENS(2), DISPS(2), TYPES(2) DATA LENS/1, 1/, TYPES/RTYPE, MPI_UB/, DISPS/0, 0/ * HX=A/DBLE(NGLOB) HY=B/DBLE(MGLOB) * DX=1.D0/(HX*HX) DY=1.D0/(HY*HY) D=1.D0/(DX+DX+DY+DY) DX=DX*D DY=DY*D * LEFT = RANK_X-1 IF (LEFT .GE. 0) THEN LEFT = LEFT + RANK_Y * NP ELSE LEFT = MPI_PROC_NULL ENDIF * RIGHT = RANK_X+1 IF (RIGHT .LT. NP) THEN RIGHT = RIGHT + RANK_Y * NP ELSE RIGHT = MPI_PROC_NULL ENDIF * LOWER = RANK_Y-1 IF (LOWER .GE. 0) THEN LOWER = RANK_X + LOWER * NP ELSE LOWER = MPI_PROC_NULL ENDIF * UPPER = RANK_Y+1 IF (UPPER .LT. NQ) THEN UPPER = RANK_X + UPPER * NP ELSE UPPER = MPI_PROC_NULL ENDIF * * Create datatypes: T1=(1:N-1,.), T2=(.,1:M-1) CALL MPI_TYPE_EXTENT(RTYPE, I, IERR) DISPS(2)=2*I CALL MPI_TYPE_STRUCT(2, LENS, DISPS, TYPES, T1, IERR) CALL MPI_TYPE_COMMIT(T1, IERR) * call mpi_type_extent(t1, j, ierr) * print *, 'ext(T1)=', j DISPS(2)=2*I*(N+1) CALL MPI_TYPE_STRUCT(2, LENS, DISPS, TYPES, T2, IERR) CALL MPI_TYPE_COMMIT(T2, IERR) * call mpi_type_extent(t2, j, ierr) * print *, 'ext(T2)=', j * * RB GS iteration loop DO IT=1, NIT * * Red points: I+J even DO J=1,M-1 DO I=1+MOD(IOFFSET+JOFFSET+J+1,2), N-1, 2 U(I,J) = F(I,J) + DX*( U(I-1,J) + U(I+1,J) ) + + DY*( U(I,J-1) + U(I,J+1) ) ENDDO ENDDO * * Exchange B.C. on red points * * Update inner boundary values: U(1:N-1,1) -> LOWER, U(1:N-1,M) <- UPPER I0=1+MOD(IOFFSET+JOFFSET+1+1,2) I1=1+MOD(IOFFSET+JOFFSET+M+1,2) CALL MPI_SENDRECV(U(I0,1), (N-I0+1)/2, T1, LOWER, 111, + U(I1,M), (N-I1+1)/2, T1, UPPER, 111, + MPI_COMM_WORLD, STATUS, IERR) * * Update inner boundary values: U(1:N-1,M-1) -> UPPER, U(1:N-1,0) <- LOWER I0=1+MOD(IOFFSET+JOFFSET+M-1+1,2) I1=1+MOD(IOFFSET+JOFFSET+0+1,2) CALL MPI_SENDRECV(U(I0,M-1), (N-I0+1)/2, T1, UPPER, 111, + U(I1,0), (N-I1+1)/2, T1, LOWER, 111, + MPI_COMM_WORLD, STATUS, IERR) * * Update inner boundary values: U(1,1:M-1) -> LEFT, U(N,1:M-1) <- RIGHT J0=1+MOD(IOFFSET+JOFFSET+1+1,2) J1=1+MOD(IOFFSET+JOFFSET+N+1,2) CALL MPI_SENDRECV(U(1,J0), (M-J0+1)/2, T2, LEFT, 111, + U(N,J1), (M-J1+1)/2, T2, RIGHT, 111, + MPI_COMM_WORLD, STATUS, IERR) * * Update inner boundary values: U(N-1,1:M-1) -> RIGHT, U(0,1:M-1) <- LEFT J0=1+MOD(IOFFSET+JOFFSET+N-1+1,2) J1=1+MOD(IOFFSET+JOFFSET+0+1,2) CALL MPI_SENDRECV(U(N-1,J0), (M-J0+1)/2, T2, RIGHT, 111, + U(0,J1), (M-J1+1)/2, T2, LEFT, 111, + MPI_COMM_WORLD, STATUS, IERR) * * Black points: I+J odd DO J=1,M-1 DO I=1+MOD(IOFFSET+JOFFSET+J,2), N-1, 2 U(I,J) = F(I,J) + DX*( U(I-1,J) + U(I+1,J) ) + + DY*( U(I,J-1) + U(I,J+1) ) ENDDO ENDDO * * Exchange B.C. on black points * * Update inner boundary values: U(1:N-1,1) -> LOWER, U(1:N-1,M) <- UPPER I0=1+MOD(IOFFSET+JOFFSET+1,2) I1=1+MOD(IOFFSET+JOFFSET+M,2) CALL MPI_SENDRECV(U(I0,1), (N-I0+1)/2, T1, LOWER, 111, + U(I1,M), (N-I1+1)/2, T1, UPPER, 111, + MPI_COMM_WORLD, STATUS, IERR) * * Update inner boundary values: U(1:N-1,M-1) -> UPPER, U(1:N-1,0) <- LOWER I0=1+MOD(IOFFSET+JOFFSET+M-1,2) I1=1+MOD(IOFFSET+JOFFSET+0,2) CALL MPI_SENDRECV(U(I0,M-1), (N-I0+1)/2, T1, UPPER, 111, + U(I1,0), (N-I1+1)/2, T1, LOWER, 111, + MPI_COMM_WORLD, STATUS, IERR) * * Update inner boundary values: U(1,1:M-1) -> LEFT, U(N,1:M-1) <- RIGHT J0=1+MOD(IOFFSET+JOFFSET+1,2) J1=1+MOD(IOFFSET+JOFFSET+N,2) CALL MPI_SENDRECV(U(1,J0), (M-J0+1)/2, T2, LEFT, 111, + U(N,J1), (M-J1+1)/2, T2, RIGHT, 111, + MPI_COMM_WORLD, STATUS, IERR) * * Update inner boundary values: U(N-1,1:M-1) -> RIGHT, U(0,1:M-1) <- LEFT J0=1+MOD(IOFFSET+JOFFSET+N-1,2) J1=1+MOD(IOFFSET+JOFFSET+0,2) CALL MPI_SENDRECV(U(N-1,J0), (M-J0+1)/2, T2, RIGHT, 111, + U(0,J1), (M-J1+1)/2, T2, LEFT, 111, + MPI_COMM_WORLD, STATUS, IERR) * * Compute residual if (.false.) then RES0=0.D0 DO J=1,M-1 DO I=1,N-1 RES = F(I,J) - U(I,J) + DX*( U(I+1,J) + U(I-1,J) ) + + DY*( U(I,J+1) + U(I,J-1) ) RES0 = MAX( RES0, ABS(RES) ) ENDDO ENDDO CALL MPI_REDUCE(RES0, RESIDU, 1, RTYPE, MPI_MAX, 0, + MPI_COMM_WORLD, IERR) IF (MYRANK .EQ. 0) PRINT 1000, IT, RESIDU/D 1000 FORMAT(' It = ', I5, ' Residual = ', 1PG23.16) endif * ENDDO * CALL MPI_TYPE_FREE(T1, IERR) CALL MPI_TYPE_FREE(T2, IERR) * RETURN END