* * Parallel matrix multiplication: main program * PROGRAM MATMUL_MAIN IMPLICIT DOUBLE PRECISION (A-H, O-Z) INCLUDE 'mpif.h' PARAMETER (NBUFFER=128*1024*1024/8) DIMENSION BUF(NBUFFER) DOUBLE PRECISION TIME_START, TIME_END EXTERNAL INIT, MATMUL, CHECK * CALL MPI_INIT(IERR) CALL MPI_COMM_RANK(MPI_COMM_WORLD, MYRANK, IERR) CALL MPI_COMM_SIZE(MPI_COMM_WORLD, NPROCS, IERR) * IF (MYRANK.EQ.0) THEN PRINT *, 'Enter M, N, L: ' CALL FLUSH(6) READ(*,*) M, N, L ENDIF CALL MPI_BCAST(M, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, IERR) CALL MPI_BCAST(N, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, IERR) CALL MPI_BCAST(L, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, IERR) * IF ( MOD(M,NPROCS).NE.0 .OR. MOD(L,NPROCS).NE.0 ) THEN IF (MYRANK.EQ.0) PRINT *, 'M OR L Cannot be divided by nprocs!' CALL MPI_FINALIZE(IERR) STOP ENDIF * IA = 1 IB = IA + M/NPROCS * N IC = IB + N * L/NPROCS IWK = IC + M/NPROCS * L IEND = IWK + N * L/NPROCS IF ( IEND .GT. NBUFFER+1 ) THEN IF (MYRANK.EQ.0) PRINT *, 'Insufficient buffer size!' CALL MPI_FINALIZE(IERR) STOP ENDIF * CALL INIT( M, N, L, MYRANK, NPROCS, BUF(IA), BUF(IB), BUF(IC) ) TIME_START = MPI_WTIME() CALL MATMUL( M, N, L, MYRANK, NPROCS, BUF(IA), BUF(IB), BUF(IC), & BUF(IWK) ) TIME_END = MPI_WTIME() CALL CHECK( M, N, L, MYRANK, NPROCS, BUF(IA), BUF(IB), BUF(IC) ) * IF ( MYRANK .EQ. 0 ) THEN PRINT *, 'TIME = ', TIME_END-TIME_START PRINT *, 'MFLOPS = ', M*(N+N-1.0)*L/(TIME_END-TIME_START)*1D-6 ENDIF * CALL MPI_FINALIZE(IERR) STOP END * *------------------------------------------------------------------ * SUBROUTINE INIT(M, N, L, MYRANK, NPROCS, A, B, C) IMPLICIT DOUBLE PRECISION (A-H, O-Z) INCLUDE 'mpif.h' DIMENSION A(M/NPROCS, N), B(N, L/NPROCS), C(M/NPROCS, L) * MLOC = M/NPROCS LLOC = L/NPROCS * * INIT. A, B DO J=1, N DO I=1, MLOC A(I,J) = I+MYRANK*MLOC ENDDO ENDDO * DO J=1, LLOC DO I=1, N B(I,J) = J+MYRANK*LLOC ENDDO ENDDO * RETURN END * *------------------------------------------------------------------ * SUBROUTINE CHECK(M, N, L, MYRANK, NPROCS, A, B, C) IMPLICIT DOUBLE PRECISION (A-H, O-Z) INCLUDE 'mpif.h' DIMENSION A(M/NPROCS, N), B(N, L/NPROCS), C(M/NPROCS, L) INTEGER LOCAL_CODE, CODE * MLOC = M/NPROCS LLOC = L/NPROCS * * CHECK THE RESULTS LOCAL_CODE = 0 DO J=1, L DO I=1, MLOC IF ( ABS(C(I,J) - N*DBLE(I+MYRANK*MLOC)*J) .GT. 1D-10 ) THEN LOCAL_CODE = 1 GOTO 10 ENDIF ENDDO ENDDO * 10 CALL MPI_REDUCE( LOCAL_CODE, CODE, 1, MPI_INTEGER, MPI_SUM, 0, & MPI_COMM_WORLD, IERR) * IF ( MYRANK .EQ. 0 ) THEN PRINT *, 'Code = ', CODE ENDIF * RETURN END