* * Parallel multiplication of matrices using MPI_Isend/MPI_Irecv * SUBROUTINE MATMUL(M, N, L, MYRANK, NPROCS, A, B, C, WORK) IMPLICIT DOUBLE PRECISION (A-H, O-Z) INCLUDE 'mpif.h' DIMENSION A(M/NPROCS, N), B(N, L/NPROCS), C(M/NPROCS, L), & WORK(N, L/NPROCS) INTEGER SRC, DEST, TAG INTEGER STATUS(MPI_STATUS_SIZE, 2), REQUEST(2) * MLOC = M/NPROCS LLOC = L/NPROCS * DEST = MOD( MYRANK-1+NPROCS, NPROCS ) SRC = MOD( MYRANK+1, NPROCS ) * JPOS=MYRANK*LLOC * DO IP=1, NPROCS - 1 TAG = 10000 + IP * CALL MPI_ISEND( B, N*LLOC, MPI_DOUBLE_PRECISION, DEST, TAG, & MPI_COMM_WORLD, REQUEST(1), IERR ) CALL MPI_IRECV( WORK, N*LLOC, MPI_DOUBLE_PRECISION, SRC, TAG, & MPI_COMM_WORLD, REQUEST(2), IERR ) * DO J=1, LLOC DO I=1, MLOC SUM=0.D0 DO K=1, N SUM = SUM + A(I,K) * B(K,J) ENDDO C(I, J+JPOS) = SUM ENDDO ENDDO * CALL MPI_WAITALL(2, REQUEST, STATUS, IERR) * * 拷贝 WORK -> B (可以通过在计算/通信中交替使用 B/WORK 来避免该操作) DO J=1, LLOC DO I=1, N B(I,J) = WORK(I,J) ENDDO ENDDO * JPOS = JPOS + LLOC IF ( JPOS .GE. L ) JPOS = 0 * ENDDO * DO J=1, LLOC DO I=1, MLOC SUM=0.D0 DO K=1, N SUM = SUM + A(I,K) * B(K,J) ENDDO C(I, J+JPOS) = SUM ENDDO ENDDO * RETURN END