* * Parallel multiplication of matrices using MPI_Isend/MPI_Irecv and BLAS * SUBROUTINE MATMUL(M, N, L, MYRANK, NPROCS, A, B, C, WORK) IMPLICIT DOUBLE PRECISION (A-H, O-Z) INCLUDE 'mpif.h' DIMENSION A(M/NPROCS, N), B(N, L/NPROCS), C(M/NPROCS, L), & WORK(N, L/NPROCS) INTEGER SRC, DEST, TAG INTEGER STATUS(MPI_STATUS_SIZE, 2), REQUEST(2) * MLOC = M/NPROCS LLOC = L/NPROCS * DEST = MOD( MYRANK-1+NPROCS, NPROCS ) SRC = MOD( MYRANK+1, NPROCS ) * JPOS=MYRANK*LLOC * DO IP=1, NPROCS - 1 TAG = 10000 + IP * CALL MPI_ISEND( B, N*LLOC, MPI_DOUBLE_PRECISION, DEST, TAG, & MPI_COMM_WORLD, REQUEST(1), IERR ) CALL MPI_IRECV( WORK, N*LLOC, MPI_DOUBLE_PRECISION, SRC, TAG, & MPI_COMM_WORLD, REQUEST(2), IERR ) * CALL DGEMM('N', 'N', MLOC, LLOC, N, 1.D0, A, MLOC, B, N, 0.D0, & C(1,1+JPOS), MLOC) * CALL MPI_WAITALL(2, REQUEST, STATUS, IERR) * * 拷贝 WORK -> B (可以通过在计算/通信中交替使用 B/WORK 来避免该操作) CALL DCOPY(N*LLOC, WORK, 1, B, 1) * JPOS = JPOS + LLOC IF ( JPOS .GE. L ) JPOS = 0 * ENDDO * CALL DGEMM('N', 'N', MLOC, LLOC, N, 1.D0, A, MLOC, B, N, 0.D0, & C(1,1+JPOS), MLOC) * RETURN END