*
* This is an answer to Exercise 2(1) on page 20 of mpi973.doc
*
* The program uses the non-blocking MPI_Isend/MPI_Irecv functions to
* avoid deadlocks.  Moreover, it allows computation and communication
* to overlap on some systems.
*
      program matmul
      implicit double precision (a-h, o-z)
      include 'mpif.h'
      parameter (nbuffer=450*1024*1024/8)
      dimension buf(nbuffer)
*
      call MPI_Init(ierr)
      call MPI_Comm_rank(MPI_COMM_WORLD, myrank, ierr)
      call MPI_Comm_size(MPI_COMM_WORLD, nprocs, ierr)
*
*     Rank 0 reads the matrix sizes and broadcasts them to all ranks
      if (myrank.eq.0) then
        print *, 'Enter M, N, L: '
        call flush(6)
        read(*,*) M, N, L
      endif
      call MPI_Bcast(M, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
      call MPI_Bcast(N, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
      call MPI_Bcast(L, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
*
      if ( mod(m,nprocs).ne.0 .or. mod(l,nprocs).ne.0 ) then
        if (myrank.eq.0) print *, 'M or L is not divisible by nprocs!'
        call MPI_Finalize(ierr)
        stop
      endif
*
*     Partition buf into the local blocks a (m/nprocs x n),
*     b (n x l/nprocs), c (m/nprocs x l) and the receive buffer work
      ia   = 1
      ib   = ia  + m/nprocs * n
      ic   = ib  + n * l/nprocs
      iwk  = ic  + m/nprocs * l
      iend = iwk + n * l/nprocs
      if ( iend .gt. nbuffer+1 ) then
        if (myrank.eq.0) print *, 'Insufficient buffer size!'
        call MPI_Finalize(ierr)
        stop
      endif
*
      call sub( m, n, l, myrank, nprocs, buf(ia), buf(ib), buf(ic),
     &          buf(iwk) )
*
      call MPI_Finalize(ierr)
      stop
      end
*
*------------------------------------------------------------------
*
      subroutine sub(m, n, l, myrank, nprocs, a, b, c, work)
      implicit double precision (a-h, o-z)
      include 'mpif.h'
      dimension a(m/nprocs, n), b(n, l/nprocs), c(m/nprocs, l),
     &          work(n, l/nprocs)
      integer src, dest, tag, local_code, code
      integer status(MPI_STATUS_SIZE, 2), request(2)
      double precision time_start, time_end
*
      mloc = m/nprocs
      lloc = l/nprocs
*
*     Ring topology: send the local block of b to the left neighbour
*     and receive the next block from the right neighbour
      dest = mod( myrank-1+nprocs, nprocs )
      src  = mod( myrank+1, nprocs )
*
*     Assign values to a, b
      do j=1, n
        do i=1, mloc
          a(i,j) = i+myrank*mloc
        enddo
      enddo
*
      do j=1, lloc
        do i=1, n
          b(i,j) = j+myrank*lloc
        enddo
      enddo
*
      time_start = MPI_Wtime()
*
*     jpos is the global column offset of the locally held b block
      jpos = myrank*lloc
*
*     In each of the nprocs-1 steps: start the shift of b, multiply
*     with the block currently held, then wait for the shift to finish
      do ip=1, nprocs - 1
*
        tag = 10000 + ip
        call MPI_Isend( b, n*lloc, MPI_DOUBLE_PRECISION, dest, tag,
     &                  MPI_COMM_WORLD, request(1), ierr )
        call MPI_Irecv( work, n*lloc, MPI_DOUBLE_PRECISION, src, tag,
     &                  MPI_COMM_WORLD, request(2), ierr )
*
        do j=1, lloc
          do i=1, mloc
            sum=0.d0
            do k=1, n
              sum = sum + a(i,k) * b(k,j)
            enddo
            c(i, j+jpos) = sum
          enddo
        enddo
*
        call MPI_Waitall(2, request, status, ierr)
*
        do j=1, lloc
          do i=1, n
            b(i,j) = work(i,j)
          enddo
        enddo
*
        jpos = jpos + lloc
        if ( jpos .ge. l ) jpos = 0
*
      enddo
*
*     Multiply with the last block of b received
      do j=1, lloc
        do i=1, mloc
          sum=0.d0
          do k=1, n
            sum = sum + a(i,k) * b(k,j)
          enddo
          c(i, j+jpos) = sum
        enddo
      enddo
*
      time_end = MPI_Wtime()
*
*     Check results
      local_code = 0
      do j=1, l
        do i=1, mloc
          if ( abs(c(i,j) - n*dble(i+myrank*mloc)*j) .gt. 1d-10 ) then
            local_code = 1
            goto 10
          endif
        enddo
      enddo
*
 10   call MPI_Reduce( local_code, code, 1, MPI_INTEGER, MPI_SUM, 0,
     &                 MPI_COMM_WORLD, ierr)
*
      if ( myrank .eq. 0 ) then
        print *, 'code = ', code
        print *, 'time = ', time_end-time_start
        print *, 'mflops = ', m*(n+n-1.d0)*l/(time_end-time_start)*1d-6
      endif
*
      return
      end
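*
*------------------------------------------------------------------
*
* For comparison only: the ring shift could also be written with the
* blocking MPI_Sendrecv call, which is likewise deadlock-free but
* cannot overlap the local multiplication with the communication the
* way the MPI_Isend/MPI_Irecv pair above does.  The subroutine below
* is a minimal sketch of that variant; it is not called by the
* program above, and its name (shiftb) is ours, not part of the
* exercise.
*
      subroutine shiftb(n, lloc, b, work, dest, src, tag)
      implicit double precision (a-h, o-z)
      include 'mpif.h'
      dimension b(n, lloc), work(n, lloc)
      integer dest, src, tag
      integer status(MPI_STATUS_SIZE)
*
*     Send the local block of b to the left neighbour and receive the
*     next block from the right neighbour in a single blocking call
      call MPI_Sendrecv( b, n*lloc, MPI_DOUBLE_PRECISION, dest, tag,
     &                   work, n*lloc, MPI_DOUBLE_PRECISION, src, tag,
     &                   MPI_COMM_WORLD, status, ierr )
*
*     Copy the received block into b for the next multiplication step
      do j=1, lloc
        do i=1, n
          b(i,j) = work(i,j)
        enddo
      enddo
*
      return
      end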