m4_changequote({,})m4_define(
{matmul},
{C
C----------------------------------------------------------------------
C     MATMUL_$1$2$3 -- time the triple-loop update C = C + A*B with
C     the loops nested $1 outermost, $2 in the middle, $3 innermost.
C
C     LD    leading dimension used for A, B and C
C     N     order of the matrices
C     A, B  work arrays, overwritten with 1.0 on entry
C     C     work array, zeroed at the start of every repetition
C     PEAK  peak rate used for the printed efficiency
C           (presumably Mflops -- confirm with the caller)
C     NREP  number of timed repetitions
C     PRT   the result line is printed when nonzero
C
C     The time of NREP bare calls to DUMMY is subtracted from the
C     timed section.  When the net time is not positive (timer
C     resolution) 0 Mflops is reported instead of dividing by it.
C----------------------------------------------------------------------
C
      SUBROUTINE MATMUL_$1$2$3(LD, N, A, B, C, PEAK, NREP, PRT)
      REAL*8 A(LD,N), B(LD,N), C(LD,N)
      REAL CPUTIME, MFLOPS, PEAK
      REAL ETIME, TARRAY(2), TIME0, TIME1, TIME2
      INTEGER PRT
*
*     Fill the operands with a known value.
      DO J = 1, N
         DO I = 1, N
            A(I,J) = 1.0
            B(I,J) = 1.0
         ENDDO
      ENDDO
*
*     Timed section: NREP products, each followed by a DUMMY call.
      TIME0 = ETIME(TARRAY)
      call PAPItime(.false.)
      DO L = 1, NREP
         DO J = 1, N
            DO I = 1, N
               C(I,J) = 0.0
            ENDDO
         ENDDO
         DO $1 = 1, N
            DO $2 = 1, N
               DO $3 = 1, N
                  C(I,J) = C(I,J) + A(I,K) * B(K,J)
               ENDDO
            ENDDO
         ENDDO
         CALL DUMMY(C)
      ENDDO
      call PAPItime(.true.)
*
*     Measure the overhead of the DUMMY calls alone.
      TIME1 = ETIME(TARRAY)
      DO I = 1, NREP
         CALL DUMMY(C)
      ENDDO
      TIME2 = ETIME(TARRAY)
C
      CPUTIME = ((TIME1 - TIME0) - (TIME2 - TIME1))
*     Exact operation count, kept for reference:
*     MFLOPS = (N*REAL(N*NREP)*(2.0*N-1)) / (CPUTIME * 1.0E+06)
      IF (CPUTIME .GT. 0.0) THEN
         MFLOPS = (N*REAL(N*NREP)*2.0*N) / (CPUTIME * 1.0E+06)
      ELSE
         MFLOPS = 0.0
      ENDIF
*
      IF (PRT .NE. 0) THEN
         PRINT 1000, '$1, $2, $3:', LD, MFLOPS, CPUTIME,
     +               MFLOPS/PEAK*100
      ENDIF
 1000 FORMAT(1X, A, ' LD=', I4, ' Mflops= ', F8.2,
     +       ' Time= ', 1PG10.2, ' Effi.(%)= ', 1PG9.4)
      RETURN
      END
}
)
matmul(I, J, K)
matmul(I, K, J)
matmul(J, I, K)
matmul(J, K, I)
matmul(K, I, J)
matmul(K, J, I)
C
C----------------------------------------------------------------------
C     MATMUL_BLAS -- same timing harness as the loop variants above,
C     with the product computed by a single DGEMM call.
C
C     Arguments are those of the loop variants: LD, N, A, B, C,
C     PEAK, NREP, PRT.  C is still zeroed before each DGEMM call so
C     that the timed work matches the loop variants.
C----------------------------------------------------------------------
C
      SUBROUTINE MATMUL_BLAS(LD, N, A, B, C, PEAK, NREP, PRT)
      REAL*8 A(LD,N), B(LD,N), C(LD,N)
      REAL CPUTIME, MFLOPS, PEAK
      REAL ETIME, TARRAY(2), TIME0, TIME1, TIME2
      INTEGER PRT
*
*     Fill the operands with a known value.
      DO J = 1, N
         DO I = 1, N
            A(I,J) = 1.0
            B(I,J) = 1.0
         ENDDO
      ENDDO
*
*     Timed section: NREP products, each followed by a DUMMY call.
      TIME0 = ETIME(TARRAY)
      call PAPItime(.false.)
      DO L = 1, NREP
         DO J = 1, N
            DO I = 1, N
               C(I,J) = 0.0
            ENDDO
         ENDDO
         CALL DGEMM('n', 'n', N, N, N, 1.D0, A, LD, B, LD,
     +              0.D0, C, LD)
         CALL DUMMY(C)
      ENDDO
      call PAPItime(.true.)
*
*     Measure the overhead of the DUMMY calls alone.
      TIME1 = ETIME(TARRAY)
      DO I = 1, NREP
         CALL DUMMY(C)
      ENDDO
      TIME2 = ETIME(TARRAY)
C
      CPUTIME = ((TIME1 - TIME0) - (TIME2 - TIME1))
*     Exact operation count, kept for reference:
*     MFLOPS = (N*REAL(N*NREP)*(2.0*N-1)) / (CPUTIME * 1.0E+06)
*     A net time that is not positive reports 0 Mflops.
      IF (CPUTIME .GT. 0.0) THEN
         MFLOPS = (N*REAL(N*NREP)*2.0*N) / (CPUTIME * 1.0E+06)
      ELSE
         MFLOPS = 0.0
      ENDIF
*
      IF (PRT .NE. 0) THEN
         PRINT 1000, ' BLAS:', LD, MFLOPS, CPUTIME,
     +               MFLOPS/PEAK*100.0
      ENDIF
 1000 FORMAT(1X, A, ' LD=', I4, ' Mflops= ', F8.2,
     +       ' Time= ', 1PG10.2, ' Effi.(%)= ', 1PG9.4)
      RETURN
      END
C
C----------------------------------------------------------------------
C     PAPItime -- hardware-counter hook called around each timed
C     section.  PRT = .true. prints the deltas since the last call.
C
C     NOTE: the routine currently returns at once; both counter
C     calls below are commented out, so nothing after the RETURN is
C     executed.  Re-enable one of the calls together with removing
C     the RETURN, otherwise retval would be read undefined.
C----------------------------------------------------------------------
C
      SUBROUTINE PAPItime(PRT)
      LOGICAL PRT
*
      REAL*4 proc_time, mflops, real_time
      INTEGER*8 flpins, flpins0
      INTEGER retval
      REAL*4 proc_time0, real_time0
      SAVE proc_time0, real_time0, flpins0
*
      return
*???  CALL PAPIf_flips(real_time, proc_time, flpins, mflops, retval)
*     CALL PAPIf_flops(real_time, proc_time, flpins, mflops, retval)
      IF (retval .ne. 0) THEN
         PRINT *, 'PAPI error: ', retval
         RETURN
      ENDIF
      IF (PRT) THEN
         print *, '------- PAPI results'
         print *, ' Real_time: ', real_time - real_time0
         print *, ' Proc_time: ', proc_time - proc_time0
         print *, ' Total flpins: ', flpins - flpins0
         print *, ' MFLOPS: ', mflops
      ENDIF
*     Remember the current readings for the next delta.
      real_time0 = real_time
      proc_time0 = proc_time
      flpins0 = flpins
      RETURN
      END
C
C----------------------------------------------------------------------
C     MATMUL_TIME -- reads N and PEAK from standard input, picks
C     NREP so that about 1.0E+08 operations are timed, then runs
C     every loop ordering and the BLAS variant with LD = N and
C     LD = N+1.
C----------------------------------------------------------------------
      PROGRAM MATMUL_TIME
      INTEGER SIZE
      PARAMETER (SIZE=1025)
      REAL*8 A(SIZE, SIZE), B(SIZE, SIZE), C(SIZE, SIZE)
      REAL PEAK
      INTEGER*8 IWK
C
      PRINT *, 'Enter N, PEAK: '
      READ(*,*) N, PEAK
*     N < 1 would make the NREP computation below divide by zero
*     or go negative.
      IF ( N.LT.1 ) THEN
         PRINT *, 'N must be positive.'
         STOP
      ENDIF
      IF ( N.GT.SIZE ) THEN
         PRINT *, 'N too large.'
         STOP
      ENDIF
*
      NREP = INT(100000000.0/(2.0D0*N*DBLE(N)*N))
      IF ( NREP .EQ. 0 ) NREP = 1
*
*     IWK is 8 bytes wide so the operation count does not overflow.
      IWK = N * N
      PRINT 1000, N, 2 * NREP * IWK * N, INT(PEAK + 0.5)
 1000 FORMAT(' N=', I4, ' FLOPS=', I12, ' Peak=', I5)
m4_ifdef({PAPI}, {
      CALL matmul0_papi(N, A, B, C, PEAK, NREP)
}, {C
*
*     The LD = N+1 runs below address (N+1)*N elements of each
*     SIZE*SIZE array, so N must stay strictly below SIZE here.
      IF ( N.GE.SIZE ) THEN
         PRINT *, 'N too large.'
         STOP
      ENDIF
*
* Initialize PAPI
      CALL PAPItime(.false.)
*
      CALL MATMUL_IJK(N, N, A, B, C, PEAK, NREP, 1)
      CALL MATMUL_IJK(N+1, N, A, B, C, PEAK, NREP, 1)
      CALL MATMUL_IKJ(N, N, A, B, C, PEAK, NREP, 1)
      CALL MATMUL_IKJ(N+1, N, A, B, C, PEAK, NREP, 1)
      CALL MATMUL_JIK(N, N, A, B, C, PEAK, NREP, 1)
      CALL MATMUL_JIK(N+1, N, A, B, C, PEAK, NREP, 1)
      CALL MATMUL_JKI(N, N, A, B, C, PEAK, NREP, 1)
      CALL MATMUL_JKI(N+1, N, A, B, C, PEAK, NREP, 1)
      CALL MATMUL_KIJ(N, N, A, B, C, PEAK, NREP, 1)
      CALL MATMUL_KIJ(N+1, N, A, B, C, PEAK, NREP, 1)
      CALL MATMUL_KJI(N, N, A, B, C, PEAK, NREP, 1)
      CALL MATMUL_KJI(N+1, N, A, B, C, PEAK, NREP, 1)
      CALL MATMUL_BLAS(N, N, A, B, C, PEAK, NREP, 1)
      CALL MATMUL_BLAS(N+1, N, A, B, C, PEAK, NREP, 1)
})
*
      STOP
      END
c
c----------------------------------------------------------------------
c     DUMMY -- does nothing; it is called on the result array after
c     every timed product (presumably so the compiler cannot discard
c     the loop nest -- confirm).  The argument is assumed-size so
c     that passing a rank-2 array is a legal call.
c----------------------------------------------------------------------
c
      SUBROUTINE DUMMY(C)
      DOUBLE PRECISION C(*)
      RETURN
      END