#include #include #include #include #include "papi.h" #define add_event(es, e) { \ int ret; \ if ((ret = PAPI_add_event(es, e)) != PAPI_OK) \ error("PAPI_add_event", ret); \ } #define remove_event(es, e) { \ int ret; \ if ((ret = PAPI_remove_event(es, e)) != PAPI_OK) \ error("PAPI_remove_event", ret); \ } #define error(s, ret) { \ char buffer[1024]; \ PAPI_perror(ret, buffer, sizeof(buffer)); \ fprintf(stderr, "%s: %s\n", s, buffer); \ exit(1); \ } #if 0 # define matmul_ijk_ matmul_ijk__ # define matmul_ikj_ matmul_ikj__ # define matmul_jik_ matmul_jik__ # define matmul_jki_ matmul_jki__ # define matmul_kij_ matmul_kij__ # define matmul_kji_ matmul_kji__ # define matmul_blas_ matmul_blas__ # define matmul0_papi_ matmul0_papi__ #endif void matmul_ijk_(int *n, double *a, double *b, double *c, int *peak, int *rep, int *prt); void matmul_ikj_(int *n, double *a, double *b, double *c, int *peak, int *rep, int *prt); void matmul_jik_(int *n, double *a, double *b, double *c, int *peak, int *rep, int *prt); void matmul_jki_(int *n, double *a, double *b, double *c, int *peak, int *rep, int *prt); void matmul_kij_(int *n, double *a, double *b, double *c, int *peak, int *rep, int *prt); void matmul_kji_(int *n, double *a, double *b, double *c, int *peak, int *rep, int *prt); void matmul_blas_(int *n, double *a, double *b, double *c, int *peak, int *rep, int *prt); typedef void (*F_func_t)(int *n, double *a, double *b, double *c, int *peak, int *nrep, int *prt); static F_func_t funcs[] = { matmul_ijk_, matmul_jki_, matmul_kij_, matmul_jik_, matmul_ikj_, matmul_kji_, matmul_blas_ }; static int events; static long long counters[128]; static void cleanup(void) { if (PAPI_is_initialized()) PAPI_shutdown(); } void matmul0_papi_(int *n, double *a, double *b, double *c, int *peak, int *rep) { int i, zero = 0, one = 1; if (PAPI_library_init(PAPI_VER_CURRENT) != PAPI_VER_CURRENT) { fprintf(stderr, "PAPI_library_init failed.\n"); exit(1); } atexit(cleanup); PAPI_create_eventset(&events); for (i = 0; i < sizeof(funcs)/sizeof(*funcs); i++) { long long misses1, misses2, total1 = 0, total2 = 0; add_event(events, PAPI_L1_TCM); add_event(events, PAPI_L2_TCM); PAPI_start(events); PAPI_reset(events); funcs[i](n, a, b, c, peak, rep, &one); PAPI_stop(events, counters); misses1 = counters[0]; misses2 = counters[1]; PAPI_cleanup_eventset(events); if (PAPI_add_event(events, PAPI_L1_TCA) == PAPI_OK && PAPI_add_event(events, PAPI_L2_TCA) == PAPI_OK) { PAPI_start(events); PAPI_reset(events); funcs[i](n, a, b, c, peak, rep, &zero); PAPI_stop(events, counters); total1 = counters[0]; total2 = counters[1]; } PAPI_cleanup_eventset(events); if (total1 > 0 && total2 > 0) { printf("\t%10lld L1-misses (%0.2lf), %10lld L2-misses (%0.2lf).\n", misses1, misses1/(double)total1, misses2, misses2/(double)total2); } else { printf("\t%10lld L1-misses, %10lld L2-misses.\n", misses1, misses2); } } return; }