program MM2
  ! matmul.dp.t.f90 SJ
  !
  ! Benchmark: form C = A*B for N x N default-real matrices in three ways and
  ! print the wall-clock time and the achieved GFLOPS of each:
  !   1. "Tr+DP"  - transpose A, then one DOT_PRODUCT per element of C, so
  !                 that both operands of the dot product are contiguous
  !                 columns (Fortran stores arrays column-major);
  !   2. "OpenMP" - the same kernel with the outer loop parallelised
  !                 (only active when compiled with OpenMP, e.g. -fopenmp);
  !   3. "MATMUL" - the MATMUL intrinsic.
  !
  ! A itself is never modified: the transpose is written to the separate
  ! array AT inside each timed region. Transposing A in place in every
  ! section (as an earlier revision did) flips A back and forth, which makes
  ! the second section compute TRANSPOSE(A)*B for any non-symmetric input.
  use, intrinsic :: iso_fortran_env, only: int64, dp => real64, error_unit
  implicit none

  ! Working precision of the benchmarked matrices (default real).
  integer, parameter :: wp = kind(1.0)
  ! Matrix order.
  integer, parameter :: n = 1000

  ! Allocatable so that four N x N matrices do not depend on static/stack
  ! storage limits.
  real(wp), allocatable :: a(:, :), at(:, :), b(:, :), c(:, :)
  integer :: i, j, alloc_stat
  ! 64-bit clock counters: finer tick resolution with most compilers and no
  ! practical risk of the counter wrapping during a run.
  integer(int64) :: tstart, tend, trate

  allocate (a(n, n), at(n, n), b(n, n), c(n, n), stat=alloc_stat)
  if (alloc_stat /= 0) then
    write (error_unit, '(a)') 'MM2: unable to allocate the work matrices'
    error stop 1
  end if

  ! alternative input:
  ! a = reshape([(real(i, wp), i = 1, n*n)], [n, n])
  ! b = reshape([(real(i, wp), i = 1, n*n)], [n, n])
  ! simple input:
  a = 1.0_wp
  b = 2.0_wp

  !------------------------------------------
  ! 1. transpose + dot product, serial
  call system_clock(tstart, trate)
  at = transpose(a)          ! the transpose cost is part of the measurement
  do i = 1, n
    do j = 1, n
      ! at(:, i) is row i of A, b(:, j) is column j of B
      c(i, j) = dot_product(at(:, i), b(:, j))
    end do
  end do
  call system_clock(tend)
  call report('Tr+DP : C(6, 9) = ', c(6, 9), tstart, tend, trate)

  !------------------------------------------
  ! 2. transpose + dot product, outer loop parallelised with OpenMP
  call system_clock(tstart, trate)
  at = transpose(a)
  ! Use -fopenmp parameter in compilation if you want to
  ! parallelize the loop in OpenMP
  !$omp parallel do default(none) shared(at, b, c) private(i, j)
  do i = 1, n
    do j = 1, n
      c(i, j) = dot_product(at(:, i), b(:, j))
    end do
  end do
  !$omp end parallel do
  call system_clock(tend)
  call report('OpenMP: C(6, 9) = ', c(6, 9), tstart, tend, trate)

  !------------------------------------------
  ! 3. intrinsic MATMUL on the untouched A
  call system_clock(tstart, trate)
  c = matmul(a, b)
  call system_clock(tend)
  call report('MATMUL: C(6, 9) = ', c(6, 9), tstart, tend, trate)

contains

  ! Print one result line: the label, a sample element of C, the elapsed
  ! time in seconds and the achieved GFLOPS (2*N**3 floating-point
  ! operations per product).
  !
  !   label  - text identifying the method
  !   sample - element of C printed as a sanity check
  !   t0, t1 - SYSTEM_CLOCK counts taken before and after the computation
  !   rate   - SYSTEM_CLOCK ticks per second
  !
  ! If the processor has no clock (rate <= 0) or the interval is below the
  ! clock resolution, the rate cannot be computed and a note is printed
  ! instead of dividing by zero.
  subroutine report(label, sample, t0, t1, rate)
    character(len=*), intent(in) :: label
    real(wp), intent(in) :: sample
    integer(int64), intent(in) :: t0, t1, rate

    real(dp) :: seconds, gflops

    if (rate <= 0_int64 .or. t1 <= t0) then
      write (*, *) label, sample, ' (interval too short for the system clock)'
      return
    end if

    seconds = real(t1 - t0, dp) / real(rate, dp)
    gflops = 2.0_dp * real(n, dp)**3 / (1.0e9_dp * seconds)

    ! Converted to default real so the list-directed output keeps its
    ! familiar width.
    write (*, *) label, sample, real(seconds, wp), 's = ', &
      real(gflops, wp), ' GFLOPS'
  end subroutine report

end program MM2