/* matmul.pthread.c SJ */
/* compile: gcc matmul.pthread.c -lpthread */
/*
 * An example of pthread usage: multiplies two N x N matrices once
 * sequentially and once split over a PP x PP grid of threads, and reports
 * the wall-clock time of both runs.
 */

/* Ask for the POSIX API explicitly so strict -std=c99/c11 builds also work. */
#ifndef _POSIX_C_SOURCE
#define _POSIX_C_SOURCE 200809L
#endif

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>

#define N 300

/* main() creates PP*PP worker threads, one per block of the result matrix */
#define PP 2

#define NEW(type) ( (type*)malloc(sizeof(type)) )

typedef double matrix[N][N];

/*
 * Work description handed to one worker thread: compute block (row, col)
 * of C = A * B, where the size x size result is cut into blocks x blocks
 * tiles.
 */
struct part {
    matrix *A, *B, *C;   /* operands A, B and result C (shared, not owned) */
    int size;            /* number of rows/columns actually used (<= N)    */
    int blocks;          /* tiles per dimension                            */
    int row, col;        /* tile coordinates, each in [0, blocks)          */
};

void matmul(matrix *A, matrix *B, matrix *C);
void *partmatmul(void *task);
double ltime(void);

/*
 * Define MATMUL_NO_MAIN to compile only the kernels (matmul, partmatmul,
 * ltime) without the benchmark driver, e.g. to link them into a test.
 */
#ifndef MATMUL_NO_MAIN
int main(int argc, char **argv)
{
    enum { NTHREADS = PP * PP };
    int i, j, n, rc;
    int created = 0, failed = 0;
    double starttime, endtime, seqtime, partime;
    /* One multiply and one add per inner-loop step; kept in double so a
     * larger N cannot overflow int. */
    const double flops = 2.0 * N * N * N;
    pthread_t threads[NTHREADS];
    struct part parts[NTHREADS];
    matrix *A, *B, *C;

    (void)argc;
    (void)argv;

    /* space allocation */
    A = NEW(matrix);
    B = NEW(matrix);
    C = NEW(matrix);
    if (!(A && B && C)) {
        perror("malloc");
        free(A);
        free(B);
        free(C);
        exit(2);
    }

    /* fill the operands with deterministic test values */
    for (i = 0; i < N; i++) {
        for (j = 0; j < N; j++) {
            (*A)[i][j] = (double)(i + j + 1);
            (*B)[i][j] = (double)(i + j);
        }
    }

    /* sequential */
    starttime = ltime();
    matmul(A, B, C);
    endtime = ltime();
    seqtime = endtime - starttime;
    printf("Sequential %6.2f s, %6.2lf MFLOPS, C(6, 9) = %lf \n",
           seqtime, flops / (1000000 * seqtime), (*C)[6][9]);

    /* Clear the sequential result (outside the timed region) so the value
     * printed below really comes from the worker threads. */
    memset(C, 0, sizeof *C);

    /* parallel with pthreads: create PP*PP child threads */
    starttime = ltime();
    for (n = 0; n < NTHREADS; n++) {
        struct part *p = &parts[n];

        p->A = A;
        p->B = B;
        p->C = C;
        p->size = N;
        p->blocks = PP;
        p->row = n / PP;
        p->col = n % PP;

        /* pthread_create reports failure through its return value. */
        rc = pthread_create(&threads[n], NULL, partmatmul, p);
        if (rc != 0) {
            fprintf(stderr, "pthread_create: %s\n", strerror(rc));
            failed = 1;
            break;
        }
        created++;
    }

    /* Join every thread that was actually started, even after a failure,
     * so none of them outlives the matrices it is working on. */
    for (n = 0; n < created; n++) {
        rc = pthread_join(threads[n], NULL);
        if (rc != 0) {
            fprintf(stderr, "pthread_join: %s\n", strerror(rc));
            failed = 1;
        }
    }
    endtime = ltime();

    if (failed) {
        free(A);
        free(B);
        free(C);
        return EXIT_FAILURE;
    }

    partime = endtime - starttime;
    printf("Parallel %6.2f s, %6.2f MFLOPS, C(6, 9) = %f \n",
           partime, flops / (1000000 * partime), (*C)[6][9]);
    printf("Speedup : %6.4f\n", seqtime / partime);

    free(A);
    free(B);
    free(C);
    return 0;
}
#endif /* MATMUL_NO_MAIN */

/*
 * Sequential reference kernel: C = A * B over the full N x N matrices.
 * C must not alias A or B.
 */
void matmul(matrix *A, matrix *B, matrix *C)
{
    int i, j, k;
    double c;

    for (i = 0; i < N; i++) {
        for (j = 0; j < N; j++) {
            c = 0.0;
            for (k = 0; k < N; k++) {
                c += (*A)[i][k] * (*B)[k][j];
            }
            (*C)[i][j] = c;
        }
    }
}

/*
 * Thread entry point: computes one tile of C = A * B.
 *
 * task points to a struct part describing the tile.  Each tile spans
 * size/blocks rows and columns; the last tile in each dimension also takes
 * the remainder when size is not a multiple of blocks, so the tiles always
 * cover the whole result.  Distinct (row, col) tiles write disjoint cells
 * of C, so workers need no locking.
 *
 * Always returns NULL.  An invalid description (NULL task, blocks <= 0,
 * size outside [0, N], tile coordinates outside [0, blocks)) is ignored
 * and leaves C untouched.
 */
void *partmatmul(void *task)
{
    struct part *t = (struct part *)task;
    matrix *A, *B, *C;
    int size, blocks, chunk;
    int first_row, end_row, first_col, end_col;   /* end_* are exclusive */
    int i, j, k;
    double c;

    if (t == NULL || t->blocks <= 0 || t->size < 0 || t->size > N
        || t->row < 0 || t->row >= t->blocks
        || t->col < 0 || t->col >= t->blocks) {
        return NULL;
    }

    A = t->A;
    B = t->B;
    C = t->C;
    size = t->size;
    blocks = t->blocks;
    chunk = size / blocks;

    first_row = t->row * chunk;
    end_row = (t->row == blocks - 1) ? size : first_row + chunk;
    first_col = t->col * chunk;
    end_col = (t->col == blocks - 1) ? size : first_col + chunk;

    for (i = first_row; i < end_row; i++) {
        /* Columns start at this tile's first column; starting at the first
         * row would overlap other tiles or skip this one entirely. */
        for (j = first_col; j < end_col; j++) {
            c = 0.0;
            for (k = 0; k < size; k++) {
                c += (*A)[i][k] * (*B)[k][j];
            }
            (*C)[i][j] = c;
        }
    }

    return NULL;
}

/*
 * Current wall-clock time in seconds, with microsecond resolution, as
 * reported by gettimeofday().
 */
double ltime(void)
{
    struct timeval tv;

    /* The timezone argument is obsolete; NULL is the portable value. */
    gettimeofday(&tv, NULL);
    return tv.tv_sec + (double)tv.tv_usec / 1000000;
}