/* matmul.shmem.c SJ */
/* an example of SysV shmem usage */

/*
 * SysV IPC and gettimeofday() are XSI interfaces; request them explicitly so
 * their declarations are visible even when compiling with a strict -std=cXX.
 */
#ifndef _XOPEN_SOURCE
#define _XOPEN_SOURCE 700
#endif

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <sys/time.h>
#include <sys/wait.h>

#define N 400
/* creates PP*PP+1 processes */
#define PP 2
#define KEY 123
#define NEW(type) ( (type*)malloc(sizeof(type)) )

typedef double matrix[N][N];

void matmul(matrix *A, matrix *B, matrix *C);
void partmatmul(matrix *A, matrix *B, matrix *C, int size, int blocks, int row, int col);
double dtime(void);

/*
 * Multiplies two N x N matrices twice: once sequentially in this process and
 * once split into PP*PP blocks, each computed by a forked child that writes
 * its block of the result into a SysV shared memory segment.  Prints the
 * timing of both runs and the resulting speedup.
 *
 * Returns 0 on success, 2 if the private matrices cannot be allocated and
 * 1 on any shared memory, fork or worker failure.
 */
int main(int argc, char **argv)
{
    pid_t childs[PP * PP];      /* pids of the workers, in start order */
    pid_t child;
    int started = 0;            /* number of workers successfully forked */
    int failed = 0;             /* set when a fork or a worker fails */
    int rc = 1;
    int i, j;
    int row, col;
    double starttime, endtime, seqtime, partime;
    int smid;                   /* shared memory block id */
    void *smbase;               /* local mapping address of shared memory */
    char *smaddr;               /* same address, for byte arithmetic */
    matrix *A, *B, *C;
    matrix *sA, *sB, *sC;

    (void)argc;
    (void)argv;

    /* space allocation */
    A = NEW(matrix);
    B = NEW(matrix);
    C = NEW(matrix);
    if (! (A && B && C)) {
        perror("malloc: \n");
        free(A);
        free(B);
        free(C);
        exit(2);
    }

    /* fill A and B with deterministic test data */
    for (i = 0; i < N; i++) {
        for (j = 0; j < N; j++) {
            (*A)[i][j] = (double)(i + j + 1);
            (*B)[i][j] = (double)(i + j);
        }
    }

    /* sequential */
    starttime = dtime();
    matmul(A, B, C);
    endtime = dtime();
    seqtime = endtime-starttime;
    /* 2.0*N*N*N keeps the operation count in double, so a larger N cannot
     * overflow int. */
    printf("Sequential %6.2f s, %6.2lf MFLOPS, C(6, 9) = %lf \n",
           seqtime, (2.0*N*N*N)/(1000000*seqtime), (*C)[6][9]);

    /* parallel with shared memory */
    starttime = dtime();

    /* allocate one shared memory segment holding A, B and C back to back */
    smid = shmget(KEY, 3*sizeof(matrix), IPC_CREAT | IPC_EXCL | 0600);
    if (smid == -1) {
        perror("shmget: ");
        goto out_free;
    }

    /* attach shared memory for local use; shmat reports failure with
     * (void *)-1, not NULL */
    smbase = shmat(smid, NULL, 0);
    if (smbase == (void *)-1) {
        perror("shmat: ");
        goto out_remove;
    }
    smaddr = smbase;

    /* create addresses to matrices */
    sA = (matrix*)smaddr;
    sB = (matrix*)(smaddr + sizeof(matrix));
    sC = (matrix*)(smaddr + 2*sizeof(matrix));

    /* copy A and B to shared memory */
    memcpy(sA, A, sizeof(matrix));
    memcpy(sB, B, sizeof(matrix));

    /* Children inherit a copy of the stdio buffers; make sure nothing is
     * pending before forking. */
    fflush(NULL);

    /* create PP*PP child processes */
    for (row = 0; row < PP && !failed; row++) {
        for (col = 0; col < PP; col++) {
            child = fork();
            if (child == 0) {
                /* child process: compute one block, then leave without
                 * running the parent's stdio/atexit cleanup */
                partmatmul(sA, sB, sC, N, PP, row, col);
                _exit(0);
            }
            if (child == -1) {
                perror("fork: ");
                failed = 1;
                break;
            }
            childs[started++] = child;
        }
    }

    /* reap every worker that was started, even after a fork failure */
    for (i = 0; i < started; i++) {
        int status = 0;

        if (waitpid(childs[i], &status, 0) == -1) {
            perror("waitpid: ");
            failed = 1;
        } else if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
            fprintf(stderr, "worker %ld did not finish cleanly\n", (long)childs[i]);
            failed = 1;
        }
    }

    if (!failed) {
        endtime = dtime();
        partime = endtime-starttime;
        printf("Parallel %6.2f s, %6.2f MFLOPS, C(6, 9) = %f \n",
               partime, (2.0*N*N*N)/(1000000*partime), (*sC)[6][9]);
        printf("Speedup : %6.4f\n", (float)seqtime/partime);
        rc = 0;
    }

    if (shmdt(smbase) == -1) {
        perror("shmdt: ");
    }

out_remove:
    /* A SysV segment outlives the process; always remove it, otherwise the
     * next run fails in shmget because of IPC_EXCL. */
    if (shmctl(smid, IPC_RMID, NULL) == -1) {
        perror("shmctl: ");
    }

out_free:
    free(A);
    free(B);
    free(C);
    return rc;
}

/*
 * Sequential reference implementation: C = A * B for full N x N matrices.
 */
void matmul(matrix *A, matrix *B, matrix *C)
{
    int i, j, k;
    double c;

    for (i = 0; i < N; i++) {
        for (j = 0; j < N; j++) {
            c = 0.0;
            for (k = 0; k < N; k++) {
                c += (*A)[i][k] * (*B)[k][j];
            }
            (*C)[i][j] = c;
        }
    }
}

/*
 * Computes one block of C = A * B.
 *
 * The leading size x size part of the matrices is split into blocks x blocks
 * tiles; this call fills the tile at block coordinates (row, col) of C.  The
 * last tile in each direction also takes the rows/columns left over when
 * size is not a multiple of blocks.
 *
 * Calls with size outside 1..N, blocks < 1, or row/col outside
 * 0..blocks-1 do nothing.
 */
void partmatmul(matrix *A, matrix *B, matrix *C, int size, int blocks, int row, int col)
{
    int i, j, k;
    double c;
    int chunk;
    int Frow, Lrow, Fcol, Lcol;

    if (size < 1 || size > N || blocks < 1 ||
        row < 0 || row >= blocks || col < 0 || col >= blocks) {
        return;
    }

    chunk = size / blocks;
    Frow = row * chunk;
    Lrow = (row == blocks - 1) ? size - 1 : Frow + chunk - 1;
    Fcol = col * chunk;
    Lcol = (col == blocks - 1) ? size - 1 : Fcol + chunk - 1;

    for (i = Frow; i <= Lrow; i++) {
        /* columns run over this tile's own column range (Fcol..Lcol) */
        for (j = Fcol; j <= Lcol; j++) {
            c = 0.0;
            for (k = 0; k < size; k++) {
                c += (*A)[i][k] * (*B)[k][j];
            }
            (*C)[i][j] = c;
        }
    }
}

/*
 * Returns the current wall-clock time in seconds (microsecond resolution).
 */
double dtime(void)
{
    struct timeval tv;

    /* the timezone argument is obsolete; NULL is the supported value */
    gettimeofday(&tv, NULL);
    return tv.tv_sec + (double)tv.tv_usec/1000000;
}