/* ======================================================
   mult-matrix.cu

   Host-only (CPU) N x N matrix multiplication benchmark.

   Two N x N matrices A and B are filled with pseudo-random
   values from rand(), C = A * B is computed with a plain
   triple loop, and the wall-clock time spent in the
   multiplication is printed in microseconds.  When N < 10
   the three matrices are printed as well.

   Matrices are stored row-major in flat arrays: element
   (i, j) is at index i*N + j.

   Usage: <program> N
   ====================================================== */

#include <errno.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>

/*
 * Print the n x n row-major matrix A to stdout, one row per line,
 * formatting every element with "%6.2f ".
 */
void printMatrix(const float *A, int n)
{
    for (int i = 0; i < n; i++) {
        for (int j = 0; j < n; j++) {
            printf("%6.2f ", A[i * n + j]);
        }
        printf("\n");
    }
}

/*
 * Parse `text` as a strictly positive decimal integer that fits in an int.
 *
 * Returns 0 and stores the value in *out on success; returns -1 (leaving
 * *out untouched) if the text is empty, has trailing characters, is out of
 * range, or is not greater than zero.
 */
static int parse_dimension(const char *text, int *out)
{
    char *end = NULL;

    errno = 0;
    long value = strtol(text, &end, 10);
    if (end == text || *end != '\0' || errno == ERANGE) {
        return -1;
    }
    if (value <= 0 || value > INT_MAX) {
        return -1;
    }

    *out = (int)value;
    return 0;
}

/*
 * Allocate storage for an n x n matrix of floats (contents uninitialized).
 *
 * Returns NULL if n is 0, if n*n*sizeof(float) would overflow size_t, or
 * if malloc fails.  The caller owns the result and releases it with free().
 */
static float *alloc_matrix(size_t n)
{
    if (n == 0 || n > SIZE_MAX / n / sizeof(float)) {
        return NULL;
    }
    /* Explicit cast: harmless in C, and required if this file is built as
       C++ (the header names it .cu). */
    return (float *)malloc(n * n * sizeof(float));
}

/*
 * Compute C = A * B for n x n row-major matrices.
 * A and B are only read; every element of C is overwritten.
 */
static void multiply_matrices(const float *A, const float *B, float *C, size_t n)
{
    for (size_t i = 0; i < n; i++) {
        for (size_t j = 0; j < n; j++) {
            /* Accumulate the dot product of row i of A and column j of B
               locally, then store it once. */
            float sum = 0.0f;
            for (size_t k = 0; k < n; k++) {
                sum += A[i * n + k] * B[k * n + j];
            }
            C[i * n + j] = sum;
        }
    }
}

/*
 * Entry point.  argv[1] is the matrix dimension N.
 *
 * Returns 0 on success, 1 on a missing/invalid argument or when the
 * matrices cannot be allocated.
 */
int main(int argc, char *argv[])
{
    if (argc <= 1) {
        printf("Usage: %s N (NxN matrix multiplication)\n\n", argv[0]);
        return 1;
    }

    int N = 0;
    if (parse_dimension(argv[1], &N) != 0) {
        fprintf(stderr, "Error: N must be a positive integer, got \"%s\"\n",
                argv[1]);
        return 1;
    }
    const size_t n = (size_t)N;

    /* ====================================
       Allocate arrays
       ==================================== */
    float *A = alloc_matrix(n);
    float *B = alloc_matrix(n);
    float *C = alloc_matrix(n);
    if (A == NULL || B == NULL || C == NULL) {
        fprintf(stderr, "Error: cannot allocate three %dx%d float matrices\n",
                N, N);
        free(A);
        free(B);
        free(C);
        return 1;
    }

    /* Initialize the input matrices A and B.  The two arrays draw from
       rand() alternately, element by element, in row-major order. */
    for (size_t idx = 0; idx < n * n; idx++) {
        A[idx] = (float)(rand() / 1000000000.0);
        B[idx] = (float)(rand() / 1000000000.0);
    }

    struct timeval start_time, stop_time;

    gettimeofday(&start_time, NULL);   /* time just before the multiply */
    multiply_matrices(A, B, C, n);
    gettimeofday(&stop_time, NULL);    /* time just after the multiply  */

    /* Subtract before scaling and use a 64-bit result so long runs do not
       overflow the microsecond count. */
    long long elapsed =
        (long long)(stop_time.tv_sec - start_time.tv_sec) * 1000000LL +
        (long long)(stop_time.tv_usec - start_time.tv_usec);

    printf("Elasped time = %lld micro secs\n", elapsed);

    /* Only small matrices are worth dumping to the terminal. */
    if (N < 10) {
        printf("Matrix A:\n");
        printMatrix(A, N);

        printf("\nMatrix B:\n");
        printMatrix(B, N);

        printf("\nMatrix C = A*B:\n");
        printMatrix(C, N);
    }

    /* Free memory */
    free(A);
    free(B);
    free(C);

    return 0;
}