diff --git a/atax/atax.cu b/atax/atax.cu index bde0437..89cb20f 100644 --- a/atax/atax.cu +++ b/atax/atax.cu @@ -27,17 +27,21 @@ */ __host__ static void init_array(DATA_TYPE** A, DATA_TYPE* x) { - for (int i = 0; i < NY; i++) { + for (int i = 0; i < NY; i++) + { x[i] = i * M_PI; } - for (int i = 0; i < NX; i++) { - for (int j = 0; j < NY; j++) { + for (int i = 0; i < NX; i++) + { + for (int j = 0; j < NY; j++) + { A[i][j] = ((DATA_TYPE)i * (j + 1)) / NX; } } } + /** * Print the given array. * @@ -45,9 +49,10 @@ __host__ static void init_array(DATA_TYPE** A, DATA_TYPE* x) * * To be called on the CPU (uses the `__host__` qualifier). */ -__host__ static void print_array(int nx, DATA_TYPE* y) +__host__ static void print_array(DATA_TYPE* y) { - for (int i = 0; i < nx; i++) { + for (int i = 0; i < NX; i++) + { fprintf(stderr, DATA_PRINTF_MODIFIER, y[i]); } fprintf(stderr, "\n"); @@ -63,23 +68,30 @@ __host__ static void print_array(int nx, DATA_TYPE* y) */ __host__ static void kernel_atax(DATA_TYPE** A, DATA_TYPE* x, DATA_TYPE* y) { - for (int i = 0; i < NY; i++) { + for (int i = 0; i < NY; i++) + { y[i] = 0; } - for (int i = 0; i < NX; i++) { + for (int i = 0; i < NX; i++) + { DATA_TYPE tmp = 0; - for (int j = 0; j < NY; j++) { + for (int j = 0; j < NY; j++) + { tmp += A[i][j] * x[j]; } - for (int j = 0; j < NY; j++) { + for (int j = 0; j < NY; j++) + { y[j] = y[j] + A[i][j] * tmp; } } } + + + /** * The main function of the benchmark, which sets up tooling to measure the time spent computing `kernel_atax`. * @@ -88,14 +100,19 @@ __host__ static void kernel_atax(DATA_TYPE** A, DATA_TYPE* x, DATA_TYPE* y) __host__ int main(int argc, char **argv) { - POLYBENCH_2D_ARRAY_DECL(A, DATA_TYPE, NX, NY, nx, ny); - POLYBENCH_1D_ARRAY_DECL(x, DATA_TYPE, NY, ny); - POLYBENCH_1D_ARRAY_DECL(y, DATA_TYPE, NX, nx); + DATA_TYPE **A = new DATA_TYPE*[NX]; + DATA_TYPE x[NY], y[NX]; + + for(size_t i=0; i