1
Fork 0
mirror of https://github.com/Steffo99/unimore-hpc-assignments.git synced 2024-11-22 16:14:24 +00:00

WHO THOUGHT USING THAT TO PREVENT DCE WAS A GOOD IDEA

This commit is contained in:
Steffo 2022-12-02 01:02:09 +01:00
parent 7f02c31247
commit 2d6448e5aa
Signed by: steffo
GPG key ID: 6965406171929D01
2 changed files with 12 additions and 23 deletions

View file

@ -6,9 +6,9 @@ CXXFLAGS+= -DPOLYBENCH_TIME
# -O3 applies all compiler optimizations, improving from 800ms to 300ms # -O3 applies all compiler optimizations, improving from 800ms to 300ms
CXXFLAGS+= -O3 CXXFLAGS+= -O3
# Enable this to view the contents of the arrays # Enable this to view the contents of the arrays
CXXFLAGS+= -DHPC_DEBUG # CXXFLAGS+= -DHPC_DEBUG
# Enable this to use CUDA # Enable this to use CUDA
CXXFLAGS+= -DHPC_USE_CUDA # CXXFLAGS+= -DHPC_USE_CUDA
# Extend CFLAGS with command line parameters # Extend CFLAGS with command line parameters
CXXFLAGS+= ${EXTRA_CXXFLAGS} CXXFLAGS+= ${EXTRA_CXXFLAGS}

View file

@ -226,6 +226,7 @@ __global__ static void init_array_cuda(DATA_TYPE* A, DATA_TYPE* X, DATA_TYPE* Y)
* *
* To be called on the CPU (uses the `__host__` qualifier). * To be called on the CPU (uses the `__host__` qualifier).
*/ */
#ifdef HPC_DEBUG
__host__ static void print_array(DATA_TYPE* Z, unsigned int size) __host__ static void print_array(DATA_TYPE* Z, unsigned int size)
{ {
for (unsigned int z = 0; z < size; z++) for (unsigned int z = 0; z < size; z++)
@ -234,6 +235,7 @@ __host__ static void print_array(DATA_TYPE* Z, unsigned int size)
} }
fprintf(stderr, "\n"); fprintf(stderr, "\n");
} }
#endif
/** /**
@ -349,7 +351,7 @@ __host__ int main(int argc, char** argv)
DATA_TYPE* A = new DATA_TYPE[NX * NY]; DATA_TYPE* A = new DATA_TYPE[NX * NY];
DATA_TYPE* X = new DATA_TYPE[NY]; DATA_TYPE* X = new DATA_TYPE[NY];
DATA_TYPE* Y = new DATA_TYPE[NX]; volatile DATA_TYPE* Y = new DATA_TYPE[NX];
print_debug("[Pointers] Allocated!"); print_debug("[Pointers] Allocated!");
@ -359,7 +361,7 @@ __host__ int main(int argc, char** argv)
#endif #endif
print_debug("[Init] Initializing..."); print_debug("[Init] Initializing...");
init_array(A, X, Y); init_array(A, X, (double*) Y);
print_debug("[Init] Initialized!"); print_debug("[Init] Initialized!");
#ifndef HPC_INCLUDE_INIT #ifndef HPC_INCLUDE_INIT
@ -368,7 +370,7 @@ __host__ int main(int argc, char** argv)
#endif #endif
print_debug("[Kernel] Running..."); print_debug("[Kernel] Running...");
kernel_atax(A, X, Y); kernel_atax(A, X, (double*) Y);
print_debug("[Kernel] Completed!"); print_debug("[Kernel] Completed!");
print_debug("[Benchmark] Stopping..."); print_debug("[Benchmark] Stopping...");
@ -385,12 +387,6 @@ __host__ int main(int argc, char** argv)
print_array(Y, NX); print_array(Y, NX);
#endif #endif
print_debug("[Verify] Printing...");
polybench_prevent_dce(
print_array(Y, NX)
);
print_debug("[Verify] Done!");
#else #else
print_debug("[Mode] Host-and-device, CUDA"); print_debug("[Mode] Host-and-device, CUDA");
@ -403,7 +399,7 @@ __host__ int main(int argc, char** argv)
DATA_TYPE* host_A = new DATA_TYPE[NX * NY]; DATA_TYPE* host_A = new DATA_TYPE[NX * NY];
DATA_TYPE* host_X = new DATA_TYPE[NY]; DATA_TYPE* host_X = new DATA_TYPE[NY];
#endif #endif
DATA_TYPE* host_Y = new DATA_TYPE[NX]; volatile DATA_TYPE* host_Y = new DATA_TYPE[NX];
print_debug("[CUDA] Allocating A..."); print_debug("[CUDA] Allocating A...");
if(cudaError_t err = cudaMalloc((void**)&A, sizeof(DATA_TYPE) * NX * NY)) if(cudaError_t err = cudaMalloc((void**)&A, sizeof(DATA_TYPE) * NX * NY))
@ -429,7 +425,7 @@ __host__ int main(int argc, char** argv)
} }
print_debug("[CUDA] Allocated Y!"); print_debug("[CUDA] Allocated Y!");
#ifdef POLYBENCH_INCLUDE_INIT #ifdef HPC_INCLUDE_INIT
print_debug("[Benchmark] Starting..."); print_debug("[Benchmark] Starting...");
polybench_start_instruments; polybench_start_instruments;
#endif #endif
@ -443,7 +439,7 @@ __host__ int main(int argc, char** argv)
} }
print_debug("[Init] Complete!"); print_debug("[Init] Complete!");
#ifndef POLYBENCH_INCLUDE_INIT #ifndef HPC_INCLUDE_INIT
print_debug("[Benchmark] Starting..."); print_debug("[Benchmark] Starting...");
polybench_start_instruments; polybench_start_instruments;
#endif #endif
@ -469,7 +465,7 @@ __host__ int main(int argc, char** argv)
#endif #endif
print_debug("[CUDA] Copying Y back..."); print_debug("[CUDA] Copying Y back...");
if(cudaError_t err = cudaMemcpy(host_Y, Y, sizeof(DATA_TYPE) * NX, cudaMemcpyDeviceToHost)) { if(cudaError_t err = cudaMemcpy((void*) host_Y, Y, sizeof(DATA_TYPE) * NX, cudaMemcpyDeviceToHost)) {
print_cudaError(err, "[CUDA] Could copy Y back!"); print_cudaError(err, "[CUDA] Could copy Y back!");
return 1; return 1;
}; };
@ -507,15 +503,8 @@ __host__ int main(int argc, char** argv)
print_debug("[Debug] Displaying X:"); print_debug("[Debug] Displaying X:");
print_array(host_X, NY); print_array(host_X, NY);
print_debug("[Debug] Displaying Y:"); print_debug("[Debug] Displaying Y:");
print_array(host_Y, NX); print_array((double*) host_Y, NX);
#endif #endif
print_debug("[Verify] Printing...");
polybench_prevent_dce(
print_array(host_Y, NX)
);
print_debug("[Verify] Done!");
#endif #endif
return 0; return 0;