1
Fork 0
mirror of https://github.com/Steffo99/unimore-hpc-assignments.git synced 2024-11-26 01:54:22 +00:00

kernel_atax: Parallelizing the second loop gives a nice speedup

This commit is contained in:
Steffo 2022-11-16 18:05:12 +01:00
parent 9c153bb89f
commit d89c501b59
Signed by: steffo
GPG key ID: 6965406171929D01

View file

@@ -69,7 +69,9 @@ static void kernel_atax(int nx, int ny,
y[i] = 0; y[i] = 0;
/// This computes... something? I guess whatever ATAX is? /// This computes... something? I guess whatever ATAX is?
// Trying to parallelize this only seems to increase the time required // Now this gives a nice speedup, especially with a lot more threads than THREAD_COUNT!
// THREAD_COUNT * 4 seems to be the best on my local computer. What's the best for the Jetson Nano?
#pragma omp parallel for num_threads(THREAD_COUNT * 4) schedule(static)
for (i = 0; i < _PB_NX; i++) for (i = 0; i < _PB_NX; i++)
{ {
/// Every iteration has its own tmp variable /// Every iteration has its own tmp variable