From d89c501b590a0dfe4e6d73dc0eb6af46a816e55e Mon Sep 17 00:00:00 2001 From: Stefano Pigozzi Date: Wed, 16 Nov 2022 18:05:12 +0100 Subject: [PATCH] `kernel_atax`: Parallelizing the second loop gives a nice speedup --- OpenMP/linear-algebra/kernels/atax/atax.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/OpenMP/linear-algebra/kernels/atax/atax.c b/OpenMP/linear-algebra/kernels/atax/atax.c index 787db79..7469897 100644 --- a/OpenMP/linear-algebra/kernels/atax/atax.c +++ b/OpenMP/linear-algebra/kernels/atax/atax.c @@ -69,7 +69,9 @@ static void kernel_atax(int nx, int ny, y[i] = 0; /// This computes... something? I guess whatever ATAX is? - // Trying to parallelize this only seems to increase the time required + // Parallelizing this loop now gives a nice speedup, especially when using more threads than THREAD_COUNT! + // THREAD_COUNT * 4 seems to work best on my local computer. TODO: find the best value for the Jetson Nano. + #pragma omp parallel for num_threads(THREAD_COUNT * 4) schedule(static) for (i = 0; i < _PB_NX; i++) { /// Every iteration has its own tmp variable