mirror of
https://github.com/Steffo99/unimore-hpc-assignments.git
synced 2024-11-26 01:54:22 +00:00
`kernel_atax`: Parallelizing the second loop gives a nice speedup
This commit is contained in:
parent
9c153bb89f
commit
d89c501b59
1 changed file with 3 additions and 1 deletion
|
@@ -69,7 +69,9 @@ static void kernel_atax(int nx, int ny,
|
||||||
y[i] = 0;
|
y[i] = 0;
|
||||||
|
|
||||||
/// This computes... something? I guess whatever ATAX is?
|
/// This computes... something? I guess whatever ATAX is?
|
||||||
// Trying to parallelize this only seems to increase the time required
|
// Now this gives a nice speedup, especially with a lot more threads than the count!
|
||||||
|
// THREAD_COUNT * 4 seems to be the best on my local computer. What's the best for the Jetson Nano?
|
||||||
|
#pragma omp parallel for num_threads(THREAD_COUNT * 4) schedule(static)
|
||||||
for (i = 0; i < _PB_NX; i++)
|
for (i = 0; i < _PB_NX; i++)
|
||||||
{
|
{
|
||||||
/// Every iteration has its own tmp variable
|
/// Every iteration has its own tmp variable
|
||||||
|
|
Loading…
Reference in a new issue