diff --git a/OpenMP/linear-algebra/kernels/atax/.bench.sh b/OpenMP/linear-algebra/kernels/atax/.bench.sh
index 5507133..8804894 100755
--- a/OpenMP/linear-algebra/kernels/atax/.bench.sh
+++ b/OpenMP/linear-algebra/kernels/atax/.bench.sh
@@ -1,14 +1,46 @@
 #!/bin/bash
 
-runs=9
-totalt=0.0
+run_benchmarks() {
+    runs=25
+    totalt=0.0
 
-for i in $(seq $runs)
+    for i in $(seq $runs)
+    do
+        exet=$(./atax_acc)
+        totalt=$(awk "BEGIN{print $totalt+$exet}")
+        # echo "Run #$i: " $(awk "BEGIN{printf(\"%.3g\", $exet)}") "seconds"
+    done
+
+    avgt=$(awk "BEGIN{print $totalt/$runs}")
+    echo " Average of $runs runs: " $(awk "BEGIN{printf(\"%.3g\", $avgt)}") "seconds"
+}
+
+for c in $(seq 0 15)
 do
-    exet=$(./atax_acc)
-    totalt=$(awk "BEGIN{print $totalt+$exet}")
-    echo "Run #$i: " $(awk "BEGIN{printf(\"%.3g\", $exet)}") "seconds"
-done
+    cflags=""
 
-avgt=$(awk "BEGIN{print $totalt/$runs}")
-echo "Average: " $(awk "BEGIN{printf(\"%.3g\", $avgt)}") "seconds"
+    if (( $c & 1 ))
+    then
+        cflags="$cflags -DTOGGLE_INIT_ARRAY_1"
+    fi
+
+    if (( $c & 2 ))
+    then
+        cflags="$cflags -DTOGGLE_INIT_ARRAY_2"
+    fi
+
+    if (( $c & 4 ))
+    then
+        cflags="$cflags -DTOGGLE_KERNEL_ATAX_1"
+    fi
+
+    if (( $c & 8 ))
+    then
+        cflags="$cflags -DTOGGLE_KERNEL_ATAX_2"
+    fi
+
+    echo "Flags: $cflags"
+    make "EXTRA_CFLAGS=$cflags" clean all
+
+    run_benchmarks
+done
\ No newline at end of file
diff --git a/OpenMP/linear-algebra/kernels/atax/Makefile b/OpenMP/linear-algebra/kernels/atax/Makefile
index b499897..b8dd6d5 100644
--- a/OpenMP/linear-algebra/kernels/atax/Makefile
+++ b/OpenMP/linear-algebra/kernels/atax/Makefile
@@ -15,12 +15,14 @@ CFLAGS+= -O3
 CFLAGS+= -g3
 # -DTHREAD_COUNT allows us to alter the number of threads used in the whole file
 CFLAGS+= -DTHREAD_COUNT=4
+# Extend CFLAGS with command-line parameters
+CFLAGS+= ${EXTRA_CFLAGS}
 
 # Disable make output
 MAKEFLAGS+= --silent
+
 .PHONY: bench
 bench:
-	make clean all
 	./.bench.sh
 
 
diff --git a/OpenMP/linear-algebra/kernels/atax/atax.c b/OpenMP/linear-algebra/kernels/atax/atax.c
index bd34720..4293a1d 100644
--- a/OpenMP/linear-algebra/kernels/atax/atax.c
+++ b/OpenMP/linear-algebra/kernels/atax/atax.c
@@ -25,13 +25,17 @@ static void init_array(int nx, int ny,
   int i, j;
 
   /// Initialize the `x` array with PI and its multiples.
+  #ifdef TOGGLE_INIT_ARRAY_1
   #pragma omp parallel for num_threads(THREAD_COUNT) schedule(static)
+  #endif
   for (i = 0; i < ny; i++) {
     x[i] = i * M_PI;
   }
 
   /// Initialize the `A` matrix from the row and column indices: A[i][j] = i * (j + 1) / nx.
+  #ifdef TOGGLE_INIT_ARRAY_2
   #pragma omp parallel for num_threads(THREAD_COUNT) schedule(static)
+  #endif
   for (i = 0; i < nx; i++) {
     for (j = 0; j < ny; j++) {
       A[i][j] = ((DATA_TYPE)i * (j + 1)) / nx;
@@ -63,14 +67,18 @@ static void kernel_atax(int nx, int ny,
 {
   int i, j;
 
+  #ifdef TOGGLE_KERNEL_ATAX_1
   #pragma omp parallel for num_threads(THREAD_COUNT) schedule(static)
+  #endif
   for (i = 0; i < _PB_NY; i++)
     y[i] = 0;
 
   /// ATAX: computes y = A^T * (A * x).
   // Now this gives a nice speedup, especially with a lot more threads than the count!
   // THREAD_COUNT * 4 seems to be the best on my local computer. What's the best for the Jetson Nano?
+  #ifdef TOGGLE_KERNEL_ATAX_2
   #pragma omp parallel for num_threads(THREAD_COUNT) schedule(static)
+  #endif
   for (i = 0; i < _PB_NX; i++)
   {
     /// Every iteration has its own tmp variable
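
For reference, each bit of the loop variable `c` in `.bench.sh` flips one of the four `TOGGLE_*` guards added above, so the sweep builds and times all 16 on/off combinations of the OpenMP pragmas. A single configuration can be reproduced by hand the same way the script does it; the sketch below assumes nothing beyond what the patch already uses (the `EXTRA_CFLAGS` hook in the Makefile and the `atax_acc` binary the script invokes):

    # Rebuild with only the second kernel loop parallelized (bit 3, i.e. c = 8)
    make "EXTRA_CFLAGS=-DTOGGLE_KERNEL_ATAX_2" clean all
    # Run once; the binary prints its execution time in seconds, which
    # .bench.sh averages over 25 runs per configuration.
    ./atax_acc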