# Build file for the Polybench "atax" CUDA benchmark.
#
# Targets:
#   all   (default) build atax.elf
#   bench           run the ./.bench.sh benchmark script
#   clean           remove the linked binaries and object files
#
# Knobs that can be supplied on the command line:
#   EXTRA_CXXFLAGS  extra flags appended to CXXFLAGS
#   NVOPT           extra flags appended to NVCFLAGS (not set in this file)
#   LDFLAGS         extra flags appended to the link command (not set in this file)

# Remove all make implicit rules because this is already complex enough as it is
MAKEFLAGS+= -r

# Remove a target if its recipe fails, so that a partially written output is
# never mistaken for an up-to-date file on the next run
.DELETE_ON_ERROR:

# -DPOLYBENCH_TIME makes Polybench output the execution time of the program
CXXFLAGS+= -DPOLYBENCH_TIME
# -O3 applies all compiler optimization, improving from 800ms to 300ms
CXXFLAGS+= -O3
# Enable this to view the contents of the arrays
# CXXFLAGS+= -DHPC_DEBUG
# Enable this to use CUDA
# CXXFLAGS+= -DHPC_USE_CUDA
# Extend CXXFLAGS with command line parameters
CXXFLAGS+= ${EXTRA_CXXFLAGS}

# Select the location of the local CUDA install
# CUDA_HOME:=/usr/local/cuda-10.0
CUDA_HOME:=/opt/cuda

# Specify the path of the nvcc compiler
# NOTE(review): this path does not agree with CUDA_HOME above, and CUDA_HOME is
# not referenced by any rule in this file - confirm which install is intended.
NVCC:=/usr/local/cuda-10.0/bin/nvcc
# Specify the flags for the nvcc compiler
NVCFLAGS:=$(CXXFLAGS) $(NVOPT)
# Optimize for @Steffo's NVIDIA GTX 1070
# NVCFLAGS+= -arch=compute_61
# NVCFLAGS+= -code=sm_61

# These targets are commands, not files: declare them phony so that a stray
# file named "all", "bench" or "clean" cannot make them appear up to date
.PHONY: all bench clean

# Default goal: build the atax benchmark binary
all: atax.elf

# Link a benchmark binary from its own object file plus the shared Polybench
# object file
%.elf: %.cu.o polybench.cu.o
	$(NVCC) $(NVCFLAGS) $^ -o $@ $(LDFLAGS)

# Compile a single CUDA source file into an object file
%.cu.o: %.cu
	$(NVCC) $(NVCFLAGS) -c $< -o $@

# Run the benchmark script
# NOTE(review): no prerequisite is declared here; presumably the script builds
# whatever it needs itself - verify against .bench.sh
bench:
	./.bench.sh

# Remove build products: the linked binaries and any object files left behind
clean:
	rm -f *.elf *.cu.o