Enabling CUDA fails when building on Jetson Tegra
NVIDIA's Tegra platform fully supports building CUDA applications on the device, but I have found that enabling the CUDA language in CMake on the device fails. This simple CMakeLists.txt fails when run on a Tegra Orin:
set(CMAKE_CUDA_ARCHITECTURES "87")
set(CMAKE_CUDA_COMPILER "/usr/local/cuda/bin/nvcc")
project(foobar CXX CUDA)
The issue is that the CUDA compiler identification step tries to link against libcudart_static, but NVIDIA does not include that library in the CUDA development package that is part of JetPack:
#$ g++ -D__CUDA_ARCH_LIST__=520 -Wl,--start-group "tmp/a_dlink.o" "tmp/CMakeCUDACompilerId.o" "-L/usr/local/cuda-11.8/bin/../targets/aarch64-linux/lib/stubs" "-L/usr/local/cuda-11.8/bin/../targets/aarch64-linux/lib" -lcudadevrt -lcudart_static -lrt -lpthread -ldl -Wl,--end-group -o "a.out"
/usr/bin/ld: cannot find -lcudart_static
collect2: error: ld returned 1 exit status
Manually adding libcudart_static.a from the cross-compilation package fixes the issue, and CUDA is then enabled correctly. I wasn't able to find a way to make the compiler check use the dynamic library instead of the static one.
The compiler check shouldn't be using a library that is not there.
Full error output:
CMake Error at /snap/cmake/1380/share/cmake-3.29/Modules/CMakeDetermineCompilerId.cmake:814 (message): Compiling the CUDA compiler identification source file "CMakeCUDACompilerId.cu" failed.
Compiler: /usr/local/cuda-11.8/bin/nvcc
Build flags:
Id flags: --keep;--keep-dir;tmp -v
The output was:
1
#$ _NVVM_BRANCH_=nvvm
#$ _SPACE_=
#$ _CUDART_=cudart
#$ _HERE_=/usr/local/cuda-11.8/bin
#$ _THERE_=/usr/local/cuda-11.8/bin
#$ _TARGET_SIZE_=
#$ _TARGET_DIR_=
#$ _TARGET_DIR_=targets/aarch64-linux
#$ TOP=/usr/local/cuda-11.8/bin/..
#$ NVVMIR_LIBRARY_DIR=/usr/local/cuda-11.8/bin/../nvvm/libdevice
#$ LD_LIBRARY_PATH=/usr/local/cuda-11.8/bin/../lib:/usr/local/cuda-11.4/lib64:
#$ PATH=/usr/local/cuda-11.8/bin/../nvvm/bin:/usr/local/cuda-11.8/bin:/usr/local/cuda-11.4/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin
#$ INCLUDES="-I/usr/local/cuda-11.8/bin/../targets/aarch64-linux/include"
#$ LIBRARIES= "-L/usr/local/cuda-11.8/bin/../targets/aarch64-linux/lib/stubs" "-L/usr/local/cuda-11.8/bin/../targets/aarch64-linux/lib"
#$ CUDAFE_FLAGS=
#$ PTXAS_FLAGS=
#$ rm tmp/a_dlink.reg.c
#$ gcc -D__CUDA_ARCH__=520 -D__CUDA_ARCH_LIST__=520 -E -x c++ -DCUDA_DOUBLE_MATH_FUNCTIONS -D__CUDACC__ -D__NVCC__ "-I/usr/local/cuda-11.8/bin/../targets/aarch64-linux/include" -D__CUDACC_VER_MAJOR__=11 -D__CUDACC_VER_MINOR__=8 -D__CUDACC_VER_BUILD__=89 -D__CUDA_API_VER_MAJOR__=11 -D__CUDA_API_VER_MINOR__=8 -D__NVCC_DIAG_PRAGMA_SUPPORT__=1 -include "cuda_runtime.h" "CMakeCUDACompilerId.cu" -o "tmp/CMakeCUDACompilerId.cpp1.ii"
#$ cicc --c++14 --gnu_version=90400 --display_error_number --orig_src_file_name "CMakeCUDACompilerId.cu" --orig_src_path_name "/home/nvidia/cuda_test/build/CMakeFiles/3.29.1/CompilerIdCUDA/CMakeCUDACompilerId.cu" --allow_managed --unsigned_chars --unsigned_wchar_t -arch compute_52 -m64 --no-version-ident -ftz=0 -prec_div=1 -prec_sqrt=1 -fmad=1 --include_file_name "CMakeCUDACompilerId.fatbin.c" -tused --gen_module_id_file --module_id_file_name "tmp/CMakeCUDACompilerId.module_id" --gen_c_file_name "tmp/CMakeCUDACompilerId.cudafe1.c" --stub_file_name "tmp/CMakeCUDACompilerId.cudafe1.stub.c" --gen_device_file_name "tmp/CMakeCUDACompilerId.cudafe1.gpu" "tmp/CMakeCUDACompilerId.cpp1.ii" -o "tmp/CMakeCUDACompilerId.ptx"
#$ ptxas -arch=sm_52 -m64 "tmp/CMakeCUDACompilerId.ptx" -o "tmp/CMakeCUDACompilerId.sm_52.cubin"
#$ fatbinary --create="tmp/CMakeCUDACompilerId.fatbin" -64 --cicc-cmdline="-ftz=0 -prec_div=1 -prec_sqrt=1 -fmad=1 " -no-asm "--image3=kind=elf,sm=52,file=tmp/CMakeCUDACompilerId.sm_52.cubin" "--image3=kind=ptx,sm=52,file=tmp/CMakeCUDACompilerId.ptx" --embedded-fatbin="tmp/CMakeCUDACompilerId.fatbin.c"
#$ gcc -D__CUDA_ARCH_LIST__=520 -E -x c++ -D__CUDACC__ -D__NVCC__ "-I/usr/local/cuda-11.8/bin/../targets/aarch64-linux/include" -D__CUDACC_VER_MAJOR__=11 -D__CUDACC_VER_MINOR__=8 -D__CUDACC_VER_BUILD__=89 -D__CUDA_API_VER_MAJOR__=11 -D__CUDA_API_VER_MINOR__=8 -D__NVCC_DIAG_PRAGMA_SUPPORT__=1 -include "cuda_runtime.h" "CMakeCUDACompilerId.cu" -o "tmp/CMakeCUDACompilerId.cpp4.ii"
#$ cudafe++ --c++14 --gnu_version=90400 --display_error_number --orig_src_file_name "CMakeCUDACompilerId.cu" --orig_src_path_name "/home/nvidia/cuda_test/build/CMakeFiles/3.29.1/CompilerIdCUDA/CMakeCUDACompilerId.cu" --allow_managed --unsigned_chars --unsigned_wchar_t --m64 --parse_templates --gen_c_file_name "tmp/CMakeCUDACompilerId.cudafe1.cpp" --stub_file_name "CMakeCUDACompilerId.cudafe1.stub.c" --module_id_file_name "tmp/CMakeCUDACompilerId.module_id" "tmp/CMakeCUDACompilerId.cpp4.ii"
#$ gcc -D__CUDA_ARCH__=520 -D__CUDA_ARCH_LIST__=520 -c -x c++ -DCUDA_DOUBLE_MATH_FUNCTIONS "-I/usr/local/cuda-11.8/bin/../targets/aarch64-linux/include" "tmp/CMakeCUDACompilerId.cudafe1.cpp" -o "tmp/CMakeCUDACompilerId.o"
#$ nvlink -m64 --arch=sm_52 --register-link-binaries="tmp/a_dlink.reg.c" "-L/usr/local/cuda-11.8/bin/../targets/aarch64-linux/lib/stubs" "-L/usr/local/cuda-11.8/bin/../targets/aarch64-linux/lib" -cpu-arch=AARCH64 "tmp/CMakeCUDACompilerId.o" -lcudadevrt -o "tmp/a_dlink.sm_52.cubin" --host-ccbin "gcc"
#$ fatbinary --create="tmp/a_dlink.fatbin" -64 --cicc-cmdline="-ftz=0 -prec_div=1 -prec_sqrt=1 -fmad=1 " -no-asm -link "--image3=kind=elf,sm=52,file=tmp/a_dlink.sm_52.cubin" --embedded-fatbin="tmp/a_dlink.fatbin.c"
#$ gcc -D__CUDA_ARCH_LIST__=520 -c -x c++ -DFATBINFILE=""tmp/a_dlink.fatbin.c"" -DREGISTERLINKBINARYFILE=""tmp/a_dlink.reg.c"" -I. -D__NV_EXTRA_INITIALIZATION= -D__NV_EXTRA_FINALIZATION= -D__CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__ "-I/usr/local/cuda-11.8/bin/../targets/aarch64-linux/include" -D__CUDACC_VER_MAJOR__=11 -D__CUDACC_VER_MINOR__=8 -D__CUDACC_VER_BUILD__=89 -D__CUDA_API_VER_MAJOR__=11 -D__CUDA_API_VER_MINOR__=8 -D__NVCC_DIAG_PRAGMA_SUPPORT__=1 "/usr/local/cuda-11.8/bin/crt/link.stub" -o "tmp/a_dlink.o"
#$ g++ -D__CUDA_ARCH_LIST__=520 -Wl,--start-group "tmp/a_dlink.o" "tmp/CMakeCUDACompilerId.o" "-L/usr/local/cuda-11.8/bin/../targets/aarch64-linux/lib/stubs" "-L/usr/local/cuda-11.8/bin/../targets/aarch64-linux/lib" -lcudadevrt -lcudart_static -lrt -lpthread -ldl -Wl,--end-group -o "a.out"
/usr/bin/ld: cannot find -lcudart_static
collect2: error: ld returned 1 exit status
--error 0x1 --
Call Stack (most recent call first):
  /snap/cmake/1380/share/cmake-3.29/Modules/CMakeDetermineCompilerId.cmake:8 (CMAKE_DETERMINE_COMPILER_ID_BUILD)
  /snap/cmake/1380/share/cmake-3.29/Modules/CMakeDetermineCompilerId.cmake:53 (__determine_compiler_id_test)
  /snap/cmake/1380/share/cmake-3.29/Modules/CMakeDetermineCUDACompiler.cmake:131 (CMAKE_DETERMINE_COMPILER_ID)
  CMakeLists.txt:4 (project)