CUDA: Linker error when cross-compiling to aarch64-linux
This is potentially the same issue as #21688 (closed).
I'm trying to compile a simple test program I came across in a recent blog post from Rob Maynard: https://github.com/robertmaynard/code-samples/tree/master/posts/cmake
My goal is to cross-compile code (that may or may not include CUDA) with CMake on my x86 laptop and then deploy it to my NVIDIA Jetson Xavier NX (or TX2). The example above is just a simple program to test the toolchain. My dev environment is an Ubuntu 18.04 Docker container with all of NVIDIA's sdk_manager packages installed (CUDA v10.2) and CMake 3.20.0, although I also saw this error when using the CUDA toolkit installed directly on my laptop (without Docker).
I can build the code fine for the host machine. To cross-compile, I got a sample toolchain file from one of NVIDIA's VPI examples:
# Toolchain_aarch64_l4t.cmake
#
# Cross-compilation toolchain for an aarch64 Linux target using the
# aarch64-linux-gnu GCC cross toolchain, with nvcc told to use the same
# cross compiler as its host compiler.
set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_SYSTEM_PROCESSOR aarch64)
set(target_arch aarch64-linux-gnu)
set(CMAKE_LIBRARY_ARCHITECTURE ${target_arch} CACHE STRING "" FORCE)
# Configure cmake to look for libraries, include directories and
# packages inside the target root prefix.
# Programs are never searched for under the root prefix.
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
set(CMAKE_FIND_ROOT_PATH "/usr/${target_arch}")
# needed to avoid doing some more strict compiler checks that
# are failing when cross-compiling
set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)
# specify the toolchain programs
find_program(CMAKE_C_COMPILER ${target_arch}-gcc)
find_program(CMAKE_CXX_COMPILER ${target_arch}-g++)
if(NOT CMAKE_C_COMPILER OR NOT CMAKE_CXX_COMPILER)
  message(FATAL_ERROR "Can't find suitable C/C++ cross compiler for ${target_arch}")
endif()
# Binutils for the target. Only CMAKE_AR is forced into the cache;
# CMAKE_RANLIB and CMAKE_LINKER are set as normal variables.
set(CMAKE_AR ${target_arch}-ar CACHE FILEPATH "" FORCE)
set(CMAKE_RANLIB ${target_arch}-ranlib)
set(CMAKE_LINKER ${target_arch}-ld)
# Not all shared libraries dependencies are installed in host machine.
# Make sure linker doesn't complain.
set(CMAKE_EXE_LINKER_FLAGS_INIT -Wl,--allow-shlib-undefined)
# instruct nvcc to use our cross-compiler
# NOTE(review): CMake also provides CMAKE_CUDA_HOST_COMPILER for selecting
# nvcc's host compiler; confirm whether it should replace the explicit
# -ccbin flag here.
set(CMAKE_CUDA_FLAGS "-ccbin ${CMAKE_CXX_COMPILER} -Xcompiler -fPIC" CACHE STRING "" FORCE)
However, when I include the toolchain file, i.e.
cmake -GNinja -DCMAKE_CUDA_ARCHITECTURES=72 -DCMAKE_TOOLCHAIN_FILE=../Toolchain_aarch64_l4t.cmake -DCMAKE_BUILD_TYPE=Release ..
and then run ninja, I get this error:
[1/1] Linking CXX executable particle_test
FAILED: particle_test
: && /usr/bin/aarch64-linux-gnu-g++ -O3 -DNDEBUG -Wl,--allow-shlib-undefined CMakeFiles/particle_test.dir/test.cu.o CMakeFiles/particle_test.dir/cmake_device_link.o -o particle_test libparticles.a -lcudadevrt -lcudart_static -lrt -lpthread -ldl && :
/usr/lib/gcc-cross/aarch64-linux-gnu/7/../../../../aarch64-linux-gnu/bin/ld: cannot find -lcudadevrt
/usr/lib/gcc-cross/aarch64-linux-gnu/7/../../../../aarch64-linux-gnu/bin/ld: cannot find -lcudart_static
collect2: error: ld returned 1 exit status
ninja: build stopped: subcommand failed.
Simply adding this to the CMakeLists.txt doesn't fix the issue:
# Look up the CUDA toolkit (not marked REQUIRED here).
find_package(CUDAToolkit)
#...
# Link the particles target privately against the CUDA::cudart target.
target_link_libraries(particles PRIVATE CUDA::cudart)
but this does:
# Workaround: take the directory that contains the static CUDA runtime
# library and append it as a -L search path to both the shared-library
# and the executable linker flags.
get_filename_component(_cudart_libdir ${CUDA_cudart_static_LIBRARY} DIRECTORY)
string(APPEND CMAKE_SHARED_LINKER_FLAGS " -L${_cudart_libdir}")
string(APPEND CMAKE_EXE_LINKER_FLAGS " -L${_cudart_libdir}")
but that really doesn't seem like the way it should be done.