Commit 850ef90a authored by Robert Maynard's avatar Robert Maynard Committed by Brad King
Browse files

CUDA: Honor CUDA_RESOLVE_DEVICE_SYMBOLS for more target types

`CUDA_RESOLVE_DEVICE_SYMBOLS` can be used with shared, module, and
executable target types.  This relaxation is to allow for better
interoperability with linkers that automatically do CUDA device symbol
resolution and have no way to disable it.
parent 6e91f5d6
CUDA_RESOLVE_DEVICE_SYMBOLS
---------------------------
CUDA only: Enables device linking for the specific static library target
CUDA only: Enables device linking for the specific library target
If set this will enable device linking on this static library target. Normally
If set this will enable device linking on the library target. Normally
device linking is deferred until a shared library or executable is generated,
allowing for multiple static libraries to resolve device symbols at the same
time.
time when they are used by a shared library or executable.
By default static library targets have this property is disabled,
while shared, module, and executable targets have this property enabled.
Note that device linking is not supported for :ref:`Object Libraries`.
For instance:
......
CUDA_RESOLVE_DEVICE_SYMBOLS
---------------------------
* The :prop_tgt:`CUDA_RESOLVE_DEVICE_SYMBOLS` target property is now supported
on shared library, module library, and executable targets. Previously it was
only honored on static libraries.
......@@ -95,7 +95,13 @@ void cmMakefileExecutableTargetGenerator::WriteDeviceExecutableRule(
const bool hasCUDA =
(std::find(closure->Languages.begin(), closure->Languages.end(),
cuda_lang) != closure->Languages.end());
if (!hasCUDA) {
bool doDeviceLinking = true;
if (const char* resolveDeviceSymbols =
this->GeneratorTarget->GetProperty("CUDA_RESOLVE_DEVICE_SYMBOLS")) {
doDeviceLinking = cmSystemTools::IsOn(resolveDeviceSymbols);
}
if (!hasCUDA || !doDeviceLinking) {
return;
}
......
......@@ -133,9 +133,12 @@ void cmMakefileLibraryTargetGenerator::WriteStaticLibraryRules()
(std::find(closure->Languages.begin(), closure->Languages.end(),
cuda_lang) != closure->Languages.end());
const bool resolveDeviceSymbols =
this->GeneratorTarget->GetPropertyAsBool("CUDA_RESOLVE_DEVICE_SYMBOLS");
if (hasCUDA && resolveDeviceSymbols) {
bool doDeviceLinking = false;
if (const char* resolveDeviceSymbols =
this->GeneratorTarget->GetProperty("CUDA_RESOLVE_DEVICE_SYMBOLS")) {
doDeviceLinking = cmSystemTools::IsOn(resolveDeviceSymbols);
}
if (hasCUDA && doDeviceLinking) {
std::string linkRuleVar = "CMAKE_CUDA_DEVICE_LINK_LIBRARY";
this->WriteDeviceLibraryRules(linkRuleVar, false);
}
......@@ -168,7 +171,12 @@ void cmMakefileLibraryTargetGenerator::WriteSharedLibraryRules(bool relink)
const bool hasCUDA =
(std::find(closure->Languages.begin(), closure->Languages.end(),
cuda_lang) != closure->Languages.end());
if (hasCUDA) {
bool doDeviceLinking = true;
if (const char* resolveDeviceSymbols =
this->GeneratorTarget->GetProperty("CUDA_RESOLVE_DEVICE_SYMBOLS")) {
doDeviceLinking = cmSystemTools::IsOn(resolveDeviceSymbols);
}
if (hasCUDA && doDeviceLinking) {
std::string linkRuleVar = "CMAKE_CUDA_DEVICE_LINK_LIBRARY";
this->WriteDeviceLibraryRules(linkRuleVar, relink);
}
......@@ -209,7 +217,12 @@ void cmMakefileLibraryTargetGenerator::WriteModuleLibraryRules(bool relink)
const bool hasCUDA =
(std::find(closure->Languages.begin(), closure->Languages.end(),
cuda_lang) != closure->Languages.end());
if (hasCUDA) {
bool doDeviceLinking = true;
if (const char* resolveDeviceSymbols =
this->GeneratorTarget->GetProperty("CUDA_RESOLVE_DEVICE_SYMBOLS")) {
doDeviceLinking = cmSystemTools::IsOn(resolveDeviceSymbols);
}
if (hasCUDA && doDeviceLinking) {
std::string linkRuleVar = "CMAKE_CUDA_DEVICE_LINK_LIBRARY";
this->WriteDeviceLibraryRules(linkRuleVar, relink);
}
......
......@@ -566,22 +566,23 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatement()
(std::find(closure->Languages.begin(), closure->Languages.end(),
cuda_lang) != closure->Languages.end());
bool shouldHaveDeviceLinking = false;
switch (genTarget.GetType()) {
case cmStateEnums::SHARED_LIBRARY:
case cmStateEnums::MODULE_LIBRARY:
case cmStateEnums::EXECUTABLE:
shouldHaveDeviceLinking = true;
break;
case cmStateEnums::STATIC_LIBRARY:
shouldHaveDeviceLinking =
genTarget.GetPropertyAsBool("CUDA_RESOLVE_DEVICE_SYMBOLS");
break;
default:
break;
bool doDeviceLinking = false;
if (const char* resolveDeviceSymbols =
genTarget.GetProperty("CUDA_RESOLVE_DEVICE_SYMBOLS")) {
doDeviceLinking = cmSystemTools::IsOn(resolveDeviceSymbols);
} else {
switch (genTarget.GetType()) {
case cmStateEnums::SHARED_LIBRARY:
case cmStateEnums::MODULE_LIBRARY:
case cmStateEnums::EXECUTABLE:
doDeviceLinking = true;
break;
default:
break;
}
}
if (!(shouldHaveDeviceLinking && hasCUDA)) {
if (!(doDeviceLinking && hasCUDA)) {
return;
}
......
......@@ -2998,18 +2998,19 @@ bool cmVisualStudio10TargetGenerator::ComputeCudaLinkOptions(
// Determine if we need to do a device link
bool doDeviceLinking = false;
switch (this->GeneratorTarget->GetType()) {
case cmStateEnums::SHARED_LIBRARY:
case cmStateEnums::MODULE_LIBRARY:
case cmStateEnums::EXECUTABLE:
doDeviceLinking = true;
break;
case cmStateEnums::STATIC_LIBRARY:
doDeviceLinking = this->GeneratorTarget->GetPropertyAsBool(
"CUDA_RESOLVE_DEVICE_SYMBOLS");
break;
default:
break;
if (const char* resolveDeviceSymbols =
this->GeneratorTarget->GetProperty("CUDA_RESOLVE_DEVICE_SYMBOLS")) {
doDeviceLinking = cmSystemTools::IsOn(resolveDeviceSymbols);
} else {
switch (this->GeneratorTarget->GetType()) {
case cmStateEnums::SHARED_LIBRARY:
case cmStateEnums::MODULE_LIBRARY:
case cmStateEnums::EXECUTABLE:
doDeviceLinking = true;
break;
default:
break;
}
}
cudaLinkOptions.AddFlag("PerformDeviceLink",
......
......@@ -7,6 +7,17 @@ ADD_TEST_MACRO(CudaOnly.ResolveDeviceSymbols CudaOnlyResolveDeviceSymbols)
ADD_TEST_MACRO(CudaOnly.SeparateCompilation CudaOnlySeparateCompilation)
ADD_TEST_MACRO(CudaOnly.WithDefs CudaOnlyWithDefs)
add_test(NAME CudaOnly.DontResolveDeviceSymbols COMMAND
${CMAKE_CTEST_COMMAND} -C $<CONFIGURATION>
--build-and-test
"${CMAKE_CURRENT_SOURCE_DIR}/DontResolveDeviceSymbols/"
"${CMAKE_CURRENT_BINARY_DIR}/DontResolveDeviceSymbols/"
${build_generator_args}
--build-project DontResolveDeviceSymbols
--build-options ${build_options}
--test-command ${CMAKE_CTEST_COMMAND} -V -C $<CONFIGURATION>
)
if(MSVC)
ADD_TEST_MACRO(CudaOnly.PDB CudaOnlyPDB)
endif()
cmake_minimum_required(VERSION 3.13)
project (DontResolveDeviceSymbols CUDA)
# Find nm and dumpbin
if(CMAKE_NM)
set(dump_command ${CMAKE_NM})
set(dump_args --defined-only)
set(symbol_name cudaRegisterLinkedBinary)
else()
include(GetPrerequisites)
message(STATUS "calling list_prerequisites to find dumpbin")
list_prerequisites("${CMAKE_COMMAND}" 0 0 0)
if(gp_dumpbin)
set(dump_command ${gp_dumpbin})
set(dump_args /SYMBOLS)
set(symbol_name nv_fatb)
endif()
endif()
#Goal for this example:
# Build a static library that defines multiple methods and kernels that
# use each other.
# Don't resolve the device symbols in the static library
# Don't resolve the device symbols in the executable library
# Verify that we can't use those device symbols from anything
string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_30,code=[compute_30] -gencode arch=compute_50,code=\\\"compute_50\\\"")
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CUDA_STANDARD 11)
add_library(CUDANoDeviceResolve SHARED file1.cu)
set_target_properties(CUDANoDeviceResolve
PROPERTIES
CUDA_SEPARABLE_COMPILATION ON
CUDA_RESOLVE_DEVICE_SYMBOLS OFF
POSITION_INDEPENDENT_CODE ON)
if(MSVC)
target_link_options(CUDANoDeviceResolve PRIVATE "/FORCE:UNRESOLVED")
endif()
if(dump_command)
add_custom_command(TARGET CUDANoDeviceResolve POST_BUILD
COMMAND ${CMAKE_COMMAND}
-DDUMP_COMMAND=${dump_command}
-DDUMP_ARGS=${dump_args}
-DSYMBOL_NAME=${symbol_name}
-DTEST_LIBRARY_PATH=$<TARGET_FILE:CUDANoDeviceResolve>
-P ${CMAKE_CURRENT_SOURCE_DIR}/verify.cmake
)
endif()
#include <iostream>
static __global__ void file1_kernel(int in, int* out)
{
*out = in * in;
}
int choose_cuda_device()
{
int nDevices = 0;
cudaError_t err = cudaGetDeviceCount(&nDevices);
if (err != cudaSuccess) {
std::cerr << "Failed to retrieve the number of CUDA enabled devices"
<< std::endl;
return 1;
}
for (int i = 0; i < nDevices; ++i) {
cudaDeviceProp prop;
cudaError_t err = cudaGetDeviceProperties(&prop, i);
if (err != cudaSuccess) {
std::cerr << "Could not retrieve properties from CUDA device " << i
<< std::endl;
return 1;
}
std::cout << "prop.major: " << prop.major << std::endl;
if (prop.major >= 3) {
err = cudaSetDevice(i);
if (err != cudaSuccess) {
std::cout << "Could not select CUDA device " << i << std::endl;
} else {
return 0;
}
}
}
std::cout << "Could not find a CUDA enabled card supporting compute >=3.0"
<< std::endl;
return 1;
}
int file1_launch_kernel()
{
int ret = choose_cuda_device();
if (ret) {
return 0;
}
int input = 4;
int* output;
cudaError_t err = cudaMallocManaged(&output, sizeof(int));
cudaDeviceSynchronize();
if (err != cudaSuccess) {
return 1;
}
file1_kernel<<<1, 1>>>(input, output);
cudaDeviceSynchronize();
err = cudaGetLastError();
std::cout << err << " " << cudaGetErrorString(err) << std::endl;
if (err == cudaSuccess) {
// This kernel launch should failed as the device linking never occured
std::cerr << "file1_kernel: kernel launch should have failed" << std::endl;
return 1;
}
return 0;
}
#include <iostream>
int main(int argc, char** argv)
{
return 0;
}
execute_process(COMMAND ${DUMP_COMMAND} ${DUMP_ARGS} ${TEST_LIBRARY_PATH}
RESULT_VARIABLE RESULT
OUTPUT_VARIABLE OUTPUT
ERROR_VARIABLE ERROR
)
if(NOT "${RESULT}" STREQUAL "0")
message(FATAL_ERROR "${DUMP_COMMAND} failed [${RESULT}] [${OUTPUT}] [${ERROR}]")
endif()
if("${OUTPUT}" MATCHES "${SYMBOL_NAME}")
message(FATAL_ERROR
"The '${SYMBOL_NAME}' symbol is defined; device linking occurred!")
endif()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment