VTKmCPUVectorization.cmake 7.7 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
##=============================================================================
##
##  Copyright (c) Kitware, Inc.
##  All rights reserved.
##  See LICENSE.txt for details.
##
##  This software is distributed WITHOUT ANY WARRANTY; without even
##  the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
##  PURPOSE.  See the above copyright notice for more information.
##
##  Copyright 2017 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
##  Copyright 2017 UT-Battelle, LLC.
##  Copyright 2017 Los Alamos National Security.
##
##  Under the terms of Contract DE-NA0003525 with NTESS,
##  the U.S. Government retains certain rights in this software.
##  Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
##  Laboratory (LANL), the U.S. Government retains certain rights in
##  this software.
##
##=============================================================================
22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69

#Currently all we are going to build is a set of options that are possible
#based on the compiler. For now we are going on the presumption
#that x86 architecture is the only target for vectorization and therefore
#we don't need any system detect.
#
#Here is the breakdown of what each flag type means:
#
#  1. none:
#  Do not explicitly enable vectorization, but at the same don't explicitly disable
#  vectorization.
#
#  2. native:
#  Allow the compiler to use auto-detection based on the systems CPU to determine
#  the highest level of vectorization support that is allowed. This means that
#  libraries and executables built with this setting are non-portable.
#
#  3. avx
#  Compile with just AVX enabled, no AVX2 or AVX512 vectorization will be used.
#  This means that Sandy Bridge, Ivy Bridge, Haswell, and Skylake are supported,
#  but Haswell and newer will not use any AVX2 instructions
#
#  4. avx2
#  Compile with  AVX2/AVX enabled, no AVX512 vectorization will be used.
#  This means that Sandy Bridge, and Ivy Bridge can not run the code.
#
#  5. avx512
#  Compile with AVX512/AVX2/AVX options enabled.
#  This means that Sandy Bridge, Ivy Bridge, Haswell and can not run the code.
#  Only XeonPhi Knights Landing and Skylake processors can run the code.
#
#  AVX512 is designed to mix with avx/avx2 without any performance penalties,
#  so we enable AVX2 so that we get AVX2 support for < 32bit value types which
#  AVX512 has less support for
#
#
# I wonder if we should go towards a per platform cmake include that stores
# all this knowledge
#   include(gcc.cmake)
#   include(icc.cmake)
#   include(clang.cmake)
#
# This way we could also do compile warning flag detection at the same time
#
#
# Note: By default we use 'native' as the default option
#
#
70

# Guard against building vectorization_flags more than once: when the
# vtkm_vectorization_flags target already exists, stop processing this file.
if(TARGET vtkm_vectorization_flags)
  return()
endif()

# Interface target that the vectorization compile options are attached to.
add_library(vtkm_vectorization_flags INTERFACE)
if(NOT VTKm_INSTALL_ONLY_LIBRARIES)
  # Add the target to the VTK-m export set unless only libraries are installed.
  install(TARGETS vtkm_vectorization_flags EXPORT ${VTKm_EXPORT_NAME})
endif()

# If we are using MSVC stop after the interface so that the interface is
# consistently defined even for compilers such as MSVC that we don't
# have vectorization flag support for yet.
if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
  return()
endif()

# Levels that are always offered. The compiler-specific branches below append
# the extra levels the detected compiler supports and record the flags for
# each level in native_flags, avx_flags, avx2_flags, knl_flags and
# skylake_flags.
set(vec_levels none native)

if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
  #for now we presume gcc > 4.6
  list(APPEND vec_levels avx)

  #common flags for the avx instructions for the gcc compiler
  set(native_flags -march=native)
  set(avx_flags -mavx)
  set(avx2_flags ${avx_flags} -mf16c -mavx2 -mfma -mlzcnt -mbmi -mbmi2)
  if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.9)
    #if GNU is less than 4.9 you get avx, avx2
    list(APPEND vec_levels avx2)
  elseif(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.1)
    #if GNU is less than 5.1 you get avx, avx2, and some avx512
    list(APPEND vec_levels avx2 avx512-knl)
    set(knl_flags ${avx2_flags} -mavx512f -mavx512pf -mavx512er -mavx512cd)
  else()
    #if GNU is 5.1+ you get avx, avx2, and more avx512
    list(APPEND vec_levels avx2 avx512-skx avx512-knl)
    set(knl_flags ${avx2_flags} -mavx512f -mavx512pf -mavx512er -mavx512cd)
    set(skylake_flags ${avx2_flags} -mavx512f -mavx512dq -mavx512cd -mavx512bw -mavx512vl)
  endif()
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
  list(APPEND vec_levels avx avx2 avx512-skx avx512-knl)
  set(native_flags -march=native)
  set(avx_flags -mavx)
  set(avx2_flags ${avx_flags} -mf16c -mavx2 -mfma -mlzcnt -mbmi -mbmi2)
  # Same feature split as the GNU branch: KNL gets the ER/PF extensions,
  # Skylake gets DQ/BW/VL. Clang spells these options with the -m prefix.
  set(knl_flags ${avx2_flags} -mavx512f -mavx512cd -mavx512er -mavx512pf)
  set(skylake_flags ${avx2_flags} -mavx512f -mavx512cd -mavx512dq -mavx512bw -mavx512vl)
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
  #While Clang support AVX512, no version of AppleClang has that support yet
  list(APPEND vec_levels avx avx2)
  set(native_flags -march=native)
  set(avx_flags -mavx)
  set(avx2_flags ${avx_flags} -mf16c -mavx2 -mfma -mlzcnt -mbmi -mbmi2)
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "PGI")
  #I can't find documentation to explicitly state the level of vectorization
  #support I want from the PGI compiler
  #so for now we are going to do nothing
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
  #Intel 15.X is the first version with avx512
  #Intel 16.X has way better vector generation compared to 15.X though

  set(native_flags -xHost)
  set(avx_flags  -xAVX)
  set(avx2_flags -xCORE-AVX2)

  if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 15.0)
    # No avx/avx2 level is offered here; only 'none' and 'native' remain.
    message(STATUS "While Intel ICC 14.0 and lower support #pragma simd")
    message(STATUS "The code that is generated in testing has caused SIGBUS")
    message(STATUS "runtime signals to be thrown. We recommend you upgrade")
    message(STATUS "or disable vectorization.")
  elseif (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 16.0)
    list(APPEND vec_levels avx avx2)
  else()
    list(APPEND vec_levels avx avx2 avx512-skx avx512-knl)
    set(knl_flags ${knl_flags} -xMIC-AVX512)
    set(skylake_flags ${skylake_flags} -xCORE-AVX512)
  endif()
endif()

# Publish the per-level flag lists as global properties. A level the detected
# compiler does not support leaves its variable unset, so the matching
# property ends up empty.
set_property(GLOBAL PROPERTY VTKm_NATIVE_FLAGS ${native_flags})
set_property(GLOBAL PROPERTY VTKm_AVX_FLAGS ${avx_flags})
set_property(GLOBAL PROPERTY VTKm_AVX2_FLAGS ${avx2_flags})

set_property(GLOBAL PROPERTY VTKm_KNL_FLAGS ${knl_flags})
set_property(GLOBAL PROPERTY VTKm_SKYLAKE_FLAGS ${skylake_flags})

# Now that we know which levels the compiler supports, set up the CMake option.
# We use a combo box style property, so that ccmake and cmake-gui have a
# nice interface
#
set(VTKm_Vectorization "none" CACHE STRING "Level of compiler vectorization support")
set_property(CACHE VTKm_Vectorization PROPERTY STRINGS ${vec_levels})
# Compare via the variable name rather than an unquoted ${} expansion so that
# an empty cache value does not produce a malformed if() expression.
if(NOT VTKm_Vectorization STREQUAL "none")
  set(VTKM_VECTORIZATION_ENABLED "ON")
endif()

#
# Now that we have set up the options, let's set up the compile flags that
# we are going to require.
#
# 'flags' starts out unset and stays unset for "none" or for any value that
# does not match one of the levels below.
set(flags)
if(VTKm_Vectorization STREQUAL "native")
  get_property(flags GLOBAL PROPERTY VTKm_NATIVE_FLAGS)
elseif(VTKm_Vectorization STREQUAL "avx")
  get_property(flags GLOBAL PROPERTY VTKm_AVX_FLAGS)
elseif(VTKm_Vectorization STREQUAL "avx2")
  get_property(flags GLOBAL PROPERTY VTKm_AVX2_FLAGS)
elseif(VTKm_Vectorization STREQUAL "avx512-skx")
  get_property(flags GLOBAL PROPERTY VTKm_SKYLAKE_FLAGS)
elseif(VTKm_Vectorization STREQUAL "avx512-knl")
  get_property(flags GLOBAL PROPERTY VTKm_KNL_FLAGS)
endif()

# Attach the selected flags to the interface target for C++ sources only.
# The expression is quoted so the ';'-separated flag list stays inside a
# single generator expression argument.
target_compile_options(vtkm_vectorization_flags
  INTERFACE "$<$<COMPILE_LANGUAGE:CXX>:${flags}>"
  )

if(TARGET vtkm::cuda AND flags AND NOT CMAKE_CUDA_HOST_COMPILER)
  # Also propagate down these optimizations when building host side code
  # with cuda. To be safe we only do this when we know the C++ and CUDA
  # host compiler are from the same vendor
  # NOTE(review): the check above only tests that CMAKE_CUDA_HOST_COMPILER is
  # not set; presumably that implies the matching host compiler - confirm.
  #
  # -Xcompiler takes a comma separated list, so join the flag list with ','.
  string(REPLACE ";" "," cuda_flags "${flags}")
  target_compile_options(vtkm_vectorization_flags
    INTERFACE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=${cuda_flags}>"
    )
endif()