From 3c5c751174d012648e756408cf95cfaabb09d93a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20G=C3=A4=C3=9Fler?= Date: Sun, 17 Nov 2024 09:06:34 +0100 Subject: [PATCH] CMake: default to -arch=native for CUDA build (llama/10320) --- ggml/src/ggml-cuda/ggml/CMakeLists.txt | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/ggml/src/ggml-cuda/ggml/CMakeLists.txt b/ggml/src/ggml-cuda/ggml/CMakeLists.txt index 40ed2bd..860552f 100644 --- a/ggml/src/ggml-cuda/ggml/CMakeLists.txt +++ b/ggml/src/ggml-cuda/ggml/CMakeLists.txt @@ -6,15 +6,18 @@ if (CUDAToolkit_FOUND) message(STATUS "CUDA Toolkit found") if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES) - # 52 == lowest CUDA 12 standard - # 60 == FP16 CUDA intrinsics - # 61 == integer CUDA intrinsics - # 70 == compute capability at which unrolling a loop in mul_mat_q kernels is faster - if (GGML_CUDA_F16 OR GGML_CUDA_DMMV_F16) + # native == GPUs available at build time + # 52 == Maxwell, lowest CUDA 12 standard + # 60 == P100, FP16 CUDA intrinsics + # 61 == Pascal, __dp4a instruction (per-byte integer dot product) + # 70 == V100, FP16 tensor cores + # 75 == Turing, int6 tensor cores + if (GGML_NATIVE AND CUDAToolkit_VERSION VERSION_GREATER_EQUAL "11.6") + set(CMAKE_CUDA_ARCHITECTURES "native") + elseif(GGML_CUDA_F16 OR GGML_CUDA_DMMV_F16) set(CMAKE_CUDA_ARCHITECTURES "60;61;70;75") else() set(CMAKE_CUDA_ARCHITECTURES "52;61;70;75") - #set(CMAKE_CUDA_ARCHITECTURES "OFF") # use this to compile much faster, but only F16 models work endif() endif() message(STATUS "Using CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")