From 8e39ee171f9e8f34706b113d63ad93ec47f80ca8 Mon Sep 17 00:00:00 2001 From: matteo Date: Thu, 1 Aug 2024 23:28:28 +0200 Subject: [PATCH] ggml-cuda: Adding support for unified memory (llama/8035) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Adding support for unified memory * adding again the documentation about unified memory * refactoring: Moved the unified memory code in the correct location. * Fixed compilation error when using hipblas * cleaning up the documentation * Updating the documentation Co-authored-by: Johannes Gäßler * adding one more case where the PR should not be enabled --------- Co-authored-by: matteo serva Co-authored-by: Johannes Gäßler --- ggml/src/ggml-cuda.cu | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/ggml/src/ggml-cuda.cu b/ggml/src/ggml-cuda.cu index b510777..68605ff 100644 --- a/ggml/src/ggml-cuda.cu +++ b/ggml/src/ggml-cuda.cu @@ -130,7 +130,22 @@ static cudaError_t ggml_cuda_device_malloc(void ** ptr, size_t size, int device) } return res; #else + +#if !defined(GGML_USE_HIPBLAS) && !defined(GGML_USE_MUSA) + cudaError_t err; + if (getenv("GGML_CUDA_ENABLE_UNIFIED_MEMORY") != nullptr) + { + err = cudaMallocManaged(ptr, size); + } + else + { + err = cudaMalloc(ptr, size); + } + return err; +#else return cudaMalloc(ptr, size); +#endif // !defined(GGML_USE_HIPBLAS) && !defined(GGML_USE_MUSA) + #endif }