From 8e39ee171f9e8f34706b113d63ad93ec47f80ca8 Mon Sep 17 00:00:00 2001
From: matteo <matteogeniaccio@yahoo.it>
Date: Thu, 1 Aug 2024 23:28:28 +0200
Subject: [PATCH] ggml-cuda: Adding support for unified memory (llama/8035)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Adding support for unified memory

* adding again the documentation about unified memory

* refactoring: Moved the unified memory code in the correct location.

* Fixed compilation error when using hipblas

* cleaning up the documentation

* Updating the documentation

Co-authored-by: Johannes Gäßler <johannesg@5d6.de>

* adding one more case where the PR should not be enabled

---------

Co-authored-by: matteo serva <matteo.serva@gmail.com>
Co-authored-by: Johannes Gäßler <johannesg@5d6.de>
---
 ggml/src/ggml-cuda.cu | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/ggml/src/ggml-cuda.cu b/ggml/src/ggml-cuda.cu
index b510777..68605ff 100644
--- a/ggml/src/ggml-cuda.cu
+++ b/ggml/src/ggml-cuda.cu
@@ -130,7 +130,22 @@ static cudaError_t ggml_cuda_device_malloc(void ** ptr, size_t size, int device)
     }
     return res;
 #else
+
+#if !defined(GGML_USE_HIPBLAS) && !defined(GGML_USE_MUSA)
+    cudaError_t err;
+    if (getenv("GGML_CUDA_ENABLE_UNIFIED_MEMORY") != nullptr)
+    {
+        err = cudaMallocManaged(ptr, size);
+    }
+    else
+    {
+        err = cudaMalloc(ptr, size);
+    }
+    return err;
+#else
     return cudaMalloc(ptr, size);
+#endif // !defined(GGML_USE_HIPBLAS) && !defined(GGML_USE_MUSA)
+
 #endif
 }