ggml-cuda: Adding support for unified memory (llama/8035)
* Adding support for unified memory
* Adding again the documentation about unified memory
* Refactoring: Moved the unified memory code in the correct location.
* Fixed compilation error when using hipblas
* Cleaning up the documentation
* Updating the documentation

Co-authored-by: Johannes Gäßler <johannesg@5d6.de>

* Adding one more case where the PR should not be enabled

---------

Co-authored-by: matteo serva <matteo.serva@gmail.com>
Co-authored-by: Johannes Gäßler <johannesg@5d6.de>
This commit is contained in:
parent
d26250f78c
commit
8e39ee171f
@@ -130,7 +130,22 @@ static cudaError_t ggml_cuda_device_malloc(void ** ptr, size_t size, int device)
|
|||||||
}
|
}
|
||||||
return res;
|
return res;
|
||||||
#else
|
#else
|
||||||
|
|
||||||
|
#if !defined(GGML_USE_HIPBLAS) && !defined(GGML_USE_MUSA)
|
||||||
|
cudaError_t err;
|
||||||
|
if (getenv("GGML_CUDA_ENABLE_UNIFIED_MEMORY") != nullptr)
|
||||||
|
{
|
||||||
|
err = cudaMallocManaged(ptr, size);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
err = cudaMalloc(ptr, size);
|
||||||
|
}
|
||||||
|
return err;
|
||||||
|
#else
|
||||||
return cudaMalloc(ptr, size);
|
return cudaMalloc(ptr, size);
|
||||||
|
#endif // !defined(GGML_USE_HIPBLAS) && !defined(GGML_USE_MUSA)
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user