ggml : add metal backend registry / device (llama/9713)

* ggml : add metal backend registry / device

ggml-ci

* metal : fix names [no ci]

* metal : global registry and device instances

ggml-ci

* cont : alternative initialization of global objects

ggml-ci

* llama : adapt to backend changes

ggml-ci

* fixes

* metal : fix indent

* metal : fix build when MTLGPUFamilyApple3 is not available

ggml-ci

* fix merge

* metal : avoid unnecessary singleton accesses

ggml-ci

* metal : minor fix [no ci]

* metal : g_state -> g_ggml_ctx_dev_main [no ci]

* metal : avoid reference of device context in the backend context

ggml-ci

* metal : minor [no ci]

* metal : fix maxTransferRate check

* metal : remove transfer rate stuff

---------

Co-authored-by: slaren <slarengh@gmail.com>
This commit is contained in:
Georgi Gerganov 2024-10-07 18:27:51 +03:00
parent 80753d4da8
commit 315364d7de
5 changed files with 514 additions and 233 deletions

View File

@@ -127,6 +127,8 @@ extern "C" {
         bool async;
         // pinned host buffer
         bool host_buffer;
+        // creating buffers from host ptr
+        bool buffer_from_host_ptr;
         // event synchronization
         bool events;
     };

View File

@@ -43,7 +43,9 @@ GGML_API ggml_backend_t ggml_backend_metal_init(void);
 GGML_API bool ggml_backend_is_metal(ggml_backend_t backend);
 
-GGML_API ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t size, size_t max_size);
+GGML_DEPRECATED(
+        GGML_API ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t size, size_t max_size),
+        "obsoleted by the new device interface - https://github.com/ggerganov/llama.cpp/pull/9713");
 
 GGML_API void ggml_backend_metal_set_abort_callback(ggml_backend_t backend, ggml_abort_callback abort_callback, void * user_data);
@@ -57,6 +59,8 @@ GGML_API bool ggml_backend_metal_supports_family(ggml_backend_t backend, int fam
 // capture all command buffers committed the next time `ggml_backend_graph_compute` is called
 GGML_API void ggml_backend_metal_capture_next_compute(ggml_backend_t backend);
 
+GGML_API ggml_backend_reg_t ggml_backend_metal_reg(void);
+
 #ifdef __cplusplus
 }
 #endif

View File

@@ -463,6 +463,7 @@ enum ggml_backend_dev_type ggml_backend_dev_type(ggml_backend_dev_t device) {
 }
 
 void ggml_backend_dev_get_props(ggml_backend_dev_t device, struct ggml_backend_dev_props * props) {
+    memset(props, 0, sizeof(*props));
     device->iface.get_props(device, props);
 }
@@ -479,6 +480,10 @@ ggml_backend_buffer_type_t ggml_backend_dev_buffer_type(ggml_backend_dev_t devic
 }
 
 ggml_backend_buffer_type_t ggml_backend_dev_host_buffer_type(ggml_backend_dev_t device) {
+    if (device->iface.get_host_buffer_type == NULL) {
+        return NULL;
+    }
+
     return device->iface.get_host_buffer_type(device);
 }
@@ -525,6 +530,10 @@ void * ggml_backend_reg_get_proc_address(ggml_backend_reg_t reg, const char * na
 #include "ggml-cuda.h"
 #endif
 
+#ifdef GGML_USE_METAL
+#include "ggml-metal.h"
+#endif
+
 struct ggml_backend_registry {
     std::vector<ggml_backend_reg_t> backends;
     std::vector<ggml_backend_dev_t> devices;
@@ -533,10 +542,13 @@ struct ggml_backend_registry {
 #ifdef GGML_USE_CUDA
         register_backend(ggml_backend_cuda_reg());
 #endif
+#ifdef GGML_USE_METAL
+        register_backend(ggml_backend_metal_reg());
+#endif
 
         register_backend(ggml_backend_cpu_reg());
 
-        // TODO: sycl, metal, vulkan, kompute, cann
+        // TODO: sycl, vulkan, kompute, cann
     }
 
     void register_backend(ggml_backend_reg_t reg) {
void register_backend(ggml_backend_reg_t reg) { void register_backend(ggml_backend_reg_t reg) {
@@ -1118,9 +1130,10 @@ static void ggml_backend_cpu_device_get_props(ggml_backend_dev_t dev, struct ggm
     props->type = ggml_backend_cpu_device_get_type(dev);
     ggml_backend_cpu_device_get_memory(dev, &props->memory_free, &props->memory_total);
     props->caps = {
-        /* async       */ false,
-        /* host_buffer */ false,
-        /* events      */ false,
+        /* .async                = */ false,
+        /* .host_buffer          = */ false,
+        /* .buffer_from_host_ptr = */ true,
+        /* .events               = */ false,
     };
 }

View File

@@ -2920,9 +2920,10 @@ static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_back
 #endif
 
     props->caps = {
-        /* async       */ true,
-        /* host_buffer */ host_buffer,
-        /* events      */ events,
+        /* .async                = */ true,
+        /* .host_buffer          = */ host_buffer,
+        /* .buffer_from_host_ptr = */ false,
+        /* .events               = */ events,
     };
 }

File diff suppressed because it is too large Load Diff