From 8923bb4292b7125d865269b973a242949c6d5a1a Mon Sep 17 00:00:00 2001 From: hipudding Date: Wed, 17 Jul 2024 19:23:50 +0800 Subject: [PATCH] Add Ascend NPU backend (llama/6035) * [CANN] Add Ascend NPU backend Ascend is a full-stack AI computing infrastructure for industry applications and services based on Huawei Ascend processors and software. CANN (Compute Architecture of Neural Networks), developed by Huawei, is a heterogeneous computing architecture for AI. Co-authored-by: wangshuai09 <391746016@qq.com> * delete trailing whitespaces * Modify the code based on review comment * Rename LLAMA_CANN to GGML_CANN * Make ggml-common.h private * add ggml_cann prefix for acl funcs * Add logging for CANN backend * Delete Trailing whitespace --------- Co-authored-by: wangshuai09 <391746016@qq.com> --- ggml/include/ggml.h | 3 ++ ggml/src/CMakeLists.txt | 69 +++++++++++++++++++++++++++++++++++++++++ ggml/src/ggml-backend.c | 5 +++ ggml/src/ggml.c | 11 ++++++- 4 files changed, 87 insertions(+), 1 deletion(-) diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h index f2145ff..2fdb9fa 100644 --- a/ggml/include/ggml.h +++ b/ggml/include/ggml.h @@ -753,6 +753,8 @@ extern "C" { GGML_API bool ggml_are_same_shape (const struct ggml_tensor * t0, const struct ggml_tensor * t1); GGML_API bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1); + GGML_API bool ggml_can_repeat(const struct ggml_tensor * t0, const struct ggml_tensor * t1); + // use this to compute the memory overhead of a tensor GGML_API size_t ggml_tensor_overhead(void); @@ -2397,6 +2399,7 @@ extern "C" { GGML_API int ggml_cpu_has_rpc (void); GGML_API int ggml_cpu_has_vsx (void); GGML_API int ggml_cpu_has_matmul_int8(void); + GGML_API int ggml_cpu_has_cann (void); // // Internal types and functions exposed for tests and benchmarks diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt index cbadaf4..3f4c66b 100644 --- a/ggml/src/CMakeLists.txt +++ b/ggml/src/CMakeLists.txt @@ 
-770,6 +770,74 @@ if (GGML_CPU_HBM) target_link_libraries(ggml PUBLIC memkind) endif() +if (GGML_CANN) + if ("cann${CANN_INSTALL_DIR}" STREQUAL "cann" AND DEFINED ENV{ASCEND_TOOLKIT_HOME}) + set(CANN_INSTALL_DIR $ENV{ASCEND_TOOLKIT_HOME}) + message(STATUS "CANN: updated CANN_INSTALL_DIR from ASCEND_TOOLKIT_HOME=$ENV{ASCEND_TOOLKIT_HOME}") + endif() + + if (CANN_INSTALL_DIR) + # Only Support Linux. + if (GGML_CANN) + if (NOT UNIX) + set(GGML_CANN OFF) + message(WARNING "CANN: CANN toolkit supports unix but not ${CMAKE_SYSTEM_NAME}. Turning off GGML_CANN") + endif() + endif() + + # Supported platforms: x86-64, arm64 + if (GGML_CANN) + if (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64") + elseif (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "amd64") + else() + set(GGML_CANN OFF) + message(WARNING "CANN: CANN toolkit supports x86-64 and arm64 but not ${CMAKE_SYSTEM_PROCESSOR}. Turning off GGML_CANN") + endif() + endif() + + # Set header and libs + if(GGML_CANN) + set(CANN_INCLUDE_DIRS + ${CANN_INSTALL_DIR}/include + ${CANN_INSTALL_DIR}/include/aclnn + ${CANN_INSTALL_DIR}/acllib/include + ) + + # TODO: find libs + link_directories( + ${CANN_INSTALL_DIR}/lib64 + ) + + add_subdirectory(ggml-cann/kernels) + list(APPEND CANN_LIBRARIES + ascendcl + nnopbase + opapi + acl_op_compiler + ascendc_kernels + ) + + set(GGML_HEADERS_CANN "../include/ggml-cann.h") + file(GLOB GGML_SOURCES_CANN "ggml-cann/*.cpp") + list(APPEND GGML_SOURCES_CANN "ggml-cann.cpp") + + message(STATUS "CANN: CANN_INCLUDE_DIRS = ${CANN_INCLUDE_DIRS}") + message(STATUS "CANN: CANN_LIBRARIES = ${CANN_LIBRARIES}") + + set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} ${CANN_LIBRARIES} ) + set(GGML_EXTRA_INCLUDES ${GGML_EXTRA_INCLUDES} ${CANN_INCLUDE_DIRS}) + list(APPEND GGML_CDEF_PUBLIC GGML_USE_CANN) + endif() + else() + set(GGML_CANN OFF) + message(WARNING "CANN: Can't find CANN_INSTALL_DIR, do you forget to source set_var.sh. 
Turning off GGML_CANN") + endif() + + if(NOT GGML_CANN) + message(WARNING "CANN: GGML_CANN is turned OFF, see above for details.") + endif() +endif() + function(get_flags CCID CCVER) set(C_FLAGS "") set(CXX_FLAGS "") @@ -1184,6 +1252,7 @@ add_library(ggml ${GGML_SOURCES_ROCM} ${GGML_HEADERS_ROCM} ${GGML_SOURCES_BLAS} ${GGML_HEADERS_BLAS} ${GGML_SOURCES_LLAMAFILE} ${GGML_HEADERS_LLAMAFILE} + ${GGML_SOURCES_CANN} ${GGML_HEADERS_CANN} ggml-aarch64.c ggml-aarch64.h ) diff --git a/ggml/src/ggml-backend.c b/ggml/src/ggml-backend.c index dbbaa39..01c87ef 100644 --- a/ggml/src/ggml-backend.c +++ b/ggml/src/ggml-backend.c @@ -445,6 +445,11 @@ GGML_CALL static void ggml_backend_registry_init(void) { extern GGML_CALL void ggml_backend_kompute_reg_devices(void); ggml_backend_kompute_reg_devices(); #endif + +#ifdef GGML_USE_CANN + extern GGML_CALL int ggml_backend_cann_reg_devices(void); + ggml_backend_cann_reg_devices(); +#endif } GGML_CALL void ggml_backend_register(const char * name, ggml_backend_init_fn init_fn, ggml_backend_buffer_type_t default_buffer_type, void * user_data) { diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c index 9e2b1d8..f98d73d 100644 --- a/ggml/src/ggml.c +++ b/ggml/src/ggml.c @@ -3341,7 +3341,7 @@ bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tenso } // check if t1 can be represented as a repeatition of t0 -static inline bool ggml_can_repeat(const struct ggml_tensor * t0, const struct ggml_tensor * t1) { +bool ggml_can_repeat(const struct ggml_tensor * t0, const struct ggml_tensor * t1) { static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); return ggml_is_empty(t0) ? 
ggml_is_empty(t1) : @@ -13699,6 +13699,7 @@ static void ggml_compute_forward_soft_max( } } + // ggml_compute_forward_soft_max_back static void ggml_compute_forward_soft_max_back_f32( @@ -21994,6 +21995,14 @@ int ggml_cpu_has_rpc(void) { #endif } +int ggml_cpu_has_cann(void) { +#if defined(GGML_USE_CANN) + return 1; +#else + return 0; +#endif +} + int ggml_cpu_has_gpublas(void) { return ggml_cpu_has_cuda() || ggml_cpu_has_vulkan() || ggml_cpu_has_kompute() || ggml_cpu_has_sycl(); }