build : add WHISPER_COREML_ALLOW_FALLBACK to make / CMake (#812)

This commit is contained in:
Georgi Gerganov 2023-04-29 10:55:24 +03:00
parent 94a7cd2a07
commit 3efb81dec6
No known key found for this signature in database
GPG Key ID: 449E073F9DC10735
3 changed files with 93 additions and 76 deletions

View File

@@ -60,6 +60,7 @@ if (APPLE)
option(WHISPER_NO_FMA "whisper: disable FMA" OFF) option(WHISPER_NO_FMA "whisper: disable FMA" OFF)
option(WHISPER_COREML "whisper: enable Core ML framework" OFF) option(WHISPER_COREML "whisper: enable Core ML framework" OFF)
option(WHISPER_COREML_ALLOW_FALLBACK "whisper: allow non-CoreML fallback" OFF)
else() else()
option(WHISPER_SUPPORT_OPENBLAS "whisper: support for OpenBLAS" OFF) option(WHISPER_SUPPORT_OPENBLAS "whisper: support for OpenBLAS" OFF)
endif() endif()
@@ -119,6 +120,10 @@ if (APPLE)
else() else()
message(WARNING "CoreML framework not found") message(WARNING "CoreML framework not found")
endif() endif()
if (WHISPER_COREML_ALLOW_FALLBACK)
set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DWHISPER_USE_COREML_ALLOW_FALLBACK)
endif()
endif() endif()
endif() endif()

View File

@@ -123,6 +123,7 @@ endif
ifeq ($(UNAME_M),amd64) ifeq ($(UNAME_M),amd64)
CFLAGS += -mavx -mavx2 -mfma -mf16c CFLAGS += -mavx -mavx2 -mfma -mf16c
endif endif
ifneq ($(filter ppc64%,$(UNAME_M)),) ifneq ($(filter ppc64%,$(UNAME_M)),)
POWER9_M := $(shell grep "POWER9" /proc/cpuinfo) POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
ifneq (,$(findstring POWER9,$(POWER9_M))) ifneq (,$(findstring POWER9,$(POWER9_M)))
@@ -133,6 +134,7 @@ ifneq ($(filter ppc64%,$(UNAME_M)),)
CXXFLAGS += -std=c++23 -DGGML_BIG_ENDIAN CXXFLAGS += -std=c++23 -DGGML_BIG_ENDIAN
endif endif
endif endif
ifndef WHISPER_NO_ACCELERATE ifndef WHISPER_NO_ACCELERATE
# Mac M1 - include Accelerate framework # Mac M1 - include Accelerate framework
ifeq ($(UNAME_S),Darwin) ifeq ($(UNAME_S),Darwin)
@@ -140,26 +142,36 @@ ifndef WHISPER_NO_ACCELERATE
LDFLAGS += -framework Accelerate LDFLAGS += -framework Accelerate
endif endif
endif endif
ifdef WHISPER_COREML ifdef WHISPER_COREML
CXXFLAGS += -DWHISPER_USE_COREML CXXFLAGS += -DWHISPER_USE_COREML
LDFLAGS += -framework Foundation -framework CoreML LDFLAGS += -framework Foundation -framework CoreML
ifdef WHISPER_COREML_ALLOW_FALLBACK
CXXFLAGS += -DWHISPER_USE_COREML_ALLOW_FALLBACK
endif endif
endif
ifdef WHISPER_OPENBLAS ifdef WHISPER_OPENBLAS
CFLAGS += -DGGML_USE_OPENBLAS -I/usr/local/include/openblas CFLAGS += -DGGML_USE_OPENBLAS -I/usr/local/include/openblas
LDFLAGS += -lopenblas LDFLAGS += -lopenblas
endif endif
ifdef WHISPER_GPROF ifdef WHISPER_GPROF
CFLAGS += -pg CFLAGS += -pg
CXXFLAGS += -pg CXXFLAGS += -pg
endif endif
ifneq ($(filter aarch64%,$(UNAME_M)),) ifneq ($(filter aarch64%,$(UNAME_M)),)
CFLAGS += -mcpu=native CFLAGS += -mcpu=native
CXXFLAGS += -mcpu=native CXXFLAGS += -mcpu=native
endif endif
ifneq ($(filter armv6%,$(UNAME_M)),) ifneq ($(filter armv6%,$(UNAME_M)),)
# 32-bit Raspberry Pi 1, 2, 3 # 32-bit Raspberry Pi 1, 2, 3
CFLAGS += -mfpu=neon -mfp16-format=ieee -mno-unaligned-access CFLAGS += -mfpu=neon -mfp16-format=ieee -mno-unaligned-access
endif endif
ifneq ($(filter armv7%,$(UNAME_M)),) ifneq ($(filter armv7%,$(UNAME_M)),)
# 32-bit ARM, for example on Armbian or possibly raspbian # 32-bit ARM, for example on Armbian or possibly raspbian
CFLAGS += -mfpu=neon -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations CFLAGS += -mfpu=neon -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
@@ -167,6 +179,7 @@ ifneq ($(filter armv7%,$(UNAME_M)),)
# 64-bit ARM, use these (TODO: auto-detect 64-bit) # 64-bit ARM, use these (TODO: auto-detect 64-bit)
# CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations # CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
endif endif
ifneq ($(filter armv8%,$(UNAME_M)),) ifneq ($(filter armv8%,$(UNAME_M)),)
# Raspberry Pi 4 # Raspberry Pi 4
CFLAGS += -mfp16-format=ieee -mno-unaligned-access CFLAGS += -mfp16-format=ieee -mno-unaligned-access

View File

@@ -1393,8 +1393,7 @@ static bool whisper_encode_internal(
const bool use_coreml = wstate.ctx_coreml != nullptr; const bool use_coreml = wstate.ctx_coreml != nullptr;
#endif #endif
if (!use_coreml) if (!use_coreml) {
{
// convolution + gelu // convolution + gelu
{ {
wstate.use_buf(ctx0, 1); wstate.use_buf(ctx0, 1);
@@ -1504,7 +1503,7 @@ static bool whisper_encode_internal(
wstate.use_buf(ctx0, 0); wstate.use_buf(ctx0, 0);
#ifdef WHISPER_USE_FLASH_ATTN #ifdef WHISPER_USE_FLASH_ATTN
struct ggml_tensor * Q = struct ggml_tensor * Q =
ggml_permute(ctx0, ggml_permute(ctx0,
ggml_cpy(ctx0, ggml_cpy(ctx0,
@@ -1529,7 +1528,7 @@ static bool whisper_encode_internal(
ggml_new_tensor_3d(ctx0, wctx.wtype, n_ctx, n_state/n_head, n_head)); ggml_new_tensor_3d(ctx0, wctx.wtype, n_ctx, n_state/n_head, n_head));
struct ggml_tensor * KQV = ggml_flash_attn(ctx0, Q, K, V, false); struct ggml_tensor * KQV = ggml_flash_attn(ctx0, Q, K, V, false);
#else #else
struct ggml_tensor * Q = struct ggml_tensor * Q =
ggml_permute(ctx0, ggml_permute(ctx0,
ggml_cpy(ctx0, ggml_cpy(ctx0,
@@ -1575,7 +1574,7 @@ static bool whisper_encode_internal(
); );
struct ggml_tensor * KQV = ggml_mul_mat(ctx0, ggml_transpose(ctx0, V), KQ_soft_max); struct ggml_tensor * KQV = ggml_mul_mat(ctx0, ggml_transpose(ctx0, V), KQ_soft_max);
#endif #endif
struct ggml_tensor * KQV_merged = ggml_permute(ctx0, KQV, 0, 2, 1, 3); struct ggml_tensor * KQV_merged = ggml_permute(ctx0, KQV, 0, 2, 1, 3);
wstate.use_buf(ctx0, 1); wstate.use_buf(ctx0, 1);
@@ -1625,13 +1624,13 @@ static bool whisper_encode_internal(
ggml_repeat(ctx0, layer.mlp_ln_b, cur)); ggml_repeat(ctx0, layer.mlp_ln_b, cur));
} }
#ifdef WHISPER_USE_FLASH_FF #ifdef WHISPER_USE_FLASH_FF
wstate.use_buf(ctx0, 0); wstate.use_buf(ctx0, 0);
cur = ggml_flash_ff(ctx0, cur = ggml_flash_ff(ctx0,
ggml_cpy(ctx0, cur, ggml_new_tensor_2d(ctx0, wstate.wtype, n_state, n_ctx)), ggml_cpy(ctx0, cur, ggml_new_tensor_2d(ctx0, wstate.wtype, n_state, n_ctx)),
layer.mlp_0_w, layer.mlp_0_b, layer.mlp_1_w, layer.mlp_1_b); layer.mlp_0_w, layer.mlp_0_b, layer.mlp_1_w, layer.mlp_1_b);
#else #else
wstate.use_buf(ctx0, 0); wstate.use_buf(ctx0, 0);
// fully connected // fully connected
@@ -1662,7 +1661,7 @@ static bool whisper_encode_internal(
cur = ggml_add(ctx0, cur = ggml_add(ctx0,
ggml_repeat(ctx0, layer.mlp_1_b, cur), ggml_repeat(ctx0, layer.mlp_1_b, cur),
cur); cur);
#endif #endif
} }
wstate.use_buf(ctx0, 3); wstate.use_buf(ctx0, 3);