ggml : fix BLAS with unsupported types (llama/9775)
* ggml : do not use BLAS with types without to_float
* ggml : return pointer from ggml_internal_get_type_traits to avoid unnecessary copies
* ggml : rename ggml_internal_get_type_traits -> ggml_get_type_traits
  (it's not really internal if everybody uses it)
This commit is contained in:
parent
44bc2767fd
commit
1531259b2c
@ -2536,7 +2536,7 @@ extern "C" {
|
|||||||
typedef void (*ggml_gemm_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x,
|
typedef void (*ggml_gemm_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x,
|
||||||
const void * GGML_RESTRICT y, int nr, int nc);
|
const void * GGML_RESTRICT y, int nr, int nc);
|
||||||
|
|
||||||
typedef struct {
|
struct ggml_type_traits {
|
||||||
const char * type_name;
|
const char * type_name;
|
||||||
int64_t blck_size;
|
int64_t blck_size;
|
||||||
int64_t blck_size_interleave; // interleave elements in blocks
|
int64_t blck_size_interleave; // interleave elements in blocks
|
||||||
@ -2552,9 +2552,9 @@ extern "C" {
|
|||||||
int64_t ncols; // number of columns to process simultaneously
|
int64_t ncols; // number of columns to process simultaneously
|
||||||
ggml_gemv_t gemv;
|
ggml_gemv_t gemv;
|
||||||
ggml_gemm_t gemm;
|
ggml_gemm_t gemm;
|
||||||
} ggml_type_traits_t;
|
};
|
||||||
|
|
||||||
GGML_API ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type);
|
GGML_API const struct ggml_type_traits * ggml_get_type_traits(enum ggml_type type);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
|
@ -1177,7 +1177,7 @@ static bool ggml_backend_cpu_device_supports_op(ggml_backend_dev_t dev, const st
|
|||||||
op->type != GGML_TYPE_IQ1_S &&
|
op->type != GGML_TYPE_IQ1_S &&
|
||||||
op->type != GGML_TYPE_IQ1_M; // missing type_traits.from_float
|
op->type != GGML_TYPE_IQ1_M; // missing type_traits.from_float
|
||||||
case GGML_OP_MUL_MAT:
|
case GGML_OP_MUL_MAT:
|
||||||
return op->src[1]->type == GGML_TYPE_F32 || op->src[1]->type == ggml_internal_get_type_traits(op->src[0]->type).vec_dot_type;
|
return op->src[1]->type == GGML_TYPE_F32 || op->src[1]->type == ggml_get_type_traits(op->src[0]->type)->vec_dot_type;
|
||||||
case GGML_OP_ROPE_BACK:
|
case GGML_OP_ROPE_BACK:
|
||||||
return op->src[2] == NULL && (op->op_params[2] & 4) == 0;
|
return op->src[2] == NULL && (op->op_params[2] & 4) == 0;
|
||||||
case GGML_OP_IM2COL_BACK:
|
case GGML_OP_IM2COL_BACK:
|
||||||
|
@ -65,8 +65,8 @@ static void ggml_backend_blas_mul_mat(ggml_backend_blas_context * ctx, struct gg
|
|||||||
|
|
||||||
// convert src0 to float
|
// convert src0 to float
|
||||||
if (type != GGML_TYPE_F32) {
|
if (type != GGML_TYPE_F32) {
|
||||||
ggml_type_traits_t type_traits = ggml_internal_get_type_traits(type);
|
const auto * type_traits = ggml_get_type_traits(type);
|
||||||
ggml_to_float_t const to_float = type_traits.to_float;
|
ggml_to_float_t const to_float = type_traits->to_float;
|
||||||
|
|
||||||
for (int64_t i03 = 0; i03 < ne03; i03++) {
|
for (int64_t i03 = 0; i03 < ne03; i03++) {
|
||||||
for (int64_t i02 = 0; i02 < ne02; i02++) {
|
for (int64_t i02 = 0; i02 < ne02; i02++) {
|
||||||
@ -420,19 +420,21 @@ static bool ggml_backend_blas_device_supports_op(ggml_backend_dev_t dev, const s
|
|||||||
// TODO: find the optimal value
|
// TODO: find the optimal value
|
||||||
const int64_t min_batch = 32;
|
const int64_t min_batch = 32;
|
||||||
|
|
||||||
return (ggml_is_contiguous(src0) &&
|
return ggml_is_contiguous(src0) &&
|
||||||
ggml_is_contiguous(src1) &&
|
ggml_is_contiguous(src1) &&
|
||||||
src1->type == GGML_TYPE_F32 &&
|
src1->type == GGML_TYPE_F32 &&
|
||||||
(ne0 >= min_batch && ne1 >= min_batch && ne10 >= min_batch));
|
(ne0 >= min_batch && ne1 >= min_batch && ne10 >= min_batch) &&
|
||||||
|
(src0->type == GGML_TYPE_F32 || ggml_get_type_traits(src0->type)->to_float != NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
case GGML_OP_OUT_PROD:
|
case GGML_OP_OUT_PROD:
|
||||||
return (op->src[0]->type == GGML_TYPE_F32 &&
|
return op->src[0]->type == GGML_TYPE_F32 &&
|
||||||
op->src[1]->type == GGML_TYPE_F32 &&
|
op->src[1]->type == GGML_TYPE_F32 &&
|
||||||
ggml_is_matrix(src0) &&
|
ggml_is_matrix(src0) &&
|
||||||
ggml_is_matrix(src1) &&
|
ggml_is_matrix(src1) &&
|
||||||
ggml_is_contiguous(src0) &&
|
ggml_is_contiguous(src0) &&
|
||||||
(ggml_is_contiguous(src1) || ggml_is_transposed(src1)));
|
(ggml_is_contiguous(src1) || ggml_is_transposed(src1)) &&
|
||||||
|
(src0->type == GGML_TYPE_F32 || ggml_get_type_traits(src0->type)->to_float != NULL);
|
||||||
|
|
||||||
default:
|
default:
|
||||||
return false;
|
return false;
|
||||||
|
@ -5287,9 +5287,9 @@ static void ggml_vk_dequantize_data(const void * from, float * to, size_t ne, gg
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
ggml_type_traits_t tt = ggml_internal_get_type_traits(quant);
|
const auto * tt = ggml_get_type_traits(quant);
|
||||||
|
|
||||||
ggml_to_float_t dequant_fn = tt.to_float;
|
ggml_to_float_t dequant_fn = tt->to_float;
|
||||||
|
|
||||||
dequant_fn(from, to, ne);
|
dequant_fn(from, to, ne);
|
||||||
}
|
}
|
||||||
|
@ -730,7 +730,7 @@ static void ggml_vec_dot_f32(int n, float * restrict s, size_t bs, const float *
|
|||||||
static void ggml_vec_dot_f16(int n, float * restrict s, size_t bs, ggml_fp16_t * restrict x, size_t bx, ggml_fp16_t * restrict y, size_t by, int nrc);
|
static void ggml_vec_dot_f16(int n, float * restrict s, size_t bs, ggml_fp16_t * restrict x, size_t bx, ggml_fp16_t * restrict y, size_t by, int nrc);
|
||||||
static void ggml_vec_dot_bf16(int n, float * restrict s, size_t bs, ggml_bf16_t * restrict x, size_t bx, ggml_bf16_t * restrict y, size_t by, int nrc);
|
static void ggml_vec_dot_bf16(int n, float * restrict s, size_t bs, ggml_bf16_t * restrict x, size_t bx, ggml_bf16_t * restrict y, size_t by, int nrc);
|
||||||
|
|
||||||
static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
|
static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
|
||||||
[GGML_TYPE_I8] = {
|
[GGML_TYPE_I8] = {
|
||||||
.type_name = "i8",
|
.type_name = "i8",
|
||||||
.blck_size = 1,
|
.blck_size = 1,
|
||||||
@ -1152,9 +1152,9 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
|
|||||||
};
|
};
|
||||||
|
|
||||||
// For internal test use
|
// For internal test use
|
||||||
ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type) {
|
const struct ggml_type_traits * ggml_get_type_traits(enum ggml_type type) {
|
||||||
GGML_ASSERT(type < GGML_TYPE_COUNT);
|
GGML_ASSERT(type < GGML_TYPE_COUNT);
|
||||||
return type_traits[type];
|
return &type_traits[type];
|
||||||
}
|
}
|
||||||
|
|
||||||
//
|
//
|
||||||
|
Loading…
Reference in New Issue
Block a user