From 05c3ea3bc8a6e6d2374888d3133535eebfa3ec05 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Sat, 29 Apr 2023 19:30:22 +0300 Subject: [PATCH] ggml : sync with ggml repo (warning fixes + asserts) --- ggml.c | 24 +++++++++++++++--------- ggml.h | 4 ++-- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/ggml.c b/ggml.c index 2ec0c0b..ebbaf11 100644 --- a/ggml.c +++ b/ggml.c @@ -8245,8 +8245,6 @@ static void ggml_compute_forward_mul_mat_f16_f32( ggml_fp16_t * d_X = ggml_cuda_pool_malloc(sizeof(float) * x_ne, &x_size); ggml_fp16_t * d_Y = ggml_cuda_pool_malloc(sizeof(float) * y_ne, &y_size); float * d_D = ggml_cuda_pool_malloc(sizeof(float) * d_ne, &d_size); -#else - float * const wdata = params->wdata; #endif for (int64_t i03 = 0; i03 < ne03; i03++) { for (int64_t i02 = 0; i02 < ne02; i02++) { @@ -8263,8 +8261,11 @@ static void ggml_compute_forward_mul_mat_f16_f32( wdata[id++] = GGML_FP32_TO_FP16(*(float *) ((char *) src1->data + i03*nb13 + i02*nb12 + i01*nb11 + i00*nb10)); } } + + assert(id*sizeof(ggml_fp16_t) <= params->wsize); } #else + float * const wdata = params->wdata; { size_t id = 0; for (int64_t i01 = 0; i01 < ne01; ++i01) { @@ -8272,6 +8273,8 @@ static void ggml_compute_forward_mul_mat_f16_f32( wdata[id++] = GGML_FP16_TO_FP32(*(ggml_fp16_t *) ((char *) src0->data + i03*nb03 + i02*nb02 + i01*nb01 + i00*nb00)); } } + + assert(id*sizeof(float) <= params->wsize); } #endif @@ -8537,7 +8540,10 @@ static void ggml_compute_forward_mul_mat_q_f32( dequantize_row_q((char *) src0->data + i03*nb03 + i02*nb02 + i01*nb01, wdata + id, ne00); id += ne00; } + + assert(id*sizeof(float) <= params->wsize); } + const float * x = wdata; #endif @@ -9118,7 +9124,7 @@ static void ggml_compute_forward_alibi_f32( //const int nb3 = src0->nb[3]; assert(nb0 == sizeof(float)); - assert(ne1+n_past == ne0); + assert(ne1 + n_past == ne0); (void) n_past; // add alibi to src0 (KQ_scaled) const int n_heads_log2_floor = 1 << (int) floor(log2(n_head)); @@ -9179,7 +9185,7 @@ static void ggml_compute_forward_alibi_f16( //const int nb3 = src0->nb[3]; assert(nb0 == sizeof(ggml_fp16_t)); - assert(ne1+n_past == ne0); + assert(ne1 + n_past == ne0); (void) n_past; // add alibi to src0 (KQ_scaled) const int n_heads_log2_floor = 1 << (int) floor(log2(n_head)); @@ -11571,12 +11577,12 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph) if (ggml_compute_forward_mul_mat_use_blas(node->src0, node->src1, node)) { node->n_tasks = 1; // TODO: this actually is doing nothing // the threads are still spinning -#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) +#if defined(GGML_USE_CUBLAS) + // with cuBLAS, we need memory for the full 3D / 4D data of src1 + cur = GGML_TYPE_SIZE[GGML_TYPE_F16]*ggml_nelements(node->src1); +#else // here we need memory just for single 2D matrix from src0 cur = GGML_TYPE_SIZE[GGML_TYPE_F32]*(node->src0->ne[0]*node->src0->ne[1]); -#else - // with GPU, we need memory for the full 3D / 4D data - cur = GGML_TYPE_SIZE[GGML_TYPE_F32]*MAX(ggml_nelements(node->src1), ggml_nelements(node->src0)); #endif } else { cur = GGML_TYPE_SIZE[GGML_TYPE_F16]*ggml_nelements(node->src1); @@ -11586,7 +11592,7 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph) #endif } else if (node->src0->type == GGML_TYPE_F32 && node->src1->type == GGML_TYPE_F32) { cur = 0; -#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CUBLAS) +#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CUBLAS) || defined(GGML_USE_CLBLAST) if (ggml_compute_forward_mul_mat_use_blas(node->src0, node->src1, node)) { node->n_tasks = 1; } diff --git a/ggml.h b/ggml.h index 38ae9a6..c1c5495 100644 --- a/ggml.h +++ b/ggml.h @@ -701,8 +701,8 @@ extern "C" { struct ggml_tensor * c1); // Mapping operations - GGML_API typedef void (*ggml_unary_op_f32_t)(const int, float *, const float *); - GGML_API typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *); + typedef void (*ggml_unary_op_f32_t)(const int, float *, const float *); + typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *); GGML_API struct ggml_tensor * ggml_map_unary_f32( struct ggml_context * ctx,