From 6ebba525f1cc9393752906023a3385a2cc8062ed Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Sun, 14 Jan 2024 18:08:20 +0200 Subject: [PATCH] talk-llama : sync llama.cpp --- examples/talk-llama/llama.cpp | 36 +++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/examples/talk-llama/llama.cpp b/examples/talk-llama/llama.cpp index 5182196..7af3871 100644 --- a/examples/talk-llama/llama.cpp +++ b/examples/talk-llama/llama.cpp @@ -2190,6 +2190,11 @@ struct llama_model_loader { LLM_KV llm_kv = LLM_KV(LLM_ARCH_UNKNOWN); llama_model_loader(const std::string & fname, bool use_mmap, const struct llama_model_kv_override * param_overrides_p) : file(fname.c_str(), "rb") { + int trace = 0; + if (getenv("LLAMA_TRACE")) { + trace = atoi(getenv("LLAMA_TRACE")); + } + struct gguf_init_params params = { /*.no_alloc = */ true, /*.ctx = */ &ctx_meta, @@ -2242,11 +2247,10 @@ struct llama_model_loader { type_max = type; } - // TODO: make runtime configurable -#if 0 - struct ggml_tensor * meta = ggml_get_tensor(ctx_meta, gguf_get_tensor_name(ctx_gguf, i)); - LLAMA_LOG_INFO("%s: - tensor %4d: %32s %-8s [ %s ]\n", __func__, i, ggml_get_name(meta), ggml_type_name(type), llama_format_tensor_shape(meta).c_str()); -#endif + if (trace > 0) { + struct ggml_tensor * meta = ggml_get_tensor(ctx_meta, gguf_get_tensor_name(ctx_gguf, i)); + LLAMA_LOG_INFO("%s: - tensor %4d: %32s %-8s [ %s ]\n", __func__, i, ggml_get_name(meta), ggml_type_name(type), llama_format_tensor_shape(meta).c_str()); + } } switch (type_max) { @@ -6451,15 +6455,15 @@ static uint8_t llama_token_to_byte(const llama_vocab& vocab, llama_token id) { static llama_token llama_byte_to_token(const llama_vocab & vocab, uint8_t ch) { static const char * hex = "0123456789ABCDEF"; switch (llama_vocab_get_type(vocab)) { - case LLAMA_VOCAB_TYPE_SPM: { - const char buf[7] = { '<', '0', 'x', hex[ch >> 4], hex[ch & 15], '>', 0 }; - return vocab.token_to_id.at(buf); - } - case LLAMA_VOCAB_TYPE_BPE: { - return vocab.token_to_id.at(bytes_to_unicode_bpe(ch)); - } - default: - GGML_ASSERT(false); + case LLAMA_VOCAB_TYPE_SPM: { + const char buf[7] = { '<', '0', 'x', hex[ch >> 4], hex[ch & 15], '>', 0 }; + return vocab.token_to_id.at(buf); + } + case LLAMA_VOCAB_TYPE_BPE: { + return vocab.token_to_id.at(bytes_to_unicode_bpe(ch)); + } + default: + GGML_ASSERT(false); } } @@ -7095,7 +7099,7 @@ static std::vector llama_tokenize_internal(const llama_vocab & } #ifdef PRETOKENIZERDEBUG - LLAMA_LOG_WARN(TT: (%ld %ld %ld) '%s'\n", raw_text.length(), fragment.offset, fragment.length, raw_text.c_str()); + LLAMA_LOG_WARN("TT: (%ld %ld %ld) '%s'\n", raw_text.length(), fragment.offset, fragment.length, raw_text.c_str()); #endif llm_tokenizer_spm tokenizer(vocab); llama_escape_whitespace(raw_text); @@ -7116,7 +7120,7 @@ static std::vector llama_tokenize_internal(const llama_vocab & auto raw_text = fragment.raw_text.substr(fragment.offset, fragment.length); #ifdef PRETOKENIZERDEBUG - LLAMA_LOG_WARN(TT: (%ld %ld %ld) '%s'\n", raw_text.length(), fragment.offset, fragment.length, raw_text.c_str()); + LLAMA_LOG_WARN("TT: (%ld %ld %ld) '%s'\n", raw_text.length(), fragment.offset, fragment.length, raw_text.c_str()); #endif llm_tokenizer_bpe tokenizer(vocab); tokenizer.tokenize(raw_text, output);