whisper : add loader class to allow loading from buffer and others (#353)
* whisper : add loader to allow loading from other than file * whisper : rename whisper_init to whisper_init_from_file * whisper : add whisper_init_from_buffer * android : Delete local.properties * android : load models directly from assets * whisper : adding <stddef.h> needed for size_t + code style Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
parent
52a3e0c92a
commit
1512545149
@ -91,7 +91,7 @@ var (
|
|||||||
func Whisper_init(path string) *Context {
|
func Whisper_init(path string) *Context {
|
||||||
cPath := C.CString(path)
|
cPath := C.CString(path)
|
||||||
defer C.free(unsafe.Pointer(cPath))
|
defer C.free(unsafe.Pointer(cPath))
|
||||||
if ctx := C.whisper_init(cPath); ctx != nil {
|
if ctx := C.whisper_init_from_file(cPath); ctx != nil {
|
||||||
return (*Context)(ctx)
|
return (*Context)(ctx)
|
||||||
} else {
|
} else {
|
||||||
return nil
|
return nil
|
||||||
|
@ -20,7 +20,7 @@ struct whisper_context * g_context;
|
|||||||
EMSCRIPTEN_BINDINGS(whisper) {
|
EMSCRIPTEN_BINDINGS(whisper) {
|
||||||
emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
|
emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
|
||||||
if (g_context == nullptr) {
|
if (g_context == nullptr) {
|
||||||
g_context = whisper_init(path_model.c_str());
|
g_context = whisper_init_from_file(path_model.c_str());
|
||||||
if (g_context != nullptr) {
|
if (g_context != nullptr) {
|
||||||
return true;
|
return true;
|
||||||
} else {
|
} else {
|
||||||
|
@ -52,7 +52,7 @@ EMSCRIPTEN_BINDINGS(bench) {
|
|||||||
emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
|
emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
|
||||||
for (size_t i = 0; i < g_contexts.size(); ++i) {
|
for (size_t i = 0; i < g_contexts.size(); ++i) {
|
||||||
if (g_contexts[i] == nullptr) {
|
if (g_contexts[i] == nullptr) {
|
||||||
g_contexts[i] = whisper_init(path_model.c_str());
|
g_contexts[i] = whisper_init_from_file(path_model.c_str());
|
||||||
if (g_contexts[i] != nullptr) {
|
if (g_contexts[i] != nullptr) {
|
||||||
if (g_worker.joinable()) {
|
if (g_worker.joinable()) {
|
||||||
g_worker.join();
|
g_worker.join();
|
||||||
|
@ -53,7 +53,7 @@ int main(int argc, char ** argv) {
|
|||||||
|
|
||||||
// whisper init
|
// whisper init
|
||||||
|
|
||||||
struct whisper_context * ctx = whisper_init(params.model.c_str());
|
struct whisper_context * ctx = whisper_init_from_file(params.model.c_str());
|
||||||
|
|
||||||
{
|
{
|
||||||
fprintf(stderr, "\n");
|
fprintf(stderr, "\n");
|
||||||
|
@ -324,7 +324,7 @@ EMSCRIPTEN_BINDINGS(command) {
|
|||||||
emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
|
emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
|
||||||
for (size_t i = 0; i < g_contexts.size(); ++i) {
|
for (size_t i = 0; i < g_contexts.size(); ++i) {
|
||||||
if (g_contexts[i] == nullptr) {
|
if (g_contexts[i] == nullptr) {
|
||||||
g_contexts[i] = whisper_init(path_model.c_str());
|
g_contexts[i] = whisper_init_from_file(path_model.c_str());
|
||||||
if (g_contexts[i] != nullptr) {
|
if (g_contexts[i] != nullptr) {
|
||||||
g_running = true;
|
g_running = true;
|
||||||
if (g_worker.joinable()) {
|
if (g_worker.joinable()) {
|
||||||
|
@ -931,7 +931,7 @@ int main(int argc, char ** argv) {
|
|||||||
|
|
||||||
// whisper init
|
// whisper init
|
||||||
|
|
||||||
struct whisper_context * ctx = whisper_init(params.model.c_str());
|
struct whisper_context * ctx = whisper_init_from_file(params.model.c_str());
|
||||||
|
|
||||||
// print some info about the processing
|
// print some info about the processing
|
||||||
{
|
{
|
||||||
|
@ -478,7 +478,7 @@ int main(int argc, char ** argv) {
|
|||||||
|
|
||||||
// whisper init
|
// whisper init
|
||||||
|
|
||||||
struct whisper_context * ctx = whisper_init(params.model.c_str());
|
struct whisper_context * ctx = whisper_init_from_file(params.model.c_str());
|
||||||
|
|
||||||
if (ctx == nullptr) {
|
if (ctx == nullptr) {
|
||||||
fprintf(stderr, "error: failed to initialize whisper context\n");
|
fprintf(stderr, "error: failed to initialize whisper context\n");
|
||||||
|
@ -129,7 +129,7 @@ EMSCRIPTEN_BINDINGS(stream) {
|
|||||||
emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
|
emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
|
||||||
for (size_t i = 0; i < g_contexts.size(); ++i) {
|
for (size_t i = 0; i < g_contexts.size(); ++i) {
|
||||||
if (g_contexts[i] == nullptr) {
|
if (g_contexts[i] == nullptr) {
|
||||||
g_contexts[i] = whisper_init(path_model.c_str());
|
g_contexts[i] = whisper_init_from_file(path_model.c_str());
|
||||||
if (g_contexts[i] != nullptr) {
|
if (g_contexts[i] != nullptr) {
|
||||||
g_running = true;
|
g_running = true;
|
||||||
if (g_worker.joinable()) {
|
if (g_worker.joinable()) {
|
||||||
|
@ -456,7 +456,7 @@ int main(int argc, char ** argv) {
|
|||||||
exit(0);
|
exit(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
struct whisper_context * ctx = whisper_init(params.model.c_str());
|
struct whisper_context * ctx = whisper_init_from_file(params.model.c_str());
|
||||||
|
|
||||||
std::vector<float> pcmf32 (n_samples_30s, 0.0f);
|
std::vector<float> pcmf32 (n_samples_30s, 0.0f);
|
||||||
std::vector<float> pcmf32_old(n_samples_30s, 0.0f);
|
std::vector<float> pcmf32_old(n_samples_30s, 0.0f);
|
||||||
|
@ -271,7 +271,7 @@ EMSCRIPTEN_BINDINGS(talk) {
|
|||||||
emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
|
emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
|
||||||
for (size_t i = 0; i < g_contexts.size(); ++i) {
|
for (size_t i = 0; i < g_contexts.size(); ++i) {
|
||||||
if (g_contexts[i] == nullptr) {
|
if (g_contexts[i] == nullptr) {
|
||||||
g_contexts[i] = whisper_init(path_model.c_str());
|
g_contexts[i] = whisper_init_from_file(path_model.c_str());
|
||||||
if (g_contexts[i] != nullptr) {
|
if (g_contexts[i] != nullptr) {
|
||||||
g_running = true;
|
g_running = true;
|
||||||
if (g_worker.joinable()) {
|
if (g_worker.joinable()) {
|
||||||
|
@ -498,7 +498,7 @@ int main(int argc, char ** argv) {
|
|||||||
|
|
||||||
// whisper init
|
// whisper init
|
||||||
|
|
||||||
struct whisper_context * ctx_wsp = whisper_init(params.model_wsp.c_str());
|
struct whisper_context * ctx_wsp = whisper_init_from_file(params.model_wsp.c_str());
|
||||||
|
|
||||||
// gpt init
|
// gpt init
|
||||||
|
|
||||||
|
@ -64,16 +64,22 @@ class MainScreenViewModel(private val application: Application) : ViewModel() {
|
|||||||
private suspend fun copyAssets() = withContext(Dispatchers.IO) {
|
private suspend fun copyAssets() = withContext(Dispatchers.IO) {
|
||||||
modelsPath.mkdirs()
|
modelsPath.mkdirs()
|
||||||
samplesPath.mkdirs()
|
samplesPath.mkdirs()
|
||||||
application.copyData("models", modelsPath, ::printMessage)
|
//application.copyData("models", modelsPath, ::printMessage)
|
||||||
application.copyData("samples", samplesPath, ::printMessage)
|
application.copyData("samples", samplesPath, ::printMessage)
|
||||||
printMessage("All data copied to working directory.\n")
|
printMessage("All data copied to working directory.\n")
|
||||||
}
|
}
|
||||||
|
|
||||||
private suspend fun loadBaseModel() = withContext(Dispatchers.IO) {
|
private suspend fun loadBaseModel() = withContext(Dispatchers.IO) {
|
||||||
printMessage("Loading model...\n")
|
printMessage("Loading model...\n")
|
||||||
val firstModel = modelsPath.listFiles()!!.first()
|
val models = application.assets.list("models/")
|
||||||
whisperContext = WhisperContext.createContext(firstModel.absolutePath)
|
if (models != null) {
|
||||||
printMessage("Loaded model ${firstModel.name}.\n")
|
val inputstream = application.assets.open("models/" + models[0])
|
||||||
|
whisperContext = WhisperContext.createContextFromInputStream(inputstream)
|
||||||
|
printMessage("Loaded model ${models[0]}.\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
//val firstModel = modelsPath.listFiles()!!.first()
|
||||||
|
//whisperContext = WhisperContext.createContextFromFile(firstModel.absolutePath)
|
||||||
}
|
}
|
||||||
|
|
||||||
fun transcribeSample() = viewModelScope.launch {
|
fun transcribeSample() = viewModelScope.launch {
|
||||||
|
@ -4,6 +4,7 @@ import android.os.Build
|
|||||||
import android.util.Log
|
import android.util.Log
|
||||||
import kotlinx.coroutines.*
|
import kotlinx.coroutines.*
|
||||||
import java.io.File
|
import java.io.File
|
||||||
|
import java.io.InputStream
|
||||||
import java.util.concurrent.Executors
|
import java.util.concurrent.Executors
|
||||||
|
|
||||||
private const val LOG_TAG = "LibWhisper"
|
private const val LOG_TAG = "LibWhisper"
|
||||||
@ -39,13 +40,22 @@ class WhisperContext private constructor(private var ptr: Long) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
companion object {
|
companion object {
|
||||||
fun createContext(filePath: String): WhisperContext {
|
fun createContextFromFile(filePath: String): WhisperContext {
|
||||||
val ptr = WhisperLib.initContext(filePath)
|
val ptr = WhisperLib.initContext(filePath)
|
||||||
if (ptr == 0L) {
|
if (ptr == 0L) {
|
||||||
throw java.lang.RuntimeException("Couldn't create context with path $filePath")
|
throw java.lang.RuntimeException("Couldn't create context with path $filePath")
|
||||||
}
|
}
|
||||||
return WhisperContext(ptr)
|
return WhisperContext(ptr)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Creates a [WhisperContext] from model data supplied by [stream].
 *
 * Throws a RuntimeException when the native initialisation returns 0.
 */
fun createContextFromInputStream(stream: InputStream): WhisperContext {
    val handle = WhisperLib.initContextFromInputStream(stream)

    return if (handle != 0L) {
        WhisperContext(handle)
    } else {
        throw java.lang.RuntimeException("Couldn't create context from input stream")
    }
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -76,6 +86,7 @@ private class WhisperLib {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// JNI methods
|
// JNI methods
|
||||||
|
external fun initContextFromInputStream(inputStream: InputStream): Long
|
||||||
external fun initContext(modelPath: String): Long
|
external fun initContext(modelPath: String): Long
|
||||||
external fun freeContext(contextPtr: Long)
|
external fun freeContext(contextPtr: Long)
|
||||||
external fun fullTranscribe(contextPtr: Long, audioData: FloatArray)
|
external fun fullTranscribe(contextPtr: Long, audioData: FloatArray)
|
||||||
|
@ -2,6 +2,7 @@
|
|||||||
#include <android/log.h>
|
#include <android/log.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <sys/sysinfo.h>
|
#include <sys/sysinfo.h>
|
||||||
|
#include <string.h>
|
||||||
#include "whisper.h"
|
#include "whisper.h"
|
||||||
|
|
||||||
#define UNUSED(x) (void)(x)
|
#define UNUSED(x) (void)(x)
|
||||||
@ -17,13 +18,86 @@ static inline int max(int a, int b) {
|
|||||||
return (a > b) ? a : b;
|
return (a > b) ? a : b;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct input_stream_context {
|
||||||
|
size_t offset;
|
||||||
|
JNIEnv * env;
|
||||||
|
jobject thiz;
|
||||||
|
jobject input_stream;
|
||||||
|
|
||||||
|
jmethodID mid_available;
|
||||||
|
jmethodID mid_read;
|
||||||
|
};
|
||||||
|
|
||||||
|
size_t inputStreamRead(void * ctx, void * output, size_t read_size) {
|
||||||
|
struct input_stream_context* is = (struct input_stream_context*)ctx;
|
||||||
|
|
||||||
|
jint avail_size = (*is->env)->CallIntMethod(is->env, is->input_stream, is->mid_available);
|
||||||
|
jint size_to_copy = read_size < avail_size ? (jint)read_size : avail_size;
|
||||||
|
|
||||||
|
jbyteArray byte_array = (*is->env)->NewByteArray(is->env, size_to_copy);
|
||||||
|
|
||||||
|
jint n_read = (*is->env)->CallIntMethod(is->env, is->input_stream, is->mid_read, byte_array, 0, size_to_copy);
|
||||||
|
|
||||||
|
if (size_to_copy != read_size || size_to_copy != n_read) {
|
||||||
|
LOGI("Insufficient Read: Req=%zu, ToCopy=%d, Available=%d", read_size, size_to_copy, n_read);
|
||||||
|
}
|
||||||
|
|
||||||
|
jbyte* byte_array_elements = (*is->env)->GetByteArrayElements(is->env, byte_array, NULL);
|
||||||
|
memcpy(output, byte_array_elements, size_to_copy);
|
||||||
|
(*is->env)->ReleaseByteArrayElements(is->env, byte_array, byte_array_elements, JNI_ABORT);
|
||||||
|
|
||||||
|
(*is->env)->DeleteLocalRef(is->env, byte_array);
|
||||||
|
|
||||||
|
is->offset += size_to_copy;
|
||||||
|
|
||||||
|
return size_to_copy;
|
||||||
|
}
|
||||||
|
bool inputStreamEof(void * ctx) {
|
||||||
|
struct input_stream_context* is = (struct input_stream_context*)ctx;
|
||||||
|
|
||||||
|
jint result = (*is->env)->CallIntMethod(is->env, is->input_stream, is->mid_available);
|
||||||
|
return result <= 0;
|
||||||
|
}
|
||||||
|
// whisper_model_loader `close` callback. Intentionally does nothing: this
// function never closes or releases the stream.
void inputStreamClose(void * ctx) {
    (void) ctx;
}
|
||||||
|
|
||||||
|
JNIEXPORT jlong JNICALL
|
||||||
|
Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_initContextFromInputStream(
|
||||||
|
JNIEnv *env, jobject thiz, jobject input_stream) {
|
||||||
|
UNUSED(thiz);
|
||||||
|
|
||||||
|
struct whisper_context *context = NULL;
|
||||||
|
struct whisper_model_loader loader = {};
|
||||||
|
struct input_stream_context inp_ctx = {};
|
||||||
|
|
||||||
|
inp_ctx.offset = 0;
|
||||||
|
inp_ctx.env = env;
|
||||||
|
inp_ctx.thiz = thiz;
|
||||||
|
inp_ctx.input_stream = input_stream;
|
||||||
|
|
||||||
|
jclass cls = (*env)->GetObjectClass(env, input_stream);
|
||||||
|
inp_ctx.mid_available = (*env)->GetMethodID(env, cls, "available", "()I");
|
||||||
|
inp_ctx.mid_read = (*env)->GetMethodID(env, cls, "read", "([BII)I");
|
||||||
|
|
||||||
|
loader.context = &inp_ctx;
|
||||||
|
loader.read = inputStreamRead;
|
||||||
|
loader.eof = inputStreamEof;
|
||||||
|
loader.close = inputStreamClose;
|
||||||
|
|
||||||
|
loader.eof(loader.context);
|
||||||
|
|
||||||
|
context = whisper_init(&loader);
|
||||||
|
return (jlong) context;
|
||||||
|
}
|
||||||
|
|
||||||
JNIEXPORT jlong JNICALL
|
JNIEXPORT jlong JNICALL
|
||||||
Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_initContext(
|
Java_com_whispercppdemo_whisper_WhisperLib_00024Companion_initContext(
|
||||||
JNIEnv *env, jobject thiz, jstring model_path_str) {
|
JNIEnv *env, jobject thiz, jstring model_path_str) {
|
||||||
UNUSED(thiz);
|
UNUSED(thiz);
|
||||||
struct whisper_context *context = NULL;
|
struct whisper_context *context = NULL;
|
||||||
const char *model_path_chars = (*env)->GetStringUTFChars(env, model_path_str, NULL);
|
const char *model_path_chars = (*env)->GetStringUTFChars(env, model_path_str, NULL);
|
||||||
context = whisper_init(model_path_chars);
|
context = whisper_init_from_file(model_path_chars);
|
||||||
(*env)->ReleaseStringUTFChars(env, model_path_str, model_path_chars);
|
(*env)->ReleaseStringUTFChars(env, model_path_str, model_path_chars);
|
||||||
return (jlong) context;
|
return (jlong) context;
|
||||||
}
|
}
|
||||||
|
@ -1,10 +0,0 @@
|
|||||||
## This file is automatically generated by Android Studio.
|
|
||||||
# Do not modify this file -- YOUR CHANGES WILL BE ERASED!
|
|
||||||
#
|
|
||||||
# This file should *NOT* be checked into Version Control Systems,
|
|
||||||
# as it contains information specific to your local configuration.
|
|
||||||
#
|
|
||||||
# Location of the SDK. This is only used by Gradle.
|
|
||||||
# For customization when using a Version Control System, please read the
|
|
||||||
# header note.
|
|
||||||
sdk.dir=/Users/kevin/Library/Android/sdk
|
|
@ -61,7 +61,7 @@ void AudioInputCallback(void * inUserData,
|
|||||||
NSLog(@"Loading model from %@", modelPath);
|
NSLog(@"Loading model from %@", modelPath);
|
||||||
|
|
||||||
// create ggml context
|
// create ggml context
|
||||||
stateInp.ctx = whisper_init([modelPath UTF8String]);
|
stateInp.ctx = whisper_init_from_file([modelPath UTF8String]);
|
||||||
|
|
||||||
// check if the model was loaded successfully
|
// check if the model was loaded successfully
|
||||||
if (stateInp.ctx == NULL) {
|
if (stateInp.ctx == NULL) {
|
||||||
|
@ -55,7 +55,7 @@ actor WhisperContext {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static func createContext(path: String) throws -> WhisperContext {
|
static func createContext(path: String) throws -> WhisperContext {
|
||||||
let context = whisper_init(path)
|
let context = whisper_init_from_file(path)
|
||||||
if let context {
|
if let context {
|
||||||
return WhisperContext(context: context)
|
return WhisperContext(context: context)
|
||||||
} else {
|
} else {
|
||||||
|
@ -18,7 +18,7 @@ EMSCRIPTEN_BINDINGS(whisper) {
|
|||||||
|
|
||||||
for (size_t i = 0; i < g_contexts.size(); ++i) {
|
for (size_t i = 0; i < g_contexts.size(); ++i) {
|
||||||
if (g_contexts[i] == nullptr) {
|
if (g_contexts[i] == nullptr) {
|
||||||
g_contexts[i] = whisper_init(path_model.c_str());
|
g_contexts[i] = whisper_init_from_file(path_model.c_str());
|
||||||
if (g_contexts[i] != nullptr) {
|
if (g_contexts[i] != nullptr) {
|
||||||
return i + 1;
|
return i + 1;
|
||||||
} else {
|
} else {
|
||||||
|
144
whisper.cpp
144
whisper.cpp
@ -437,8 +437,8 @@ struct whisper_context {
|
|||||||
};
|
};
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
static void read_safe(std::ifstream& fin, T& dest) {
|
static void read_safe(whisper_model_loader * loader, T & dest) {
|
||||||
fin.read((char*)& dest, sizeof(T));
|
loader->read(loader->context, &dest, sizeof(T));
|
||||||
}
|
}
|
||||||
|
|
||||||
// load the model from a ggml file
|
// load the model from a ggml file
|
||||||
@ -452,24 +452,18 @@ static void read_safe(std::ifstream& fin, T& dest) {
|
|||||||
//
|
//
|
||||||
// see the convert-pt-to-ggml.py script for details
|
// see the convert-pt-to-ggml.py script for details
|
||||||
//
|
//
|
||||||
static bool whisper_model_load(const std::string & fname, whisper_context & wctx) {
|
static bool whisper_model_load(struct whisper_model_loader * loader, whisper_context & wctx) {
|
||||||
fprintf(stderr, "%s: loading model from '%s'\n", __func__, fname.c_str());
|
fprintf(stderr, "%s: loading model\n", __func__);
|
||||||
|
|
||||||
auto & model = wctx.model;
|
auto & model = wctx.model;
|
||||||
auto & vocab = wctx.vocab;
|
auto & vocab = wctx.vocab;
|
||||||
|
|
||||||
auto fin = std::ifstream(fname, std::ios::binary);
|
|
||||||
if (!fin) {
|
|
||||||
fprintf(stderr, "%s: failed to open '%s'\n", __func__, fname.c_str());
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// verify magic
|
// verify magic
|
||||||
{
|
{
|
||||||
uint32_t magic;
|
uint32_t magic;
|
||||||
read_safe(fin, magic);
|
read_safe(loader, magic);
|
||||||
if (magic != 0x67676d6c) {
|
if (magic != 0x67676d6c) {
|
||||||
fprintf(stderr, "%s: invalid model file '%s' (bad magic)\n", __func__, fname.c_str());
|
fprintf(stderr, "%s: invalid model data (bad magic)\n", __func__);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -478,17 +472,17 @@ static bool whisper_model_load(const std::string & fname, whisper_context & wctx
|
|||||||
{
|
{
|
||||||
auto & hparams = model.hparams;
|
auto & hparams = model.hparams;
|
||||||
|
|
||||||
read_safe(fin, hparams.n_vocab);
|
read_safe(loader, hparams.n_vocab);
|
||||||
read_safe(fin, hparams.n_audio_ctx);
|
read_safe(loader, hparams.n_audio_ctx);
|
||||||
read_safe(fin, hparams.n_audio_state);
|
read_safe(loader, hparams.n_audio_state);
|
||||||
read_safe(fin, hparams.n_audio_head);
|
read_safe(loader, hparams.n_audio_head);
|
||||||
read_safe(fin, hparams.n_audio_layer);
|
read_safe(loader, hparams.n_audio_layer);
|
||||||
read_safe(fin, hparams.n_text_ctx);
|
read_safe(loader, hparams.n_text_ctx);
|
||||||
read_safe(fin, hparams.n_text_state);
|
read_safe(loader, hparams.n_text_state);
|
||||||
read_safe(fin, hparams.n_text_head);
|
read_safe(loader, hparams.n_text_head);
|
||||||
read_safe(fin, hparams.n_text_layer);
|
read_safe(loader, hparams.n_text_layer);
|
||||||
read_safe(fin, hparams.n_mels);
|
read_safe(loader, hparams.n_mels);
|
||||||
read_safe(fin, hparams.f16);
|
read_safe(loader, hparams.f16);
|
||||||
|
|
||||||
assert(hparams.n_text_state == hparams.n_audio_state);
|
assert(hparams.n_text_state == hparams.n_audio_state);
|
||||||
|
|
||||||
@ -536,17 +530,17 @@ static bool whisper_model_load(const std::string & fname, whisper_context & wctx
|
|||||||
{
|
{
|
||||||
auto & filters = wctx.model.filters;
|
auto & filters = wctx.model.filters;
|
||||||
|
|
||||||
read_safe(fin, filters.n_mel);
|
read_safe(loader, filters.n_mel);
|
||||||
read_safe(fin, filters.n_fft);
|
read_safe(loader, filters.n_fft);
|
||||||
|
|
||||||
filters.data.resize(filters.n_mel * filters.n_fft);
|
filters.data.resize(filters.n_mel * filters.n_fft);
|
||||||
fin.read((char *) filters.data.data(), filters.data.size() * sizeof(float));
|
loader->read(loader->context, filters.data.data(), filters.data.size() * sizeof(float));
|
||||||
}
|
}
|
||||||
|
|
||||||
// load vocab
|
// load vocab
|
||||||
{
|
{
|
||||||
int32_t n_vocab = 0;
|
int32_t n_vocab = 0;
|
||||||
read_safe(fin, n_vocab);
|
read_safe(loader, n_vocab);
|
||||||
|
|
||||||
//if (n_vocab != model.hparams.n_vocab) {
|
//if (n_vocab != model.hparams.n_vocab) {
|
||||||
// fprintf(stderr, "%s: invalid model file '%s' (bad vocab size %d != %d)\n",
|
// fprintf(stderr, "%s: invalid model file '%s' (bad vocab size %d != %d)\n",
|
||||||
@ -561,11 +555,11 @@ static bool whisper_model_load(const std::string & fname, whisper_context & wctx
|
|||||||
|
|
||||||
for (int i = 0; i < n_vocab; i++) {
|
for (int i = 0; i < n_vocab; i++) {
|
||||||
uint32_t len;
|
uint32_t len;
|
||||||
read_safe(fin, len);
|
read_safe(loader, len);
|
||||||
|
|
||||||
if (len > 0) {
|
if (len > 0) {
|
||||||
tmp.resize(len);
|
tmp.resize(len);
|
||||||
fin.read(&tmp[0], tmp.size()); // read to buffer
|
loader->read(loader->context, &tmp[0], tmp.size()); // read to buffer
|
||||||
word.assign(&tmp[0], tmp.size());
|
word.assign(&tmp[0], tmp.size());
|
||||||
} else {
|
} else {
|
||||||
// seems like we have an empty-string token in multi-language models (i = 50256)
|
// seems like we have an empty-string token in multi-language models (i = 50256)
|
||||||
@ -1017,24 +1011,24 @@ static bool whisper_model_load(const std::string & fname, whisper_context & wctx
|
|||||||
int32_t length;
|
int32_t length;
|
||||||
int32_t ftype;
|
int32_t ftype;
|
||||||
|
|
||||||
read_safe(fin, n_dims);
|
read_safe(loader, n_dims);
|
||||||
read_safe(fin, length);
|
read_safe(loader, length);
|
||||||
read_safe(fin, ftype);
|
read_safe(loader, ftype);
|
||||||
|
|
||||||
if (fin.eof()) {
|
if (loader->eof(loader->context)) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
int32_t nelements = 1;
|
int32_t nelements = 1;
|
||||||
int32_t ne[3] = { 1, 1, 1 };
|
int32_t ne[3] = { 1, 1, 1 };
|
||||||
for (int i = 0; i < n_dims; ++i) {
|
for (int i = 0; i < n_dims; ++i) {
|
||||||
read_safe(fin, ne[i]);
|
read_safe(loader, ne[i]);
|
||||||
nelements *= ne[i];
|
nelements *= ne[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string name;
|
std::string name;
|
||||||
std::vector<char> tmp(length); // create a buffer
|
std::vector<char> tmp(length); // create a buffer
|
||||||
fin.read(&tmp[0], tmp.size()); // read to buffer
|
loader->read(loader->context, &tmp[0], tmp.size()); // read to buffer
|
||||||
name.assign(&tmp[0], tmp.size());
|
name.assign(&tmp[0], tmp.size());
|
||||||
|
|
||||||
if (model.tensors.find(name) == model.tensors.end()) {
|
if (model.tensors.find(name) == model.tensors.end()) {
|
||||||
@ -1062,7 +1056,7 @@ static bool whisper_model_load(const std::string & fname, whisper_context & wctx
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
fin.read(reinterpret_cast<char *>(tensor->data), ggml_nbytes(tensor));
|
loader->read(loader->context, tensor->data, ggml_nbytes(tensor));
|
||||||
|
|
||||||
//printf("%48s - [%5d, %5d, %5d], type = %6s, %6.2f MB\n", name.data(), ne[0], ne[1], ne[2], ftype == 0 ? "float" : "f16", ggml_nbytes(tensor)/1024.0/1024.0);
|
//printf("%48s - [%5d, %5d, %5d], type = %6s, %6.2f MB\n", name.data(), ne[0], ne[1], ne[2], ftype == 0 ? "float" : "f16", ggml_nbytes(tensor)/1024.0/1024.0);
|
||||||
total_size += ggml_nbytes(tensor);
|
total_size += ggml_nbytes(tensor);
|
||||||
@ -1079,8 +1073,6 @@ static bool whisper_model_load(const std::string & fname, whisper_context & wctx
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fin.close();
|
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2240,7 +2232,74 @@ static std::vector<whisper_vocab::id> tokenize(const whisper_vocab & vocab, cons
|
|||||||
// interface implementation
|
// interface implementation
|
||||||
//
|
//
|
||||||
|
|
||||||
struct whisper_context * whisper_init(const char * path_model) {
|
struct whisper_context * whisper_init_from_file(const char * path_model) {
|
||||||
|
whisper_model_loader loader = {};
|
||||||
|
|
||||||
|
fprintf(stderr, "%s: loading model from '%s'\n", __func__, path_model);
|
||||||
|
|
||||||
|
auto fin = std::ifstream(path_model, std::ios::binary);
|
||||||
|
if (!fin) {
|
||||||
|
fprintf(stderr, "%s: failed to open '%s'\n", __func__, path_model);
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
loader.context = &fin;
|
||||||
|
loader.read = [](void * ctx, void * output, size_t read_size) {
|
||||||
|
std::ifstream * fin = (std::ifstream*)ctx;
|
||||||
|
fin->read((char *)output, read_size);
|
||||||
|
return read_size;
|
||||||
|
};
|
||||||
|
|
||||||
|
loader.eof = [](void * ctx) {
|
||||||
|
std::ifstream * fin = (std::ifstream*)ctx;
|
||||||
|
return fin->eof();
|
||||||
|
};
|
||||||
|
|
||||||
|
loader.close = [](void * ctx) {
|
||||||
|
std::ifstream * fin = (std::ifstream*)ctx;
|
||||||
|
fin->close();
|
||||||
|
};
|
||||||
|
|
||||||
|
return whisper_init(&loader);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct whisper_context * whisper_init_from_buffer(void * buffer, size_t buffer_size) {
|
||||||
|
struct buf_context {
|
||||||
|
uint8_t* buffer;
|
||||||
|
size_t size;
|
||||||
|
size_t current_offset;
|
||||||
|
};
|
||||||
|
|
||||||
|
buf_context ctx = { reinterpret_cast<uint8_t*>(buffer), buffer_size, 0 };
|
||||||
|
whisper_model_loader loader = {};
|
||||||
|
|
||||||
|
fprintf(stderr, "%s: loading model from buffer\n", __func__);
|
||||||
|
|
||||||
|
loader.context = &ctx;
|
||||||
|
|
||||||
|
loader.read = [](void * ctx, void * output, size_t read_size) {
|
||||||
|
buf_context * buf = reinterpret_cast<buf_context *>(ctx);
|
||||||
|
|
||||||
|
size_t size_to_copy = buf->current_offset + read_size < buf->size ? read_size : buf->size - buf->current_offset;
|
||||||
|
|
||||||
|
memcpy(output, buf->buffer + buf->current_offset, size_to_copy);
|
||||||
|
buf->current_offset += size_to_copy;
|
||||||
|
|
||||||
|
return size_to_copy;
|
||||||
|
};
|
||||||
|
|
||||||
|
loader.eof = [](void * ctx) {
|
||||||
|
buf_context * buf = reinterpret_cast<buf_context *>(ctx);
|
||||||
|
|
||||||
|
return buf->current_offset >= buf->size;
|
||||||
|
};
|
||||||
|
|
||||||
|
loader.close = [](void * /*ctx*/) { };
|
||||||
|
|
||||||
|
return whisper_init(&loader);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct whisper_context * whisper_init(struct whisper_model_loader * loader) {
|
||||||
ggml_time_init();
|
ggml_time_init();
|
||||||
|
|
||||||
whisper_context * ctx = new whisper_context;
|
whisper_context * ctx = new whisper_context;
|
||||||
@ -2249,14 +2308,17 @@ struct whisper_context * whisper_init(const char * path_model) {
|
|||||||
|
|
||||||
ctx->t_start_us = t_start_us;
|
ctx->t_start_us = t_start_us;
|
||||||
|
|
||||||
if (!whisper_model_load(path_model, *ctx)) {
|
if (!whisper_model_load(loader, *ctx)) {
|
||||||
fprintf(stderr, "%s: failed to load model from '%s'\n", __func__, path_model);
|
loader->close(loader->context);
|
||||||
|
fprintf(stderr, "%s: failed to load model\n", __func__);
|
||||||
delete ctx;
|
delete ctx;
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
ctx->t_load_us = ggml_time_us() - t_start_us;
|
ctx->t_load_us = ggml_time_us() - t_start_us;
|
||||||
|
|
||||||
|
loader->close(loader->context);
|
||||||
|
|
||||||
return ctx;
|
return ctx;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
20
whisper.h
20
whisper.h
@ -1,6 +1,7 @@
|
|||||||
#ifndef WHISPER_H
|
#ifndef WHISPER_H
|
||||||
#define WHISPER_H
|
#define WHISPER_H
|
||||||
|
|
||||||
|
#include <stddef.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
|
|
||||||
@ -40,7 +41,7 @@ extern "C" {
|
|||||||
//
|
//
|
||||||
// ...
|
// ...
|
||||||
//
|
//
|
||||||
// struct whisper_context * ctx = whisper_init("/path/to/ggml-base.en.bin");
|
// struct whisper_context * ctx = whisper_init_from_file("/path/to/ggml-base.en.bin");
|
||||||
//
|
//
|
||||||
// if (whisper_full(ctx, wparams, pcmf32.data(), pcmf32.size()) != 0) {
|
// if (whisper_full(ctx, wparams, pcmf32.data(), pcmf32.size()) != 0) {
|
||||||
// fprintf(stderr, "failed to process audio\n");
|
// fprintf(stderr, "failed to process audio\n");
|
||||||
@ -84,9 +85,20 @@ extern "C" {
|
|||||||
float vlen; // voice length of the token
|
float vlen; // voice length of the token
|
||||||
} whisper_token_data;
|
} whisper_token_data;
|
||||||
|
|
||||||
// Allocates all memory needed for the model and loads the model from the given file.
|
typedef struct whisper_model_loader {
|
||||||
// Returns NULL on failure.
|
void * context;
|
||||||
WHISPER_API struct whisper_context * whisper_init(const char * path_model);
|
|
||||||
|
size_t (*read)(void * ctx, void * output, size_t read_size);
|
||||||
|
bool (*eof)(void * ctx);
|
||||||
|
void (*close)(void * ctx);
|
||||||
|
} whisper_model_loader;
|
||||||
|
|
||||||
|
// Various functions to load a ggml whisper model.
|
||||||
|
// Allocates (almost) all memory needed for the model.
|
||||||
|
// Return NULL on failure
|
||||||
|
WHISPER_API struct whisper_context * whisper_init_from_file(const char * path_model);
|
||||||
|
WHISPER_API struct whisper_context * whisper_init_from_buffer(void * buffer, size_t buffer_size);
|
||||||
|
WHISPER_API struct whisper_context * whisper_init(struct whisper_model_loader * loader);
|
||||||
|
|
||||||
// Frees all memory allocated by the model.
|
// Frees all memory allocated by the model.
|
||||||
WHISPER_API void whisper_free(struct whisper_context * ctx);
|
WHISPER_API void whisper_free(struct whisper_context * ctx);
|
||||||
|
Loading…
Reference in New Issue
Block a user