server : automatically convert audio on the server (#1539)
* server : automatically convert audio on the server * server : remove redundant comments * server : automatic conversion refactor * server : update server readme * server : remove unnecessary comments and tabs * server : put back remove calling * server : apply suggestions from code review Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> * server : check ffmpeg before the server launch * server : fix indentation * Apply suggestions from code review Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> * server : fix function typo calling * server : fix function typo calling * server : add warning in readme --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
parent
447d49530c
commit
23c21e92eb
@ -43,8 +43,12 @@ options:
|
|||||||
-oved D, --ov-e-device DNAME [CPU ] the OpenVINO device used for encode inference
|
-oved D, --ov-e-device DNAME [CPU ] the OpenVINO device used for encode inference
|
||||||
--host HOST, [127.0.0.1] Hostname/ip-adress for the server
|
--host HOST, [127.0.0.1] Hostname/ip-adress for the server
|
||||||
--port PORT, [8080 ] Port number for the server
|
--port PORT, [8080 ] Port number for the server
|
||||||
|
--convert, [false ] Convert audio to WAV, requires ffmpeg on the server
|
||||||
```
|
```
|
||||||
|
|
||||||
|
> [!WARNING]
|
||||||
|
> **Do not run the server example with administrative privileges and ensure it's operated in a sandbox environment, especially since it involves risky operations like accepting user file uploads and using ffmpeg for format conversions. Always validate and sanitize inputs to guard against potential security threats.**
|
||||||
|
|
||||||
## request examples
|
## request examples
|
||||||
|
|
||||||
**/inference**
|
**/inference**
|
||||||
|
@ -43,6 +43,8 @@ struct server_params
|
|||||||
int32_t port = 8080;
|
int32_t port = 8080;
|
||||||
int32_t read_timeout = 600;
|
int32_t read_timeout = 600;
|
||||||
int32_t write_timeout = 600;
|
int32_t write_timeout = 600;
|
||||||
|
|
||||||
|
bool ffmpeg_converter = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct whisper_params {
|
struct whisper_params {
|
||||||
@ -157,6 +159,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
|
|||||||
fprintf(stderr, " --host HOST, [%-7s] Hostname/ip-adress for the server\n", sparams.hostname.c_str());
|
fprintf(stderr, " --host HOST, [%-7s] Hostname/ip-adress for the server\n", sparams.hostname.c_str());
|
||||||
fprintf(stderr, " --port PORT, [%-7d] Port number for the server\n", sparams.port);
|
fprintf(stderr, " --port PORT, [%-7d] Port number for the server\n", sparams.port);
|
||||||
fprintf(stderr, " --public PATH, [%-7s] Path to the public folder\n", sparams.public_path.c_str());
|
fprintf(stderr, " --public PATH, [%-7s] Path to the public folder\n", sparams.public_path.c_str());
|
||||||
|
fprintf(stderr, " --convert, [%-7s] Convert audio to WAV, requires ffmpeg on the server", sparams.ffmpeg_converter ? "true" : "false");
|
||||||
fprintf(stderr, "\n");
|
fprintf(stderr, "\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -203,6 +206,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params, serve
|
|||||||
else if ( arg == "--port") { sparams.port = std::stoi(argv[++i]); }
|
else if ( arg == "--port") { sparams.port = std::stoi(argv[++i]); }
|
||||||
else if ( arg == "--host") { sparams.hostname = argv[++i]; }
|
else if ( arg == "--host") { sparams.hostname = argv[++i]; }
|
||||||
else if ( arg == "--public") { sparams.public_path = argv[++i]; }
|
else if ( arg == "--public") { sparams.public_path = argv[++i]; }
|
||||||
|
else if ( arg == "--convert") { sparams.ffmpeg_converter = true; }
|
||||||
else {
|
else {
|
||||||
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
|
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
|
||||||
whisper_print_usage(argc, argv, params, sparams);
|
whisper_print_usage(argc, argv, params, sparams);
|
||||||
@ -220,6 +224,45 @@ struct whisper_print_user_data {
|
|||||||
int progress_prev;
|
int progress_prev;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Verify that the `ffmpeg` executable can be launched.
//
// Runs `ffmpeg -version` through the shell. When the command reports success a
// confirmation line is printed and the function returns. Otherwise an
// explanation is written to stderr and the process terminates with a non-zero
// status, so a missing ffmpeg is not mistaken for a clean shutdown.
void check_ffmpeg_availibility() {
    const int result = system("ffmpeg -version");

    if (result == 0) {
        std::cout << "ffmpeg is available." << std::endl;
        return;
    }

    // ffmpeg is not available: report on stderr (newline-terminated) and fail
    fprintf(stderr, "ffmpeg is not found. Please ensure that ffmpeg is installed "
                    "and that its executable is included in your system's PATH.\n");
    exit(1);
}
|
||||||
|
|
||||||
|
// Convert the audio file at `temp_filename` to 16 kHz, mono, 16-bit PCM WAV by
// invoking ffmpeg through the shell, then replace the original file with the
// converted one.
//
// temp_filename : path of the file to convert; on success the same path holds
//                 the converted WAV data.
// error_resp    : set to a JSON error body describing the failing step;
//                 left untouched on success.
// returns       : true when conversion, removal of the original and the final
//                 rename all succeed, false otherwise.
bool convert_to_wav(const std::string & temp_filename, std::string & error_resp) {
    // The path is embedded in a double-quoted shell string, so an embedded
    // quote would terminate the argument early and let the rest be parsed as
    // shell syntax. Refuse such names outright.
    // NOTE(review): `$` and backticks are still expanded by POSIX shells inside
    // double quotes - callers should only pass server-generated filenames.
    if (temp_filename.find('"') != std::string::npos) {
        error_resp = "{\"error\":\"FFmpeg conversion failed.\"}";
        return false;
    }

    const std::string converted_filename_temp = temp_filename + "_temp.wav";

    // -y: overwrite a stale output left behind by an earlier failed request
    // instead of letting ffmpeg stop at its interactive overwrite prompt
    std::ostringstream cmd_stream;
    cmd_stream << "ffmpeg -y -i \"" << temp_filename << "\" -ar 16000 -ac 1 -c:a pcm_s16le \"" << converted_filename_temp << "\" 2>&1";
    const std::string cmd = cmd_stream.str();

    const int status = std::system(cmd.c_str());
    if (status != 0) {
        // ffmpeg may have written a partial output; best-effort cleanup
        std::remove(converted_filename_temp.c_str());
        error_resp = "{\"error\":\"FFmpeg conversion failed.\"}";
        return false;
    }

    // Remove the original file
    if (std::remove(temp_filename.c_str()) != 0) {
        std::remove(converted_filename_temp.c_str());
        error_resp = "{\"error\":\"Failed to remove the original file.\"}";
        return false;
    }

    // Rename the temporary file to match the original filename
    if (std::rename(converted_filename_temp.c_str(), temp_filename.c_str()) != 0) {
        std::remove(converted_filename_temp.c_str());
        error_resp = "{\"error\":\"Failed to rename the temporary file.\"}";
        return false;
    }

    return true;
}
|
||||||
|
|
||||||
std::string estimate_diarization_speaker(std::vector<std::vector<float>> pcmf32s, int64_t t0, int64_t t1, bool id_only = false) {
|
std::string estimate_diarization_speaker(std::vector<std::vector<float>> pcmf32s, int64_t t0, int64_t t1, bool id_only = false) {
|
||||||
std::string speaker = "";
|
std::string speaker = "";
|
||||||
const int64_t n_samples = pcmf32s[0].size();
|
const int64_t n_samples = pcmf32s[0].size();
|
||||||
@ -407,6 +450,9 @@ int main(int argc, char ** argv) {
|
|||||||
exit(0);
|
exit(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (sparams.ffmpeg_converter) {
|
||||||
|
check_ffmpeg_availibility();
|
||||||
|
}
|
||||||
// whisper init
|
// whisper init
|
||||||
struct whisper_context_params cparams;
|
struct whisper_context_params cparams;
|
||||||
cparams.use_gpu = params.use_gpu;
|
cparams.use_gpu = params.use_gpu;
|
||||||
@ -462,6 +508,18 @@ int main(int argc, char ** argv) {
|
|||||||
temp_file << audio_file.content;
|
temp_file << audio_file.content;
|
||||||
temp_file.close();
|
temp_file.close();
|
||||||
|
|
||||||
|
// if file is not wav, convert to wav
|
||||||
|
|
||||||
|
if (sparams.ffmpeg_converter) {
|
||||||
|
std::string error_resp = "{\"error\":\"Failed to execute ffmpeg command.\"}";
|
||||||
|
const bool is_converted = convert_to_wav(temp_filename, error_resp);
|
||||||
|
if (!is_converted) {
|
||||||
|
res.set_content(error_resp, "application/json");
|
||||||
|
whisper_mutex.unlock();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// read wav content into pcmf32
|
// read wav content into pcmf32
|
||||||
if (!::read_wav(temp_filename, pcmf32, pcmf32s, params.diarize)) {
|
if (!::read_wav(temp_filename, pcmf32, pcmf32s, params.diarize)) {
|
||||||
fprintf(stderr, "error: failed to read WAV file '%s'\n", temp_filename.c_str());
|
fprintf(stderr, "error: failed to read WAV file '%s'\n", temp_filename.c_str());
|
||||||
@ -509,7 +567,6 @@ int main(int argc, char ** argv) {
|
|||||||
|
|
||||||
// run the inference
|
// run the inference
|
||||||
{
|
{
|
||||||
|
|
||||||
printf("Running whisper.cpp inference on %s\n", filename.c_str());
|
printf("Running whisper.cpp inference on %s\n", filename.c_str());
|
||||||
whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
|
whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user