talk, talk-llama : pass text_to_speak as a file (#1865)

* talk-llama: pass file instead of arg

quoting arbitrary text on the command line is too hard to do portably
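
For illustration (not part of the change set itself), the quoting problem and the file-based workaround in shell terms; `./speak` stands for the examples' TTS wrapper and the voice number is arbitrary:

```
# fragile: the text becomes a shell argument, so any apostrophe or quote in it
# has to be escaped differently for sh, cmd.exe and PowerShell
./speak 2 "it's hard to quote this portably"

# robust: write the text to a file and pass only the path
printf '%s' "it's hard to quote this portably" > to_speak.txt
./speak 2 to_speak.txt
```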

* talk-llama: pass heard_ok as a file

* talk-llama: let eleven-labs.py accept options

Options: -v voice, -s savefile, -p (--play)

* talk-llama: check installed commands in "speak"

Pass "-q" to eleven-labs.py to skip checking whether elevenlabs is installed

* talk-llama: pass voice_id again

in order to sync talk with talk-llama

* talk: sync with talk-llama

Passing text_to_speak as a file is safer and more portable
cf. https://stackoverflow.com/a/59036879/45375

* talk and talk-llama: get all installed voices in speak.ps1

* talk and talk-llama: get voices from api

* talk and talk-llama: add more options to eleven-labs.py

and remove DEFAULT_VOICE because it is deprecated (the old default voice is no longer available: https://www.reddit.com/r/ElevenLabs/comments/1830abt/what_happened_to_bella/). Example invocations follow the usage text below.

```
usage: eleven-labs.py [-q] [-l] [-h] [-n NAME | -v NUMBER] [-f KEY=VAL] [-s FILE | -p] [TEXTFILE]

options:
  -q, --quick           skip checking the required library

action:
  TEXTFILE              read the text file (default: stdin)
  -l, --list            show the list of voices and exit
  -h, --help            show this help and exit

voice selection:
  -n NAME, --name NAME  get a voice object by name (default: Arnold)
  -v NUMBER, --voice NUMBER
                        get a voice object by number (see --list)
  -f KEY=VAL, --filter KEY=VAL
                        filter voices by labels (default: "use case=narration")
                        this option can be used multiple times
                        filtering will be disabled if the first -f has no "=" (e.g. -f "any")

output:
  -s FILE, --save FILE  save the TTS to a file (default: audio.mp3)
  -p, --play            play the TTS with ffplay
```
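
A few illustrative invocations of the options above (not taken from the commit; the output filename and label values are placeholders, label keys are whatever the API reports for each voice):

```
# list the voices that match the default filter ("use case=narration")
python3 eleven-labs.py --list

# disable filtering, pick a voice by name, and save instead of playing
python3 eleven-labs.py -f any -n Arnold -s arnold.mp3 to_speak.txt

# read the text from stdin and play it with ffplay
echo "hello there" | python3 eleven-labs.py -p
```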

* examples: add speak_with_file()

as suggested in the review
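
In shell terms, `speak_with_file(command, text, path, voice_id)` boils down to the following sketch (variable names here are only placeholders; the actual helper is the C++ function added to examples/common.cpp below):

```
# write the text to the file, then hand the TTS command the voice id and the path
printf '%s' "$text_to_speak" > "$speak_file"
"$speak_cmd" "$voice_id" "$speak_file"   # e.g. ./examples/talk-llama/speak 2 to_speak.txt
```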

* talk and talk-llama: ignore to_speak.txt
Tamotsu Takahashi, 2024-02-24 16:24:47 +09:00 (committed by GitHub)
commit f18738f247, parent a0ddd8392c
13 changed files with 256 additions and 89 deletions

examples/common.cpp

@@ -863,3 +863,21 @@ bool is_file_exist(const char *fileName)
     std::ifstream infile(fileName);
     return infile.good();
 }
+
+bool speak_with_file(const std::string & command, const std::string & text, const std::string & path, int voice_id)
+{
+    std::ofstream speak_file(path.c_str());
+    if (speak_file.fail()) {
+        fprintf(stderr, "%s: failed to open speak_file\n", __func__);
+        return false;
+    } else {
+        speak_file.write(text.c_str(), text.size());
+        speak_file.close();
+        int ret = system((command + " " + std::to_string(voice_id) + " " + path).c_str());
+        if (ret != 0) {
+            fprintf(stderr, "%s: failed to speak\n", __func__);
+            return false;
+        }
+    }
+    return true;
+}

examples/common.h

@@ -306,3 +306,6 @@ int timestamp_to_sample(int64_t t, int n_samples, int whisper_sample_rate);
 
 // check if file exists using ifstream
 bool is_file_exist(const char *fileName);
+
+// write text to file, and call system("command voice_id file")
+bool speak_with_file(const std::string & command, const std::string & text, const std::string & path, int voice_id);

examples/talk-llama/.gitignore

@@ -1 +1,2 @@
 audio.mp3
+to_speak.txt

examples/talk-llama/eleven-labs.py

@@ -1,20 +1,80 @@
 import sys
-import importlib.util
-
-if importlib.util.find_spec("elevenlabs") is None:
-    print("elevenlabs library is not installed, you can install it to your enviroment using 'pip install elevenlabs'")
-    sys.exit()
-
-from elevenlabs import generate, play, save
-
-# Get a Voice object, by name or UUID
-voice = "Arnold" #Possible Voices: Adam Antoni Arnold Bella Domi Elli Josh
-
-# Generate the TTS
-audio = generate(
-    text=str(sys.argv[2:]),
-    voice=voice
-)
-
-# Save the TTS to a file
-save(audio, "audio.mp3")
+import argparse
+import textwrap
+
+parser = argparse.ArgumentParser(add_help=False,
+                                 formatter_class=argparse.RawTextHelpFormatter)
+parser.add_argument("-q", "--quick", action="store_true",
+                    help="skip checking the required library")
+
+modes = parser.add_argument_group("action")
+modes.add_argument("inputfile", metavar="TEXTFILE",
+                   nargs='?', type=argparse.FileType(), default=sys.stdin,
+                   help="read the text file (default: stdin)")
+modes.add_argument("-l", "--list", action="store_true",
+                   help="show the list of voices and exit")
+modes.add_argument("-h", "--help", action="help",
+                   help="show this help and exit")
+
+selopts = parser.add_argument_group("voice selection")
+selmodes = selopts.add_mutually_exclusive_group()
+selmodes.add_argument("-n", "--name",
+                      default="Arnold",
+                      help="get a voice object by name (default: Arnold)")
+selmodes.add_argument("-v", "--voice", type=int, metavar="NUMBER",
+                      help="get a voice object by number (see --list)")
+selopts.add_argument("-f", "--filter", action="append", metavar="KEY=VAL",
+                     default=["use case=narration"],
+                     help=textwrap.dedent('''\
+                         filter voices by labels (default: "use case=narration")
+                         this option can be used multiple times
+                         filtering will be disabled if the first -f has no "=" (e.g. -f "any")
+                     '''))
+
+outmodes = parser.add_argument_group("output")
+outgroup = outmodes.add_mutually_exclusive_group()
+outgroup.add_argument("-s", "--save", metavar="FILE",
+                      default="audio.mp3",
+                      help="save the TTS to a file (default: audio.mp3)")
+outgroup.add_argument("-p", "--play", action="store_true",
+                      help="play the TTS with ffplay")
+
+args = parser.parse_args()
+
+if not args.quick:
+    import importlib.util
+    if importlib.util.find_spec("elevenlabs") is None:
+        print("elevenlabs library is not installed, you can install it to your enviroment using 'pip install elevenlabs'")
+        sys.exit()
+
+from elevenlabs import voices, generate, play, save
+
+if args.filter and "=" in args.filter[0]:
+    voicelist = voices()
+    for f in args.filter:
+        label, value = f.split("=")
+        voicelist = filter(lambda x: x.labels.get(label) == value, voicelist)
+    voicelist = list(voicelist)
+else:
+    voicelist = list(voices())
+
+if args.list:
+    for i, v in enumerate(voicelist):
+        print(str(i) + ": " + v.name + " " + str(v.labels))
+    sys.exit()
+
+if args.voice:
+    voice = voicelist[args.voice % len(voicelist)]
+else:
+    voice = args.name
+    # if -n should consult -f, use the following
+    #voice = next(x for x in voicelist if x.name == args.name)
+
+# Generate the TTS
+audio = generate(
+    text=str(args.inputfile.read()),
+    voice=voice
+)
+
+if args.play:
+    play(audio)
+else:
+    save(audio, args.save)

examples/talk-llama/speak

@@ -1,32 +1,40 @@
 #!/bin/bash
 
 # Usage:
-#  speak.sh <voice_id> <text-to-speak>
+#  speak <voice_id> <textfile>
 
-# espeak
-#  Mac OS: brew install espeak
-#  Linux: apt-get install espeak
-#
-#espeak -v en-us+m$1 -s 225 -p 50 -a 200 -g 5 -k 5 "$2"
+function installed() { command -v $1 >/dev/null 2>&1; }
 
-# piper
-#
-#  https://github.com/rhasspy/piper
-#
-#  Tested with Linux:
-#
-#echo "$2" | piper --model ~/en_US-lessac-medium.onnx --output-raw | aplay -q -r 22050 -f S16_LE -t raw -
+if installed espeak; then
+  espeak -v en-us+m$1 -s 225 -p 50 -a 200 -g 5 -k 5 -f $2
+
+elif installed piper && installed aplay; then
+  cat $2 | piper --model ~/en_US-lessac-medium.onnx --output-raw | aplay -q -r 22050 -f S16_LE -t raw -
 
 # for Mac
-say "$2"
+elif installed say; then
+  say -f $2
 
 # Eleven Labs
-#  To use it, install the elevenlabs module from pip (pip install elevenlabs)
-#  It's possible to use the API for free with limited number of characters. To increase this limit register to https://beta.elevenlabs.io to get an api key and paste it after 'ELEVEN_API_KEY='
-#Keep the line commented to use the free version whitout api key
-#
-#export ELEVEN_API_KEY=your_api_key
-#wd=$(dirname $0)
-#script=$wd/eleven-labs.py
-#python3 $script $1 "$2" >/dev/null 2>&1
-#ffplay -autoexit -nodisp -loglevel quiet -hide_banner -i ./audio.mp3 >/dev/null 2>&1
+elif installed python3 && \
+     python3 -c 'import importlib.util; exit(not importlib.util.find_spec("elevenlabs"))' && \
+     installed ffplay; then
+  # It's possible to use the API for free with limited number of characters.
+  # To increase this limit register to https://beta.elevenlabs.io to get an api key
+  # and paste it after 'ELEVEN_API_KEY='
+  # Keep the line commented to use the free version without api key
+  #export ELEVEN_API_KEY=your_api_key
+  wd=$(dirname $0)
+  script=$wd/eleven-labs.py
+  python3 $script -q -p -v $1 $2 >/dev/null 2>&1
+
+  # Uncomment to keep the audio file
+  #python3 $script -q -s ./audio.mp3 -v $1 $2 >/dev/null 2>&1
+  #ffplay -autoexit -nodisp -loglevel quiet -hide_banner -i ./audio.mp3 >/dev/null 2>&1
+
+else
+  echo 'Install espeak ("brew install espeak" or "apt-get install espeak"),'
+  echo 'piper ("pip install piper-tts" or https://github.com/rhasspy/piper) with aplay,'
+  echo 'or elevenlabs ("pip install elevenlabs") with ffplay.'
+  echo '(export ELEVEN_API_KEY if you have an api key from https://beta.elevenlabs.io)'
+fi

examples/talk-llama/speak.bat

@@ -1 +1 @@
-@powershell -ExecutionPolicy Bypass -F examples\talk\speak.ps1 %1 %2
+@powershell -ExecutionPolicy Bypass -F examples\talk-llama\speak.ps1 %1 %2

examples/talk-llama/speak.ps1

@@ -1,12 +1,14 @@
 # Set-ExecutionPolicy -ExecutionPolicy Bypass -Scope CurrentUser
 param(
-    # voice options are David or Zira
-    [Parameter(Mandatory=$true)][string]$voice,
-    [Parameter(Mandatory=$true)][string]$text
+    [Parameter(Mandatory=$true)][int]$voicenum,
+    [Parameter(Mandatory=$true)][string]$textfile
 )
 
 Add-Type -AssemblyName System.Speech;
 $speak = New-Object System.Speech.Synthesis.SpeechSynthesizer;
-$speak.SelectVoice("Microsoft $voice Desktop");
+$voiceoptions = $speak.GetInstalledVoices("en-US");
+$voice = $voiceoptions[$voicenum % $voiceoptions.count];
+$speak.SelectVoice($voice.VoiceInfo.Name);
 $speak.Rate="0";
+$text = Get-Content -Path $textfile;
 $speak.Speak($text);

examples/talk-llama/talk-llama.cpp

@@ -75,6 +75,7 @@ struct whisper_params {
     std::string model_wsp = "models/ggml-base.en.bin";
     std::string model_llama = "models/ggml-llama-7B.bin";
     std::string speak = "./examples/talk-llama/speak";
+    std::string speak_file = "./examples/talk-llama/to_speak.txt";
     std::string prompt = "";
     std::string fname_out;
     std::string path_session = ""; // path to file for saving/loading model eval state
@@ -113,6 +114,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
         else if (arg == "-mw" || arg == "--model-whisper") { params.model_wsp = argv[++i]; }
         else if (arg == "-ml" || arg == "--model-llama")   { params.model_llama = argv[++i]; }
         else if (arg == "-s"  || arg == "--speak")         { params.speak = argv[++i]; }
+        else if (arg == "-sf" || arg == "--speak-file")    { params.speak_file = argv[++i]; }
         else if (arg == "--prompt-file") {
             std::ifstream file(argv[++i]);
             std::copy(std::istreambuf_iterator<char>(file), std::istreambuf_iterator<char>(), back_inserter(params.prompt));
@@ -160,6 +162,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
     fprintf(stderr, "  -mw FILE, --model-whisper [%-7s] whisper model file\n", params.model_wsp.c_str());
     fprintf(stderr, "  -ml FILE, --model-llama   [%-7s] llama model file\n", params.model_llama.c_str());
     fprintf(stderr, "  -s FILE,  --speak TEXT    [%-7s] command for TTS\n", params.speak.c_str());
+    fprintf(stderr, "  -sf FILE, --speak-file    [%-7s] file to pass to TTS\n", params.speak_file.c_str());
     fprintf(stderr, "  --prompt-file FNAME       [%-7s] file with custom prompt to start dialog\n", "");
     fprintf(stderr, "  --session FNAME                   file to cache model state in (may be large!) (default: none)\n");
     fprintf(stderr, "  -f FNAME, --file FNAME    [%-7s] text output file name\n", params.fname_out.c_str());
@@ -546,10 +549,7 @@ int main(int argc, char ** argv) {
             // optionally give audio feedback that the current text is being processed
             if (!params.heard_ok.empty()) {
-                int ret = system((params.speak + " " + std::to_string(voice_id) + " '" + params.heard_ok + "'").c_str());
-                if (ret != 0) {
-                    fprintf(stderr, "%s: failed to speak\n", __func__);
-                }
+                speak_with_file(params.speak, params.heard_ok, params.speak_file, voice_id);
             }
 
             // remove text between brackets using regex
@@ -748,11 +748,7 @@ int main(int argc, char ** argv) {
                 }
             }
 
-            text_to_speak = ::replace(text_to_speak, "'", "'\"'\"'");
-            int ret = system((params.speak + " " + std::to_string(voice_id) + " '" + text_to_speak + "'").c_str());
-            if (ret != 0) {
-                fprintf(stderr, "%s: failed to speak\n", __func__);
-            }
+            speak_with_file(params.speak, text_to_speak, params.speak_file, voice_id);
 
             audio.clear();
         }

examples/talk/.gitignore

@@ -1 +1,2 @@
 audio.mp3
+to_speak.txt

examples/talk/eleven-labs.py

@@ -1,20 +1,80 @@
 import sys
-import importlib.util
-
-if importlib.util.find_spec("elevenlabs") is None:
-    print("elevenlabs library is not installed, you can install it to your enviroment using 'pip install elevenlabs'")
-    sys.exit()
-
-from elevenlabs import generate, play, save
-
-# Get a Voice object, by name or UUID
-voice = "Arnold" #Possible Voices: Adam Antoni Arnold Bella Domi Elli Josh
-
-# Generate the TTS
-audio = generate(
-    text=str(sys.argv[2:]),
-    voice=voice
-)
-
-# Save the TTS to a file
-save(audio, "audio.mp3")
+import argparse
+import textwrap
+
+parser = argparse.ArgumentParser(add_help=False,
+                                 formatter_class=argparse.RawTextHelpFormatter)
+parser.add_argument("-q", "--quick", action="store_true",
+                    help="skip checking the required library")
+
+modes = parser.add_argument_group("action")
+modes.add_argument("inputfile", metavar="TEXTFILE",
+                   nargs='?', type=argparse.FileType(), default=sys.stdin,
+                   help="read the text file (default: stdin)")
+modes.add_argument("-l", "--list", action="store_true",
+                   help="show the list of voices and exit")
+modes.add_argument("-h", "--help", action="help",
+                   help="show this help and exit")
+
+selopts = parser.add_argument_group("voice selection")
+selmodes = selopts.add_mutually_exclusive_group()
+selmodes.add_argument("-n", "--name",
+                      default="Arnold",
+                      help="get a voice object by name (default: Arnold)")
+selmodes.add_argument("-v", "--voice", type=int, metavar="NUMBER",
+                      help="get a voice object by number (see --list)")
+selopts.add_argument("-f", "--filter", action="append", metavar="KEY=VAL",
+                     default=["use case=narration"],
+                     help=textwrap.dedent('''\
+                         filter voices by labels (default: "use case=narration")
+                         this option can be used multiple times
+                         filtering will be disabled if the first -f has no "=" (e.g. -f "any")
+                     '''))
+
+outmodes = parser.add_argument_group("output")
+outgroup = outmodes.add_mutually_exclusive_group()
+outgroup.add_argument("-s", "--save", metavar="FILE",
+                      default="audio.mp3",
+                      help="save the TTS to a file (default: audio.mp3)")
+outgroup.add_argument("-p", "--play", action="store_true",
+                      help="play the TTS with ffplay")
+
+args = parser.parse_args()
+
+if not args.quick:
+    import importlib.util
+    if importlib.util.find_spec("elevenlabs") is None:
+        print("elevenlabs library is not installed, you can install it to your enviroment using 'pip install elevenlabs'")
+        sys.exit()
+
+from elevenlabs import voices, generate, play, save
+
+if args.filter and "=" in args.filter[0]:
+    voicelist = voices()
+    for f in args.filter:
+        label, value = f.split("=")
+        voicelist = filter(lambda x: x.labels.get(label) == value, voicelist)
+    voicelist = list(voicelist)
+else:
+    voicelist = list(voices())
+
+if args.list:
+    for i, v in enumerate(voicelist):
+        print(str(i) + ": " + v.name + " " + str(v.labels))
+    sys.exit()
+
+if args.voice:
+    voice = voicelist[args.voice % len(voicelist)]
+else:
+    voice = args.name
+    # if -n should consult -f, use the following
+    #voice = next(x for x in voicelist if x.name == args.name)
+
+# Generate the TTS
+audio = generate(
+    text=str(args.inputfile.read()),
+    voice=voice
+)
+
+if args.play:
+    play(audio)
+else:
+    save(audio, args.save)

examples/talk/speak

@@ -1,24 +1,40 @@
 #!/bin/bash
 
 # Usage:
-#  speak.sh <voice_id> <text-to-speak>
+#  speak <voice_id> <textfile>
 
-# espeak
-#  Mac OS: brew install espeak
-#  Linux: apt-get install espeak
-#
-#espeak -v en-us+m$1 -s 175 -p 50 -a 200 -g 5 -k 5 "$2"
+function installed() { command -v $1 >/dev/null 2>&1; }
 
-# Mac OS "say" command
-say "$2"
+if installed espeak; then
+  espeak -v en-us+m$1 -s 225 -p 50 -a 200 -g 5 -k 5 -f $2
+
+elif installed piper && installed aplay; then
+  cat $2 | piper --model ~/en_US-lessac-medium.onnx --output-raw | aplay -q -r 22050 -f S16_LE -t raw -
+
+# for Mac
+elif installed say; then
+  say -f $2
 
 # Eleven Labs
-#  To use it, install the elevenlabs module from pip (pip install elevenlabs)
-#  It's possible to use the API for free with limited number of characters. To increase this limit register to https://beta.elevenlabs.io to get an api key and paste it after 'ELEVEN_API_KEY='
-#Keep the line commented to use the free version without api key
-#
-#export ELEVEN_API_KEY=your_api_key
-#wd=$(dirname $0)
-#script=$wd/eleven-labs.py
-#python3 $script $1 "$2"
-#ffplay -autoexit -nodisp -loglevel quiet -hide_banner -i ./audio.mp3
+elif installed python3 && \
+     python3 -c 'import importlib.util; exit(not importlib.util.find_spec("elevenlabs"))' && \
+     installed ffplay; then
+  # It's possible to use the API for free with limited number of characters.
+  # To increase this limit register to https://beta.elevenlabs.io to get an api key
+  # and paste it after 'ELEVEN_API_KEY='
+  # Keep the line commented to use the free version without api key
+  #export ELEVEN_API_KEY=your_api_key
+  wd=$(dirname $0)
+  script=$wd/eleven-labs.py
+  python3 $script -q -p -v $1 $2 >/dev/null 2>&1
+
+  # Uncomment to keep the audio file
+  #python3 $script -q -s ./audio.mp3 -v $1 $2 >/dev/null 2>&1
+  #ffplay -autoexit -nodisp -loglevel quiet -hide_banner -i ./audio.mp3 >/dev/null 2>&1
+
+else
+  echo 'Install espeak ("brew install espeak" or "apt-get install espeak"),'
+  echo 'piper ("pip install piper-tts" or https://github.com/rhasspy/piper) with aplay,'
+  echo 'or elevenlabs ("pip install elevenlabs") with ffplay.'
+  echo '(export ELEVEN_API_KEY if you have an api key from https://beta.elevenlabs.io)'
+fi

examples/talk/speak.ps1

@@ -1,12 +1,14 @@
 # Set-ExecutionPolicy -ExecutionPolicy Bypass -Scope CurrentUser
 param(
-    # voice options are David or Zira
-    [Parameter(Mandatory=$true)][string]$voice,
-    [Parameter(Mandatory=$true)][string]$text
+    [Parameter(Mandatory=$true)][int]$voicenum,
+    [Parameter(Mandatory=$true)][string]$textfile
 )
 
 Add-Type -AssemblyName System.Speech;
 $speak = New-Object System.Speech.Synthesis.SpeechSynthesizer;
-$speak.SelectVoice("Microsoft $voice Desktop");
+$voiceoptions = $speak.GetInstalledVoices("en-US");
+$voice = $voiceoptions[$voicenum % $voiceoptions.count];
+$speak.SelectVoice($voice.VoiceInfo.Name);
 $speak.Rate="0";
+$text = Get-Content -Path $textfile;
 $speak.Speak($text);

examples/talk/talk.cpp

@@ -38,6 +38,7 @@ struct whisper_params {
     std::string model_wsp = "models/ggml-base.en.bin";
     std::string model_gpt = "models/ggml-gpt-2-117M.bin";
     std::string speak = "./examples/talk/speak";
+    std::string speak_file = "./examples/talk/to_speak.txt";
     std::string fname_out;
 };
@@ -68,6 +69,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
         else if (arg == "-mw" || arg == "--model-whisper") { params.model_wsp = argv[++i]; }
         else if (arg == "-mg" || arg == "--model-gpt")     { params.model_gpt = argv[++i]; }
         else if (arg == "-s"  || arg == "--speak")         { params.speak = argv[++i]; }
+        else if (arg == "-sf" || arg == "--speak_file")    { params.speak_file = argv[++i]; }
         else if (arg == "-f"  || arg == "--file")          { params.fname_out = argv[++i]; }
         else {
             fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
@@ -102,6 +104,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
     fprintf(stderr, "  -mw FILE, --model-whisper [%-7s] whisper model file\n", params.model_wsp.c_str());
     fprintf(stderr, "  -mg FILE, --model-gpt     [%-7s] gpt model file\n", params.model_gpt.c_str());
     fprintf(stderr, "  -s FILE,  --speak TEXT    [%-7s] command for TTS\n", params.speak.c_str());
+    fprintf(stderr, "  -sf FILE, --speak_file    [%-7s] file to pass to TTS\n", params.speak_file.c_str());
     fprintf(stderr, "  -f FNAME, --file FNAME    [%-7s] text output file name\n", params.fname_out.c_str());
     fprintf(stderr, "\n");
 }
@@ -316,7 +319,7 @@ int main(int argc, char ** argv) {
             std::string prompt = ::replace(::replace(k_prompt, "{0}", params.person), "{1}", prompt_base);
 
             text_to_speak = gpt2_gen_text(ctx_gpt, prompt.c_str(), params.max_tokens);
-            text_to_speak = std::regex_replace(text_to_speak, std::regex("[^a-zA-Z0-9\\.,\\?!\\s\\:\\'\\-]"), "");
+            //text_to_speak = std::regex_replace(text_to_speak, std::regex("[^a-zA-Z0-9\\.,\\?!\\s\\:\\'\\-]"), "");
             text_to_speak = text_to_speak.substr(0, text_to_speak.find_first_of('\n'));
 
             // remove first 2 lines of base prompt
@@ -354,10 +357,7 @@ int main(int argc, char ** argv) {
             gpt2_set_prompt(ctx_gpt, prompt_base.c_str());
 
             text_to_speak = ::replace(text_to_speak, params.person + ": ", "");
-            int ret = system((params.speak + " " + std::to_string(voice_id) + " \"" + text_to_speak + "\"").c_str());
-            if (ret != 0) {
-                fprintf(stderr, "%s: system() failed!\n", __func__);
-            }
+            speak_with_file(params.speak, text_to_speak, params.speak_file, voice_id);
 
             audio.clear();
         }