whisper : split_on_word
no longer trims (#1046)
This commit is contained in:
parent
3f7a03ebe3
commit
72deb41eb2
28
whisper.cpp
28
whisper.cpp
@ -3401,26 +3401,6 @@ static void whisper_exp_compute_token_level_timestamps(
|
|||||||
float thold_pt,
|
float thold_pt,
|
||||||
float thold_ptsum);
|
float thold_ptsum);
|
||||||
|
|
||||||
// Remove leading whitespace from `s`, modifying the string in place.
// Whitespace is whatever std::isspace reports for the current C locale.
static inline void ltrim(std::string &s) {
    // Route each character through unsigned char before calling std::isspace,
    // so bytes with the high bit set are never passed as negative values.
    const auto is_blank = [](unsigned char c) { return std::isspace(c) != 0; };

    auto first_kept = s.begin();
    while (first_kept != s.end() && is_blank(*first_kept)) {
        ++first_kept;
    }

    s.erase(s.begin(), first_kept);
}
|
|
||||||
|
|
||||||
// Remove trailing whitespace from `s`, modifying the string in place.
// Whitespace is whatever std::isspace reports for the current C locale.
static inline void rtrim(std::string &s) {
    // Walk backwards from the end until a non-whitespace character is found;
    // `keep` ends up as the length of the prefix that survives.
    std::size_t keep = s.size();
    while (keep > 0 && std::isspace(static_cast<unsigned char>(s[keep - 1]))) {
        --keep;
    }

    s.erase(keep);
}
|
|
||||||
|
|
||||||
// trim from both ends (in place)
|
|
||||||
static inline void trim(std::string &s) {
|
|
||||||
rtrim(s);
|
|
||||||
ltrim(s);
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline bool should_split_on_word(const char * txt, bool split_on_word) {
|
static inline bool should_split_on_word(const char * txt, bool split_on_word) {
|
||||||
if (!split_on_word) return true;
|
if (!split_on_word) return true;
|
||||||
|
|
||||||
@ -3447,11 +3427,6 @@ static int whisper_wrap_segment(struct whisper_context & ctx, struct whisper_sta
|
|||||||
const int cur = strlen(txt);
|
const int cur = strlen(txt);
|
||||||
|
|
||||||
if (acc + cur > max_len && i > 0 && should_split_on_word(txt, split_on_word)) {
|
if (acc + cur > max_len && i > 0 && should_split_on_word(txt, split_on_word)) {
|
||||||
// split here
|
|
||||||
if (split_on_word) {
|
|
||||||
trim(text);
|
|
||||||
}
|
|
||||||
|
|
||||||
state.result_all.back().text = std::move(text);
|
state.result_all.back().text = std::move(text);
|
||||||
state.result_all.back().t1 = token.t0;
|
state.result_all.back().t1 = token.t0;
|
||||||
state.result_all.back().tokens.resize(i);
|
state.result_all.back().tokens.resize(i);
|
||||||
@ -3479,9 +3454,6 @@ static int whisper_wrap_segment(struct whisper_context & ctx, struct whisper_sta
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (split_on_word) {
|
|
||||||
trim(text);
|
|
||||||
}
|
|
||||||
state.result_all.back().text = std::move(text);
|
state.result_all.back().text = std::move(text);
|
||||||
|
|
||||||
return res;
|
return res;
|
||||||
|
Loading…
Reference in New Issue
Block a user