diff --git a/src/whisper.cpp b/src/whisper.cpp index ddeecc5..810a8d2 100644 --- a/src/whisper.cpp +++ b/src/whisper.cpp @@ -6060,7 +6060,7 @@ int whisper_full_with_state( { const auto & best_decoder = state->decoders[best_decoder_id]; - const auto seek_delta = best_decoder.seek_delta; + auto seek_delta = best_decoder.seek_delta; const auto result_len = best_decoder.sequence.result_len; const auto & tokens_cur = best_decoder.sequence.tokens; @@ -6201,6 +6201,15 @@ int whisper_full_with_state( } } + // ref: https://github.com/ggerganov/whisper.cpp/pull/2629 + const bool single_timestamp_ending = tokens_cur.size() > 1 && + tokens_cur[tokens_cur.size() - 2].id < whisper_token_beg(ctx) && + tokens_cur[tokens_cur.size() - 1].id > whisper_token_beg(ctx); + if (single_timestamp_ending) { + WHISPER_LOG_DEBUG("single timestamp ending - skip entire chunk\n"); + seek_delta = std::min(seek_end - seek, WHISPER_CHUNK_SIZE * 100); + } + // update audio window seek += seek_delta;