daihui.zhang
committed on
Commit
·
cd7fb92
1
Parent(s):
fa46942
update
Browse files
- transcribe/strategy.py +6 -6
transcribe/strategy.py
CHANGED
|
@@ -253,8 +253,8 @@ class TranscriptStabilityAnalyzer:
|
|
| 253 |
chunks = curr.split_by(SplitMode.PUNCTUATION)
|
| 254 |
if len(chunks) > 2:
|
| 255 |
stable, remaining = chunks[:-2], chunks[-2:]
|
| 256 |
-
stable_str = self.merge_chunks(stable)
|
| 257 |
-
remaining_str = self.merge_chunks(remaining)
|
| 258 |
yield from self._yield_commit_results(
|
| 259 |
stable, remaining, is_end_sentence=True # 暂时硬编码为True
|
| 260 |
)
|
|
@@ -266,10 +266,10 @@ class TranscriptStabilityAnalyzer:
|
|
| 266 |
|
| 267 |
|
| 268 |
def _yield_commit_results(self, stable_chunk, remaining_chunks, is_end_sentence: bool) -> Iterator[TranscriptResult]:
|
| 269 |
-
stable_str = stable_chunk.join() if hasattr(stable_chunk, "join") else self.merge_chunks(
|
| 270 |
remaining_str = self.merge_chunks(remaining_chunks)
|
| 271 |
|
| 272 |
-
frame_cut_index = stable_chunk.get_buffer_index()
|
| 273 |
logger.debug(f"Current cut index: {frame_cut_index}, Stable string: {stable_str}, Remaining_str:{remaining_str}")
|
| 274 |
|
| 275 |
prev_seg_id = self._transcript_buffer.get_seg_id()
|
|
@@ -279,7 +279,7 @@ class TranscriptStabilityAnalyzer:
|
|
| 279 |
logger.debug(f"current buffer: {self._transcript_buffer.__dict__}")
|
| 280 |
|
| 281 |
if curr_seg_id > prev_seg_id:
|
| 282 |
-
# 表示生成了一个新段落
|
| 283 |
yield TranscriptResult(
|
| 284 |
seg_id=prev_seg_id,
|
| 285 |
cut_index=frame_cut_index,
|
|
@@ -290,7 +290,7 @@ class TranscriptStabilityAnalyzer:
|
|
| 290 |
# 如果还有挂起的文本
|
| 291 |
if (current_not_commit_text := self._transcript_buffer.current_not_commit_text.strip()):
|
| 292 |
yield TranscriptResult(
|
| 293 |
-
seg_id=
|
| 294 |
cut_index=frame_cut_index,
|
| 295 |
context=current_not_commit_text
|
| 296 |
)
|
|
|
|
| 253 |
chunks = curr.split_by(SplitMode.PUNCTUATION)
|
| 254 |
if len(chunks) > 2:
|
| 255 |
stable, remaining = chunks[:-2], chunks[-2:]
|
| 256 |
+
# stable_str = self.merge_chunks(stable)
|
| 257 |
+
# remaining_str = self.merge_chunks(remaining)
|
| 258 |
yield from self._yield_commit_results(
|
| 259 |
stable, remaining, is_end_sentence=True # 暂时硬编码为True
|
| 260 |
)
|
|
|
|
| 266 |
|
| 267 |
|
| 268 |
def _yield_commit_results(self, stable_chunk, remaining_chunks, is_end_sentence: bool) -> Iterator[TranscriptResult]:
|
| 269 |
+
stable_str = stable_chunk.join() if hasattr(stable_chunk, "join") else self.merge_chunks(stable_chunk)
|
| 270 |
remaining_str = self.merge_chunks(remaining_chunks)
|
| 271 |
|
| 272 |
+
frame_cut_index = stable_chunk[-1].get_buffer_index() if isinstance(stable_chunk, list) else stable_chunk.get_buffer_index()
|
| 273 |
logger.debug(f"Current cut index: {frame_cut_index}, Stable string: {stable_str}, Remaining_str:{remaining_str}")
|
| 274 |
|
| 275 |
prev_seg_id = self._transcript_buffer.get_seg_id()
|
|
|
|
| 279 |
logger.debug(f"current buffer: {self._transcript_buffer.__dict__}")
|
| 280 |
|
| 281 |
if curr_seg_id > prev_seg_id:
|
| 282 |
+
# 表示生成了一个新段落 换行
|
| 283 |
yield TranscriptResult(
|
| 284 |
seg_id=prev_seg_id,
|
| 285 |
cut_index=frame_cut_index,
|
|
|
|
| 290 |
# 如果还有挂起的文本
|
| 291 |
if (current_not_commit_text := self._transcript_buffer.current_not_commit_text.strip()):
|
| 292 |
yield TranscriptResult(
|
| 293 |
+
seg_id=curr_seg_id+1,
|
| 294 |
cut_index=frame_cut_index,
|
| 295 |
context=current_not_commit_text
|
| 296 |
)
|