daihui.zhang committed on
Commit
cd7fb92
·
1 Parent(s): fa46942
Files changed (1) hide show
  1. transcribe/strategy.py +6 -6
transcribe/strategy.py CHANGED
@@ -253,8 +253,8 @@ class TranscriptStabilityAnalyzer:
253
  chunks = curr.split_by(SplitMode.PUNCTUATION)
254
  if len(chunks) > 2:
255
  stable, remaining = chunks[:-2], chunks[-2:]
256
- stable_str = self.merge_chunks(stable)
257
- remaining_str = self.merge_chunks(remaining)
258
  yield from self._yield_commit_results(
259
  stable, remaining, is_end_sentence=True # 暂时硬编码为True
260
  )
@@ -266,10 +266,10 @@ class TranscriptStabilityAnalyzer:
266
 
267
 
268
  def _yield_commit_results(self, stable_chunk, remaining_chunks, is_end_sentence: bool) -> Iterator[TranscriptResult]:
269
- stable_str = stable_chunk.join() if hasattr(stable_chunk, "join") else self.merge_chunks([stable_chunk])
270
  remaining_str = self.merge_chunks(remaining_chunks)
271
 
272
- frame_cut_index = stable_chunk.get_buffer_index()
273
  logger.debug(f"Current cut index: {frame_cut_index}, Stable string: {stable_str}, Remaining_str:{remaining_str}")
274
 
275
  prev_seg_id = self._transcript_buffer.get_seg_id()
@@ -279,7 +279,7 @@ class TranscriptStabilityAnalyzer:
279
  logger.debug(f"current buffer: {self._transcript_buffer.__dict__}")
280
 
281
  if curr_seg_id > prev_seg_id:
282
- # 表示生成了一个新段落
283
  yield TranscriptResult(
284
  seg_id=prev_seg_id,
285
  cut_index=frame_cut_index,
@@ -290,7 +290,7 @@ class TranscriptStabilityAnalyzer:
290
  # 如果还有挂起的文本
291
  if (current_not_commit_text := self._transcript_buffer.current_not_commit_text.strip()):
292
  yield TranscriptResult(
293
- seg_id=self._transcript_buffer.get_seg_id(),
294
  cut_index=frame_cut_index,
295
  context=current_not_commit_text
296
  )
 
253
  chunks = curr.split_by(SplitMode.PUNCTUATION)
254
  if len(chunks) > 2:
255
  stable, remaining = chunks[:-2], chunks[-2:]
256
+ # stable_str = self.merge_chunks(stable)
257
+ # remaining_str = self.merge_chunks(remaining)
258
  yield from self._yield_commit_results(
259
  stable, remaining, is_end_sentence=True # 暂时硬编码为True
260
  )
 
266
 
267
 
268
  def _yield_commit_results(self, stable_chunk, remaining_chunks, is_end_sentence: bool) -> Iterator[TranscriptResult]:
269
+ stable_str = stable_chunk.join() if hasattr(stable_chunk, "join") else self.merge_chunks(stable_chunk)
270
  remaining_str = self.merge_chunks(remaining_chunks)
271
 
272
+ frame_cut_index = stable_chunk[-1].get_buffer_index() if isinstance(stable_chunk, list) else stable_chunk.get_buffer_index()
273
  logger.debug(f"Current cut index: {frame_cut_index}, Stable string: {stable_str}, Remaining_str:{remaining_str}")
274
 
275
  prev_seg_id = self._transcript_buffer.get_seg_id()
 
279
  logger.debug(f"current buffer: {self._transcript_buffer.__dict__}")
280
 
281
  if curr_seg_id > prev_seg_id:
282
+ # 表示生成了一个新段落 换行
283
  yield TranscriptResult(
284
  seg_id=prev_seg_id,
285
  cut_index=frame_cut_index,
 
290
  # 如果还有挂起的文本
291
  if (current_not_commit_text := self._transcript_buffer.current_not_commit_text.strip()):
292
  yield TranscriptResult(
293
+ seg_id=curr_seg_id+1,
294
  cut_index=frame_cut_index,
295
  context=current_not_commit_text
296
  )