liuyang committed on
Commit
3a6e3af
·
1 Parent(s): 928c477

Enhance transcription segment coverage calculation: the overlap check now sums the coverage contributed by all transcription segments instead of testing whether any single segment covers 60%, and a diarization segment is re-transcribed if less than 85% of its duration is covered. This improves accuracy in identifying segments that need attention.

Browse files
Files changed (1) hide show
  1. app.py +10 -1
app.py CHANGED
@@ -726,7 +726,16 @@ class WhisperTranscriber:
726
  d_end = float(dseg["end"])
727
  has_overlap = False
728
  for seg in transcription_results:
729
- if interval_overlap(d_start, d_end, float(seg["start"]), float(seg["end"])) > abs(d_start - d_end) * 0.6:
 
 
 
 
 
 
 
 
 
730
  has_overlap = True
731
  break
732
  if not has_overlap:
 
726
  d_end = float(dseg["end"])
727
  has_overlap = False
728
  for seg in transcription_results:
729
+ # Instead of just checking if any single segment covers 60%,
730
+ # let's calculate total coverage from ALL transcription segments
731
+ total_coverage = 0.0
732
+ for s in transcription_results:
733
+ overlap = interval_overlap(d_start, d_end, float(s["start"]), float(s["end"]))
734
+ total_coverage += overlap
735
+
736
+ coverage_ratio = total_coverage / (d_end - d_start)
737
+ if coverage_ratio < 0.85: # Less than 85% covered
738
+ # This diarization segment needs re-transcription
739
  has_overlap = True
740
  break
741
  if not has_overlap: