liuyang committed on
Commit
c8b690c
·
1 Parent(s): a4d86b2

update threshold

Browse files
Files changed (1) hide show
  1. app.py +10 -15
app.py CHANGED
@@ -724,21 +724,16 @@ class WhisperTranscriber:
724
  for dseg in diarization_segments:
725
  d_start = float(dseg["start"])
726
  d_end = float(dseg["end"])
727
- has_overlap = False
728
- for seg in transcription_results:
729
- # Instead of just checking if any single segment covers 60%,
730
- # let's calculate total coverage from ALL transcription segments
731
- total_coverage = 0.0
732
- for s in transcription_results:
733
- overlap = interval_overlap(d_start, d_end, float(s["start"]), float(s["end"]))
734
- total_coverage += overlap
735
-
736
- coverage_ratio = total_coverage / (d_end - d_start)
737
- if coverage_ratio >= 0.85: # Less than 80% covered
738
- # This diarization segment needs re-transcription
739
- has_overlap = True
740
- break
741
- if not has_overlap:
742
  unmatched_diarization_segments.append({
743
  "start": d_start,
744
  "end": d_end,
 
724
  for dseg in diarization_segments:
725
  d_start = float(dseg["start"])
726
  d_end = float(dseg["end"])
727
+ # Calculate total coverage
728
+ total_coverage = 0.0
729
+ for s in transcription_results:
730
+ overlap = interval_overlap(d_start, d_end, float(s["start"]), float(s["end"]))
731
+ total_coverage += overlap
732
+
733
+ coverage_ratio = total_coverage / (d_end - d_start)
734
+ is_well_covered = coverage_ratio >= 0.85 # 85% or more covered
735
+
736
+ if not is_well_covered and (d_end - d_start) > 1.5: # If poorly covered, add to unmatched list
 
 
 
 
 
737
  unmatched_diarization_segments.append({
738
  "start": d_start,
739
  "end": d_end,