Spaces:
Runtime error
Runtime error
liuyang
committed on
Commit
·
3a6e3af
1
Parent(s):
928c477
Enhance transcription segment coverage calculation: Updated the overlap check to consider total coverage from all transcription segments, ensuring segments are re-transcribed if less than 85% of their duration is covered. This improves accuracy in identifying segments needing attention.
Browse files
app.py
CHANGED
|
@@ -726,7 +726,16 @@ class WhisperTranscriber:
|
|
| 726 |
d_end = float(dseg["end"])
|
| 727 |
has_overlap = False
|
| 728 |
for seg in transcription_results:
|
| 729 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 730 |
has_overlap = True
|
| 731 |
break
|
| 732 |
if not has_overlap:
|
|
|
|
| 726 |
d_end = float(dseg["end"])
|
| 727 |
has_overlap = False
|
| 728 |
for seg in transcription_results:
|
| 729 |
+
# Instead of just checking if any single segment covers 60%,
|
| 730 |
+
# let's calculate total coverage from ALL transcription segments
|
| 731 |
+
total_coverage = 0.0
|
| 732 |
+
for s in transcription_results:
|
| 733 |
+
overlap = interval_overlap(d_start, d_end, float(s["start"]), float(s["end"]))
|
| 734 |
+
total_coverage += overlap
|
| 735 |
+
|
| 736 |
+
coverage_ratio = total_coverage / (d_end - d_start)
|
| 737 |
+
if coverage_ratio < 0.85: # Less than 85% covered
|
| 738 |
+
# This diarization segment needs re-transcription
|
| 739 |
has_overlap = True
|
| 740 |
break
|
| 741 |
if not has_overlap:
|