Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -639,7 +639,6 @@ def extract_ocr_subtitles_parallel(video_path, transcription_json, interval_sec=
|
|
| 639 |
logger.info(f"✅ OCR extraction completed: {len(ocr_results)} frames successful, {ocr_failures} frames failed.")
|
| 640 |
return ocr_results
|
| 641 |
|
| 642 |
-
|
| 643 |
def collapse_ocr_subtitles(ocr_json, text_similarity_threshold=90):
|
| 644 |
collapsed = []
|
| 645 |
current = None
|
|
@@ -654,17 +653,19 @@ def collapse_ocr_subtitles(ocr_json, text_similarity_threshold=90):
|
|
| 654 |
sim = fuzz.ratio(current["text"], text)
|
| 655 |
if sim >= text_similarity_threshold:
|
| 656 |
current["end"] = time
|
|
|
|
| 657 |
else:
|
|
|
|
|
|
|
|
|
|
| 658 |
collapsed.append(current)
|
| 659 |
current = {"start": time, "end": time, "text": text}
|
| 660 |
if current:
|
| 661 |
collapsed.append(current)
|
| 662 |
|
| 663 |
-
# Log collapsed OCR summary
|
| 664 |
logger.info(f"✅ OCR subtitles collapsed into {len(collapsed)} segments.")
|
| 665 |
for idx, seg in enumerate(collapsed):
|
| 666 |
logger.debug(f"[OCR Collapsed {idx}] {seg['start']:.2f}s - {seg['end']:.2f}s: {seg['text'][:50]}...")
|
| 667 |
-
|
| 668 |
return collapsed
|
| 669 |
|
| 670 |
def merge_speaker_and_time_from_whisperx(ocr_json, whisperx_json, text_sim_threshold=80, replace_threshold=90):
|
|
|
|
| 639 |
logger.info(f"✅ OCR extraction completed: {len(ocr_results)} frames successful, {ocr_failures} frames failed.")
|
| 640 |
return ocr_results
|
| 641 |
|
|
|
|
| 642 |
def collapse_ocr_subtitles(ocr_json, text_similarity_threshold=90):
|
| 643 |
collapsed = []
|
| 644 |
current = None
|
|
|
|
| 653 |
sim = fuzz.ratio(current["text"], text)
|
| 654 |
if sim >= text_similarity_threshold:
|
| 655 |
current["end"] = time
|
| 656 |
+
logger.debug(f"MERGED: Current end extended to {time:.2f}s for text: '{current['text'][:50]}...' (Similarity: {sim})")
|
| 657 |
else:
|
| 658 |
+
logger.debug(f"NOT MERGING (Similarity: {sim} < Threshold: {text_similarity_threshold}):")
|
| 659 |
+
logger.debug(f" Previous segment: {current['start']:.2f}s - {current['end']:.2f}s: '{current['text'][:50]}...'")
|
| 660 |
+
logger.debug(f" New segment: {time:.2f}s: '{text[:50]}...'")
|
| 661 |
collapsed.append(current)
|
| 662 |
current = {"start": time, "end": time, "text": text}
|
| 663 |
if current:
|
| 664 |
collapsed.append(current)
|
| 665 |
|
|
|
|
| 666 |
logger.info(f"✅ OCR subtitles collapsed into {len(collapsed)} segments.")
|
| 667 |
for idx, seg in enumerate(collapsed):
|
| 668 |
logger.debug(f"[OCR Collapsed {idx}] {seg['start']:.2f}s - {seg['end']:.2f}s: {seg['text'][:50]}...")
|
|
|
|
| 669 |
return collapsed
|
| 670 |
|
| 671 |
def merge_speaker_and_time_from_whisperx(ocr_json, whisperx_json, text_sim_threshold=80, replace_threshold=90):
|