Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -985,53 +985,6 @@ def merge_speaker_and_time_from_whisperx(
|
|
| 985 |
|
| 986 |
return merged
|
| 987 |
|
| 988 |
-
# def merge_speaker_and_time_from_whisperx(ocr_json, whisperx_json, text_sim_threshold=80, replace_threshold=90):
|
| 989 |
-
# merged = []
|
| 990 |
-
# used_whisperx = set()
|
| 991 |
-
|
| 992 |
-
# for ocr in ocr_json:
|
| 993 |
-
# ocr_start = ocr["start"]
|
| 994 |
-
# ocr_end = ocr["end"]
|
| 995 |
-
# ocr_text = ocr["text"]
|
| 996 |
-
|
| 997 |
-
# best_match = None
|
| 998 |
-
# best_score = -1
|
| 999 |
-
# best_idx = None
|
| 1000 |
-
|
| 1001 |
-
# for idx, wx in enumerate(whisperx_json):
|
| 1002 |
-
# wx_start, wx_end = wx["start"], wx["end"]
|
| 1003 |
-
# wx_text = wx["text"]
|
| 1004 |
-
|
| 1005 |
-
# if idx in used_whisperx:
|
| 1006 |
-
# continue # Already matched
|
| 1007 |
-
|
| 1008 |
-
# time_center_diff = abs((ocr_start + ocr_end)/2 - (wx_start + wx_end)/2)
|
| 1009 |
-
# if time_center_diff > 3:
|
| 1010 |
-
# continue
|
| 1011 |
-
|
| 1012 |
-
# sim = fuzz.ratio(ocr_text, wx_text)
|
| 1013 |
-
# if sim > best_score:
|
| 1014 |
-
# best_score = sim
|
| 1015 |
-
# best_match = wx
|
| 1016 |
-
# best_idx = idx
|
| 1017 |
-
|
| 1018 |
-
# new_entry = copy.deepcopy(ocr)
|
| 1019 |
-
# if best_match:
|
| 1020 |
-
# new_entry["speaker"] = best_match.get("speaker", "UNKNOWN")
|
| 1021 |
-
# new_entry["ocr_similarity"] = best_score
|
| 1022 |
-
|
| 1023 |
-
# if best_score >= replace_threshold:
|
| 1024 |
-
# new_entry["start"] = best_match["start"]
|
| 1025 |
-
# new_entry["end"] = best_match["end"]
|
| 1026 |
-
# used_whisperx.add(best_idx) # Mark used
|
| 1027 |
-
|
| 1028 |
-
# else:
|
| 1029 |
-
# new_entry["speaker"] = "UNKNOWN"
|
| 1030 |
-
# new_entry["ocr_similarity"] = None
|
| 1031 |
-
|
| 1032 |
-
# merged.append(new_entry)
|
| 1033 |
-
# return merged
|
| 1034 |
-
|
| 1035 |
def realign_ocr_segments(merged_ocr_json, min_gap=0.2):
|
| 1036 |
"""
|
| 1037 |
Realign OCR segments to avoid overlaps using midpoint-based adjustment.
|
|
@@ -1166,6 +1119,8 @@ def add_transcript_voiceover(video_path, translated_json, output_path, process_m
|
|
| 1166 |
|
| 1167 |
# Sort and filter together
|
| 1168 |
results.sort(key=lambda x: x[0])
|
|
|
|
|
|
|
| 1169 |
filtered = [(translated_json[i], txt, aud, dur) for i, txt, aud, dur in results if dur > 0]
|
| 1170 |
|
| 1171 |
translated_json = [entry for entry, _, _, _ in filtered]
|
|
|
|
| 985 |
|
| 986 |
return merged
|
| 987 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 988 |
def realign_ocr_segments(merged_ocr_json, min_gap=0.2):
|
| 989 |
"""
|
| 990 |
Realign OCR segments to avoid overlaps using midpoint-based adjustment.
|
|
|
|
| 1119 |
|
| 1120 |
# Sort and filter together
|
| 1121 |
results.sort(key=lambda x: x[0])
|
| 1122 |
+
text_clips = [clip for _, clip, _, _ in results if clip]
|
| 1123 |
+
|
| 1124 |
filtered = [(translated_json[i], txt, aud, dur) for i, txt, aud, dur in results if dur > 0]
|
| 1125 |
|
| 1126 |
translated_json = [entry for entry, _, _, _ in filtered]
|