Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -313,13 +313,14 @@ def segment_audio_from_video(video_path):
|
|
| 313 |
]
|
| 314 |
|
| 315 |
return audio_path, transcript_with_speakers
|
| 316 |
-
|
| 317 |
def clean_transcribed_text(text: str) -> str:
|
| 318 |
-
"""
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
|
|
|
|
|
|
| 323 |
|
| 324 |
def transcribe_segments_with_scribe(full_audio_path, segments):
|
| 325 |
transcribed_segments = []
|
|
@@ -388,7 +389,7 @@ def transcribe_segments_with_scribe(full_audio_path, segments):
|
|
| 388 |
audio_clip.close()
|
| 389 |
|
| 390 |
return transcribed_segments, detected_language, error_message
|
| 391 |
-
|
| 392 |
# Function to get the appropriate translation model based on target language
|
| 393 |
def get_translation_model(source_language, target_language):
|
| 394 |
"""
|
|
|
|
| 313 |
]
|
| 314 |
|
| 315 |
return audio_path, transcript_with_speakers
|
|
|
|
| 316 |
def clean_transcribed_text(text: str) -> str:
    """Strip noise tags and punctuation from a transcribed string.

    The cleaning runs in three passes:

    1. Delete bracketed noise tags such as ``(panting)`` or
       ``[booming sound]``. A tag is the shortest span that opens with
       ``(``, ``[`` or ``{`` and closes with ``)``, ``]`` or ``}`` on the
       same line; the opening and closing bracket kinds are not required
       to match.
    2. Replace every run of the listed punctuation/symbol characters with
       a single space. Note that this applies to single occurrences too,
       not only to repeated symbols, so ``"a-b"`` becomes ``"a b"``.
    3. Collapse all whitespace runs to one space and trim both ends.

    Args:
        text: Raw transcribed text. ``None`` or an empty string is
            tolerated and yields ``""``.

    Returns:
        The cleaned text, possibly empty.
    """
    # Guard against missing transcriptions: re.sub raises TypeError on None.
    if not text:
        return ""

    # Pass 1: drop bracketed tags (non-greedy so adjacent tags stay separate).
    without_tags = re.sub(r"[\(\[\{].*?[\)\]\}]", "", text)

    # Pass 2: turn punctuation/symbol runs into a space so that the words on
    # either side of them are not glued together.
    without_symbols = re.sub(
        r"[_,.~`^•·。!?!?,,\.\/\\\-–—=+]+", " ", without_tags
    )

    # Pass 3: normalise whitespace left behind by the two passes above.
    return re.sub(r"\s+", " ", without_symbols).strip()
|
| 324 |
|
| 325 |
def transcribe_segments_with_scribe(full_audio_path, segments):
|
| 326 |
transcribed_segments = []
|
|
|
|
| 389 |
audio_clip.close()
|
| 390 |
|
| 391 |
return transcribed_segments, detected_language, error_message
|
| 392 |
+
|
| 393 |
# Function to get the appropriate translation model based on target language
|
| 394 |
def get_translation_model(source_language, target_language):
|
| 395 |
"""
|