Spaces:
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -300,7 +300,7 @@ def segment_audio_from_video(video_path):
|
|
| 300 |
logger.info("Audio transcription completed")
|
| 301 |
except Exception as e:
|
| 302 |
logger.error(f"❌ WhisperX pipeline failed: {e}")
|
| 303 |
-
return audio_path, []
|
| 304 |
|
| 305 |
# Return segment boundaries (only timestamps, not text)
|
| 306 |
transcript_with_speakers = [
|
|
@@ -312,7 +312,7 @@ def segment_audio_from_video(video_path):
|
|
| 312 |
if segment["end"] > segment["start"]
|
| 313 |
]
|
| 314 |
|
| 315 |
-
return audio_path, transcript_with_speakers
|
| 316 |
|
| 317 |
def clean_transcribed_text(text: str) -> str:
|
| 318 |
"""
|
|
@@ -1388,7 +1388,7 @@ def upload_and_manage(file, target_language, process_mode):
|
|
| 1388 |
|
| 1389 |
# Step 1: Segment audio from the uploaded video/audio file
|
| 1390 |
logger.info("Segmenting audio...")
|
| 1391 |
-
temp_audio_for_vad, speech_segments = segment_audio_from_video(file.name)
|
| 1392 |
if not speech_segments:
|
| 1393 |
raise Exception("No speech segments detected in the audio.")
|
| 1394 |
logger.info(f"Audio segmentation completed. Found {len(speech_segments)} segments.")
|
|
@@ -1417,7 +1417,7 @@ def upload_and_manage(file, target_language, process_mode):
|
|
| 1417 |
|
| 1418 |
# Step 3: Add transcript to video based on timestamps
|
| 1419 |
logger.info("Adding translated transcript to video...")
|
| 1420 |
-
add_transcript_voiceover(file.name, translated_json, output_video_path, process_mode, target_language)
|
| 1421 |
logger.info(f"Transcript added to video. Output video saved at {output_video_path}")
|
| 1422 |
|
| 1423 |
# Convert translated JSON into a format for the editable table
|
|
|
|
| 300 |
logger.info("Audio transcription completed")
|
| 301 |
except Exception as e:
|
| 302 |
logger.error(f"❌ WhisperX pipeline failed: {e}")
|
| 303 |
+
return audio_path, segment_result, []
|
| 304 |
|
| 305 |
# Return segment boundaries (only timestamps, not text)
|
| 306 |
transcript_with_speakers = [
|
|
|
|
| 312 |
if segment["end"] > segment["start"]
|
| 313 |
]
|
| 314 |
|
| 315 |
+
return audio_path, segment_result, transcript_with_speakers
|
| 316 |
|
| 317 |
def clean_transcribed_text(text: str) -> str:
|
| 318 |
"""
|
|
|
|
| 1388 |
|
| 1389 |
# Step 1: Segment audio from the uploaded video/audio file
|
| 1390 |
logger.info("Segmenting audio...")
|
| 1391 |
+
temp_audio_for_vad, background_audio_path, speech_segments = segment_audio_from_video(file.name)
|
| 1392 |
if not speech_segments:
|
| 1393 |
raise Exception("No speech segments detected in the audio.")
|
| 1394 |
logger.info(f"Audio segmentation completed. Found {len(speech_segments)} segments.")
|
|
|
|
| 1417 |
|
| 1418 |
# Step 3: Add transcript to video based on timestamps
|
| 1419 |
logger.info("Adding translated transcript to video...")
|
| 1420 |
+
add_transcript_voiceover(file.name, translated_json, output_video_path, process_mode, target_language, background_audio_path = background_audio_path)
|
| 1421 |
logger.info(f"Transcript added to video. Output video saved at {output_video_path}")
|
| 1422 |
|
| 1423 |
# Convert translated JSON into a format for the editable table
|