Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -281,15 +281,22 @@ def transcribe_video_with_speakers(video_path):
|
|
| 281 |
|
| 282 |
return transcript_with_speakers, detected_language
|
| 283 |
|
| 284 |
-
def segment_audio_from_video(video_path):
|
| 285 |
# Extract audio from video
|
| 286 |
video = VideoFileClip(video_path)
|
| 287 |
audio_path = "audio.wav"
|
| 288 |
video.audio.write_audiofile(audio_path)
|
| 289 |
logger.info(f"Audio extracted from video: {audio_path}")
|
| 290 |
|
| 291 |
-
segment_result
|
| 292 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 293 |
|
| 294 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 295 |
logger.info(f"Using device: {device}")
|
|
@@ -1333,7 +1340,8 @@ def calibrated_speed(text, desired_duration):
|
|
| 1333 |
slope = (1.7 - 1.0) / (25.2 - 14)
|
| 1334 |
return 1.0 + slope * (cps - 14)
|
| 1335 |
|
| 1336 |
-
|
|
|
|
| 1337 |
if file is None:
|
| 1338 |
logger.info("No file uploaded. Please upload a video/audio file.")
|
| 1339 |
return None, [], None, "No file uploaded. Please upload a video/audio file."
|
|
@@ -1343,7 +1351,7 @@ def upload_and_manage(file, target_language, process_mode):
|
|
| 1343 |
logger.info(f"Started processing file: {file.name}")
|
| 1344 |
|
| 1345 |
# Define paths for audio and output files
|
| 1346 |
-
audio_path = "audio.wav"
|
| 1347 |
output_video_path = "output_video.mp4"
|
| 1348 |
voiceover_path = "voiceover.wav"
|
| 1349 |
translated_json_filepath = "translated_output.json"
|
|
@@ -1352,7 +1360,11 @@ def upload_and_manage(file, target_language, process_mode):
|
|
| 1352 |
|
| 1353 |
# Step 1: Segment audio from the uploaded video/audio file
|
| 1354 |
logger.info("Segmenting audio...")
|
| 1355 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1356 |
if not speech_segments:
|
| 1357 |
raise Exception("No speech segments detected in the audio.")
|
| 1358 |
logger.info(f"Audio segmentation completed. Found {len(speech_segments)} segments.")
|
|
@@ -1386,7 +1398,7 @@ def upload_and_manage(file, target_language, process_mode):
|
|
| 1386 |
with open(translated_json_filepath, "w", encoding="utf-8") as f:
|
| 1387 |
json.dump(translated_json, f, ensure_ascii=False, indent=4)
|
| 1388 |
logger.info(f"Translated JSON saved to {translated_json_filepath}")
|
| 1389 |
-
|
| 1390 |
# Step 3: Add transcript to video based on timestamps
|
| 1391 |
logger.info("Adding translated transcript to video...")
|
| 1392 |
add_transcript_voiceover(file.name, translated_json, output_video_path, process_mode, target_language, background_audio_path = background_audio_path)
|
|
@@ -1430,7 +1442,15 @@ def build_interface():
|
|
| 1430 |
process_mode = gr.Radio(choices=[("Transcription Only", 1),
|
| 1431 |
("Transcription with Premium Voice", 2),
|
| 1432 |
("Transcription with Voice Clone", 3)],
|
| 1433 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1434 |
submit_button = gr.Button("Post and Process")
|
| 1435 |
with gr.Column(scale=8):
|
| 1436 |
gr.Markdown("## Edit Translations")
|
|
@@ -1475,7 +1495,7 @@ def build_interface():
|
|
| 1475 |
)
|
| 1476 |
submit_button.click(
|
| 1477 |
upload_and_manage,
|
| 1478 |
-
inputs=[file_input, language_input, process_mode],
|
| 1479 |
outputs=[editable_table, processed_video_output, translated_json_download, elapsed_time_display]
|
| 1480 |
)
|
| 1481 |
# Connect submit button to save_feedback_db function
|
|
@@ -1489,4 +1509,4 @@ def build_interface():
|
|
| 1489 |
tts_model = None
|
| 1490 |
# Launch the Gradio interface
|
| 1491 |
demo = build_interface()
|
| 1492 |
-
demo.launch()
|
|
|
|
| 281 |
|
| 282 |
return transcript_with_speakers, detected_language
|
| 283 |
|
| 284 |
+
def segment_audio_from_video(video_path, separate_background = True):
|
| 285 |
# Extract audio from video
|
| 286 |
video = VideoFileClip(video_path)
|
| 287 |
audio_path = "audio.wav"
|
| 288 |
video.audio.write_audiofile(audio_path)
|
| 289 |
logger.info(f"Audio extracted from video: {audio_path}")
|
| 290 |
|
| 291 |
+
segment_result = None
|
| 292 |
+
speech_audio_path = audio_path
|
| 293 |
+
|
| 294 |
+
if separate_background:
|
| 295 |
+
# Assuming segment_background_audio returns a tuple (segment_result, speech_audio_path)
|
| 296 |
+
segment_result, speech_audio_path = segment_background_audio(audio_path)
|
| 297 |
+
print(f"Saved non-speech (background) audio to local")
|
| 298 |
+
else:
|
| 299 |
+
logger.info("Background audio separation skipped as per separate_background=False.")
|
| 300 |
|
| 301 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 302 |
logger.info(f"Using device: {device}")
|
|
|
|
| 1340 |
slope = (1.7 - 1.0) / (25.2 - 14)
|
| 1341 |
return 1.0 + slope * (cps - 14)
|
| 1342 |
|
| 1343 |
+
# Modified upload_and_manage function
|
| 1344 |
+
def upload_and_manage(file, target_language, process_mode, separate_background_audio): # Added separate_background_audio
|
| 1345 |
if file is None:
|
| 1346 |
logger.info("No file uploaded. Please upload a video/audio file.")
|
| 1347 |
return None, [], None, "No file uploaded. Please upload a video/audio file."
|
|
|
|
| 1351 |
logger.info(f"Started processing file: {file.name}")
|
| 1352 |
|
| 1353 |
# Define paths for audio and output files
|
| 1354 |
+
audio_path = "audio.wav" # This will be the full extracted audio
|
| 1355 |
output_video_path = "output_video.mp4"
|
| 1356 |
voiceover_path = "voiceover.wav"
|
| 1357 |
translated_json_filepath = "translated_output.json"
|
|
|
|
| 1360 |
|
| 1361 |
# Step 1: Segment audio from the uploaded video/audio file
|
| 1362 |
logger.info("Segmenting audio...")
|
| 1363 |
+
# Pass the separate_background_audio boolean from the Gradio input
|
| 1364 |
+
temp_audio_for_vad, background_audio_path, speech_segments = segment_audio_from_video(
|
| 1365 |
+
file.name,
|
| 1366 |
+
separate_background=separate_background_audio
|
| 1367 |
+
)
|
| 1368 |
if not speech_segments:
|
| 1369 |
raise Exception("No speech segments detected in the audio.")
|
| 1370 |
logger.info(f"Audio segmentation completed. Found {len(speech_segments)} segments.")
|
|
|
|
| 1398 |
with open(translated_json_filepath, "w", encoding="utf-8") as f:
|
| 1399 |
json.dump(translated_json, f, ensure_ascii=False, indent=4)
|
| 1400 |
logger.info(f"Translated JSON saved to {translated_json_filepath}")
|
| 1401 |
+
|
| 1402 |
# Step 3: Add transcript to video based on timestamps
|
| 1403 |
logger.info("Adding translated transcript to video...")
|
| 1404 |
add_transcript_voiceover(file.name, translated_json, output_video_path, process_mode, target_language, background_audio_path = background_audio_path)
|
|
|
|
| 1442 |
process_mode = gr.Radio(choices=[("Transcription Only", 1),
|
| 1443 |
("Transcription with Premium Voice", 2),
|
| 1444 |
("Transcription with Voice Clone", 3)],
|
| 1445 |
+
label="Choose Processing Type", value=1)
|
| 1446 |
+
|
| 1447 |
+
# New Gradio Checkbox for background audio separation
|
| 1448 |
+
separate_background_checkbox = gr.Checkbox(
|
| 1449 |
+
label="Separate Background Audio (Recommended)",
|
| 1450 |
+
value=True, # Default to True
|
| 1451 |
+
interactive=True
|
| 1452 |
+
)
|
| 1453 |
+
|
| 1454 |
submit_button = gr.Button("Post and Process")
|
| 1455 |
with gr.Column(scale=8):
|
| 1456 |
gr.Markdown("## Edit Translations")
|
|
|
|
| 1495 |
)
|
| 1496 |
submit_button.click(
|
| 1497 |
upload_and_manage,
|
| 1498 |
+
inputs=[file_input, language_input, process_mode, separate_background_checkbox], # Add checkbox as input
|
| 1499 |
outputs=[editable_table, processed_video_output, translated_json_download, elapsed_time_display]
|
| 1500 |
)
|
| 1501 |
# Connect submit button to save_feedback_db function
|
|
|
|
| 1509 |
tts_model = None
|
| 1510 |
# Launch the Gradio interface
|
| 1511 |
demo = build_interface()
|
| 1512 |
+
demo.launch()
|