Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -54,7 +54,7 @@ def separate_vocals(input_path):
|
|
| 54 |
class AudioProcessor:
|
| 55 |
def __init__(self, device="cpu"):
|
| 56 |
self.whisper_model = WhisperModel("small", device=device)
|
| 57 |
-
self.openrouter_api_key = "
|
| 58 |
self.client = OpenAI(
|
| 59 |
base_url="https://openrouter.ai/api/v1",
|
| 60 |
api_key=self.openrouter_api_key,
|
|
@@ -122,7 +122,7 @@ class AudioProcessor:
|
|
| 122 |
messages=[
|
| 123 |
{
|
| 124 |
"role": "system",
|
| 125 |
-
"content": f"You are a professional translator from
|
| 126 |
},
|
| 127 |
{
|
| 128 |
"role": "user",
|
|
@@ -260,17 +260,13 @@ async def process_audio_chunks(input_audio_path, voice, target_language):
|
|
| 260 |
final_mix.export(output_path, format="wav")
|
| 261 |
print(f"✅ Output saved as: {output_path}")
|
| 262 |
|
| 263 |
-
final_audio_path = output_path
|
| 264 |
-
final_background_path = background_path
|
| 265 |
-
|
| 266 |
cleanup_files(chunk_files)
|
| 267 |
shutil.rmtree(temp_dir, ignore_errors=True)
|
| 268 |
-
return
|
| 269 |
|
| 270 |
# --- Gradio Interface ---
|
| 271 |
def gradio_interface(video_file, voice, target_language):
|
| 272 |
try:
|
| 273 |
-
# Create temporary directory for processing
|
| 274 |
temp_dir = Path(tempfile.mkdtemp())
|
| 275 |
input_video_path = temp_dir / "input_video.mp4"
|
| 276 |
|
|
@@ -278,26 +274,21 @@ def gradio_interface(video_file, voice, target_language):
|
|
| 278 |
if not os.path.splitext(video_file.name)[1].lower() in ['.mp4', '.mov', '.avi', '.mkv']:
|
| 279 |
raise ValueError("Invalid file type. Please upload a video file.")
|
| 280 |
|
| 281 |
-
# Save the uploaded file to the temporary directory
|
| 282 |
shutil.copyfile(video_file.name, input_video_path)
|
| 283 |
|
| 284 |
-
# Extract audio from video
|
| 285 |
audio_path, audio_temp_dir = extract_audio_from_video(str(input_video_path))
|
| 286 |
if not audio_path:
|
| 287 |
return None
|
| 288 |
|
| 289 |
-
# Process audio chunks
|
| 290 |
audio_output_path, background_path = asyncio.run(process_audio_chunks(audio_path, voice, target_language))
|
| 291 |
|
| 292 |
if audio_output_path is None or background_path is None:
|
| 293 |
return None
|
| 294 |
|
| 295 |
-
# Combine with original video
|
| 296 |
output_video_path = temp_dir / "translated_video.mp4"
|
| 297 |
success = combine_video_audio(str(input_video_path), audio_output_path, str(output_video_path))
|
| 298 |
|
| 299 |
if success:
|
| 300 |
-
# Return the path to the output video
|
| 301 |
return str(output_video_path)
|
| 302 |
else:
|
| 303 |
return None
|
|
@@ -305,14 +296,8 @@ def gradio_interface(video_file, voice, target_language):
|
|
| 305 |
except Exception as e:
|
| 306 |
print(f"Error processing video: {e}")
|
| 307 |
return None
|
| 308 |
-
finally:
|
| 309 |
-
# Cleanup temporary files
|
| 310 |
-
# Commented out for debugging purposes
|
| 311 |
-
# shutil.rmtree(temp_dir, ignore_errors=True)
|
| 312 |
-
pass
|
| 313 |
|
| 314 |
def extract_audio_from_video(video_path):
|
| 315 |
-
"""Extract audio from video file using ffmpeg"""
|
| 316 |
temp_dir = tempfile.mkdtemp()
|
| 317 |
audio_path = os.path.join(temp_dir, "extracted_audio.wav")
|
| 318 |
|
|
@@ -333,97 +318,30 @@ def extract_audio_from_video(video_path):
|
|
| 333 |
return None, None
|
| 334 |
|
| 335 |
def combine_video_audio(video_path, audio_path, output_path):
|
| 336 |
-
"""Combine original video with new audio track"""
|
| 337 |
try:
|
| 338 |
subprocess.run([
|
| 339 |
-
"ffmpeg", "-y",
|
|
|
|
| 340 |
"-i", audio_path,
|
| 341 |
-
"-c:v", "copy",
|
| 342 |
-
"-
|
|
|
|
|
|
|
| 343 |
], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
| 344 |
return True
|
| 345 |
except Exception as e:
|
| 346 |
-
print(f"Video
|
| 347 |
return False
|
| 348 |
|
| 349 |
-
#
|
| 350 |
-
voice_options = {
|
| 351 |
-
"Hindi": [
|
| 352 |
-
"hi-IN-MadhurNeural", # Male
|
| 353 |
-
"hi-IN-SwaraNeural" # Female
|
| 354 |
-
],
|
| 355 |
-
"English": [
|
| 356 |
-
"en-US-GuyNeural", # Male
|
| 357 |
-
"en-US-BenjaminRUS", # Male
|
| 358 |
-
"en-US-ChristopherNeural", # Male
|
| 359 |
-
"en-US-AriaNeural", # Female
|
| 360 |
-
"en-US-JessaNeural", # Female
|
| 361 |
-
"en-US-JennyNeural" # Female
|
| 362 |
-
],
|
| 363 |
-
"Spanish": [
|
| 364 |
-
"es-ES-AlvaroNeural", # Male
|
| 365 |
-
"es-MX-JorgeNeural", # Male
|
| 366 |
-
"es-US-AlonsoNeural", # Male
|
| 367 |
-
"es-ES-ElviraNeural", # Female
|
| 368 |
-
"es-MX-DaliaNeural", # Female
|
| 369 |
-
"es-US-PalomaNeural" # Female
|
| 370 |
-
],
|
| 371 |
-
"French": [
|
| 372 |
-
"fr-FR-HenriNeural", # Male
|
| 373 |
-
"fr-FR-RemyMultilingualNeural", # Male
|
| 374 |
-
"fr-CA-AntoineNeural", # Male
|
| 375 |
-
"fr-FR-DeniseNeural", # Female
|
| 376 |
-
"fr-FR-JulieNeural", # Female
|
| 377 |
-
"fr-FR-VivienneMultilingualNeural" # Female
|
| 378 |
-
],
|
| 379 |
-
"Japanese": [
|
| 380 |
-
"ja-JP-KeitaNeural", # Male
|
| 381 |
-
"ja-JP-DaichiNeural", # Male
|
| 382 |
-
"ja-JP-RikuNeural", # Male
|
| 383 |
-
"ja-JP-AoiNeural", # Female
|
| 384 |
-
"ja-JP-NanamiNeural", # Female
|
| 385 |
-
"ja-JP-ShioriNeural" # Female
|
| 386 |
-
],
|
| 387 |
-
"Korean": [
|
| 388 |
-
"ko-KR-InJoonNeural", # Male
|
| 389 |
-
"ko-KR-SunHiNeural" # Female
|
| 390 |
-
]
|
| 391 |
-
}
|
| 392 |
-
|
| 393 |
-
|
| 394 |
-
# Create Gradio interface
|
| 395 |
with gr.Blocks() as demo:
|
| 396 |
-
gr.Markdown("#
|
| 397 |
-
gr.
|
| 398 |
-
|
| 399 |
-
|
| 400 |
-
|
| 401 |
-
|
| 402 |
-
|
| 403 |
-
language_dropdown = gr.Dropdown(
|
| 404 |
-
list(voice_options.keys()),
|
| 405 |
-
label="Translate to",
|
| 406 |
-
value="Hindi"
|
| 407 |
-
)
|
| 408 |
-
voice_dropdown = gr.Dropdown(
|
| 409 |
-
voice_options["Hindi"],
|
| 410 |
-
label="Select Voice",
|
| 411 |
-
value="hi-IN-MadhurNeural"
|
| 412 |
-
)
|
| 413 |
-
|
| 414 |
-
output_video = gr.Video(label="Dubbed Video")
|
| 415 |
-
|
| 416 |
-
submit_btn = gr.Button("Start Dubbing")
|
| 417 |
-
|
| 418 |
-
def update_voice_options(language):
|
| 419 |
-
return gr.update(choices=voice_options[language], value=voice_options[language][0])
|
| 420 |
-
|
| 421 |
-
language_dropdown.change(update_voice_options, inputs=[language_dropdown], outputs=[voice_dropdown])
|
| 422 |
|
| 423 |
-
|
| 424 |
-
gradio_interface,
|
| 425 |
-
inputs=[video_input, voice_dropdown, language_dropdown],
|
| 426 |
-
outputs=output_video
|
| 427 |
-
)
|
| 428 |
|
| 429 |
-
demo.
|
|
|
|
| 54 |
class AudioProcessor:
|
| 55 |
def __init__(self, device="cpu"):
|
| 56 |
self.whisper_model = WhisperModel("small", device=device)
|
| 57 |
+
self.openrouter_api_key = "your_openrouter_api_key_here"
|
| 58 |
self.client = OpenAI(
|
| 59 |
base_url="https://openrouter.ai/api/v1",
|
| 60 |
api_key=self.openrouter_api_key,
|
|
|
|
| 122 |
messages=[
|
| 123 |
{
|
| 124 |
"role": "system",
|
| 125 |
+
"content": f"You are a professional translator from Given language to {target_language}. Translate exactly as requested."
|
| 126 |
},
|
| 127 |
{
|
| 128 |
"role": "user",
|
|
|
|
| 260 |
final_mix.export(output_path, format="wav")
|
| 261 |
print(f"✅ Output saved as: {output_path}")
|
| 262 |
|
|
|
|
|
|
|
|
|
|
| 263 |
cleanup_files(chunk_files)
|
| 264 |
shutil.rmtree(temp_dir, ignore_errors=True)
|
| 265 |
+
return output_path, background_path
|
| 266 |
|
| 267 |
# --- Gradio Interface ---
|
| 268 |
def gradio_interface(video_file, voice, target_language):
|
| 269 |
try:
|
|
|
|
| 270 |
temp_dir = Path(tempfile.mkdtemp())
|
| 271 |
input_video_path = temp_dir / "input_video.mp4"
|
| 272 |
|
|
|
|
| 274 |
if not os.path.splitext(video_file.name)[1].lower() in ['.mp4', '.mov', '.avi', '.mkv']:
|
| 275 |
raise ValueError("Invalid file type. Please upload a video file.")
|
| 276 |
|
|
|
|
| 277 |
shutil.copyfile(video_file.name, input_video_path)
|
| 278 |
|
|
|
|
| 279 |
audio_path, audio_temp_dir = extract_audio_from_video(str(input_video_path))
|
| 280 |
if not audio_path:
|
| 281 |
return None
|
| 282 |
|
|
|
|
| 283 |
audio_output_path, background_path = asyncio.run(process_audio_chunks(audio_path, voice, target_language))
|
| 284 |
|
| 285 |
if audio_output_path is None or background_path is None:
|
| 286 |
return None
|
| 287 |
|
|
|
|
| 288 |
output_video_path = temp_dir / "translated_video.mp4"
|
| 289 |
success = combine_video_audio(str(input_video_path), audio_output_path, str(output_video_path))
|
| 290 |
|
| 291 |
if success:
|
|
|
|
| 292 |
return str(output_video_path)
|
| 293 |
else:
|
| 294 |
return None
|
|
|
|
| 296 |
except Exception as e:
|
| 297 |
print(f"Error processing video: {e}")
|
| 298 |
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 299 |
|
| 300 |
def extract_audio_from_video(video_path):
|
|
|
|
| 301 |
temp_dir = tempfile.mkdtemp()
|
| 302 |
audio_path = os.path.join(temp_dir, "extracted_audio.wav")
|
| 303 |
|
|
|
|
| 318 |
return None, None
|
| 319 |
|
| 320 |
def combine_video_audio(video_path, audio_path, output_path):
|
|
|
|
| 321 |
try:
|
| 322 |
subprocess.run([
|
| 323 |
+
"ffmpeg", "-y",
|
| 324 |
+
"-i", video_path,
|
| 325 |
"-i", audio_path,
|
| 326 |
+
"-c:v", "copy",
|
| 327 |
+
"-c:a", "aac",
|
| 328 |
+
"-strict", "experimental",
|
| 329 |
+
output_path
|
| 330 |
], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
| 331 |
return True
|
| 332 |
except Exception as e:
|
| 333 |
+
print(f"Video/audio combine error: {e}")
|
| 334 |
return False
|
| 335 |
|
| 336 |
+
# --- Gradio UI Setup ---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 337 |
with gr.Blocks() as demo:
|
| 338 |
+
gr.Markdown("# Video Dubbing & Translation App")
|
| 339 |
+
video_input = gr.File(label="Upload Video", file_types=['.mp4', '.mov', '.avi', '.mkv'])
|
| 340 |
+
voice_selector = gr.Dropdown(choices=["en-US-JennyNeural", "en-GB-RyanNeural", "hi-IN-SwaraNeural"], label="Select Voice", value="en-US-JennyNeural")
|
| 341 |
+
target_lang = gr.Textbox(label="Target Language (e.g. Hindi, French, Spanish)", value="Hindi")
|
| 342 |
+
translate_btn = gr.Button("Translate & Dub")
|
| 343 |
+
output_video = gr.Video(label="Output Video")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 344 |
|
| 345 |
+
translate_btn.click(fn=gradio_interface, inputs=[video_input, voice_selector, target_lang], outputs=output_video)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 346 |
|
| 347 |
+
demo.launch()
|