mohitrai76 committed on
Commit
8d5f088
·
verified ·
1 Parent(s): 0824e67

Update app.py

Browse files

Added CSS part

Files changed (1) hide show
  1. app.py +236 -77
app.py CHANGED
@@ -14,16 +14,15 @@ import gradio as gr
14
  import requests
15
 
16
  # --- Demucs-based vocal separation ---
17
- def separate_vocals(input_path):
18
  """Use Demucs to separate vocals and background music"""
 
19
  temp_dir = tempfile.mkdtemp()
20
  try:
21
  output_dir = os.path.join(temp_dir, "separated")
22
  os.makedirs(output_dir, exist_ok=True)
23
-
24
  from demucs.separate import main as demucs_main
25
  import sys
26
-
27
  original_argv = sys.argv
28
  sys.argv = [
29
  "demucs",
@@ -31,19 +30,16 @@ def separate_vocals(input_path):
31
  "-o", output_dir,
32
  input_path
33
  ]
34
-
35
  try:
36
  demucs_main()
37
  finally:
38
  sys.argv = original_argv
39
-
40
  base_name = Path(input_path).stem
41
  vocals_path = os.path.join(output_dir, "htdemucs", base_name, "vocals.wav")
42
  noise_path = os.path.join(output_dir, "htdemucs", base_name, "no_vocals.wav")
43
-
44
  if not os.path.exists(vocals_path) or not os.path.exists(noise_path):
45
  raise FileNotFoundError("Demucs output missing")
46
-
47
  return vocals_path, noise_path, temp_dir
48
  except Exception as e:
49
  print(f"Demucs error: {e}")
@@ -64,35 +60,31 @@ class AudioProcessor:
64
  "X-Title": "Audio Translation App"
65
  })
66
  )
67
-
68
- def transcribe_audio_with_pauses(self, audio_path):
69
  segments, _ = self.whisper_model.transcribe(audio_path, word_timestamps=True)
70
  previous_end = 0.0
71
  results = []
72
-
73
  for segment in segments:
74
  if segment.start > previous_end + 0.5:
75
  results.append((previous_end, segment.start, None))
76
  results.append((segment.start, segment.end, segment.text.strip()))
77
  previous_end = segment.end
78
-
79
  audio_duration = get_audio_duration(audio_path)
80
  if audio_duration and audio_duration > previous_end + 0.5:
81
  results.append((previous_end, audio_duration, None))
82
-
83
  return results
84
 
85
- def translate_segments_batch(self, segments, target_language):
86
  """Translate all text segments in a single batch request"""
 
87
  try:
88
  # Filter out None segments (pauses)
89
  text_segments = [seg for seg in segments if seg is not None]
90
-
91
  if not text_segments:
92
  return segments # Return original if no text to translate
93
-
94
  print(f"Translating {len(text_segments)} segments in batch...")
95
-
96
  # Prepare the prompt with clear formatting instructions
97
  prompt = f"""Translate the following text segments to {target_language} while maintaining EXACTLY the same format and order:
98
  {chr(10).join(text_segments)}
@@ -102,9 +94,10 @@ class AudioProcessor:
102
  3. Use natural conversational {target_language}
103
  4. Preserve meaning/context
104
  5. Leave proper nouns unchanged
105
- 6. Match original word count where possible
106
- 7. Output ONLY the translations, one per line, no numbers or bullet points
107
- 8. Do not add any additional text or explanations
 
108
  Example Input:
109
  Hello world
110
  How are you?
@@ -112,7 +105,6 @@ class AudioProcessor:
112
  नमस्ते दुनिया
113
  आप कैसे हैं?
114
  """
115
-
116
  completion = self.client.chat.completions.create(
117
  model="gpt-3.5-turbo",
118
  messages=[
@@ -128,14 +120,11 @@ class AudioProcessor:
128
  temperature=0.1, # Lower temperature for more consistent results
129
  max_tokens=2000
130
  )
131
-
132
  translated_text = completion.choices[0].message.content.strip()
133
  translations = translated_text.split('\n')
134
-
135
  # Reconstruct the segments with translations
136
  translated_segments = []
137
  translation_idx = 0
138
-
139
  for seg in segments:
140
  if seg is None:
141
  translated_segments.append(None)
@@ -145,9 +134,8 @@ class AudioProcessor:
145
  translation_idx += 1
146
  else:
147
  translated_segments.append(seg) # Fallback to original if missing translation
148
-
149
  return translated_segments
150
-
151
  except Exception as e:
152
  print(f"Batch translation error: {e}")
153
  return segments # Return original segments if translation fails
@@ -166,7 +154,6 @@ async def synthesize_tts_to_wav(text, voice, target_language):
166
  temp_mp3 = "temp_tts.mp3"
167
  communicate = edge_tts.Communicate(text, voice)
168
  await communicate.save(temp_mp3)
169
-
170
  audio = AudioSegment.from_file(temp_mp3)
171
  audio = audio.set_channels(1).set_frame_rate(22050)
172
  output_wav = "temp_tts.wav"
@@ -180,16 +167,13 @@ def stretch_audio(input_wav, target_duration, api_url="https://sox-api.onrender.
180
  files = {"file": f}
181
  data = {"target_duration": str(target_duration)}
182
  response = requests.post(api_url, files=files, data=data)
183
-
184
  # Check if the request was successful
185
  if response.status_code != 200:
186
  raise RuntimeError(f"API error: {response.status_code} - {response.text}")
187
-
188
  # Save the response content to a temporary file
189
  output_wav = tempfile.mkstemp(suffix=".wav")[1]
190
  with open(output_wav, "wb") as out:
191
  out.write(response.content)
192
-
193
  return output_wav
194
 
195
  def generate_silence_wav(duration_s, output_path, sample_rate=22050):
@@ -202,44 +186,40 @@ def cleanup_files(file_list):
202
  os.remove(file)
203
 
204
  # --- Main Process Function ---
205
- async def process_audio_chunks(input_audio_path, voice, target_language):
206
  audio_processor = AudioProcessor()
207
-
208
  print("🔎 Separating vocals and music using Demucs...")
209
- vocals_path, background_path, temp_dir = separate_vocals(input_audio_path)
210
  if not vocals_path:
211
  return None, None
212
 
213
  print("🔎 Transcribing vocals...")
214
- segments = audio_processor.transcribe_audio_with_pauses(vocals_path)
215
  print(f"Transcribed {len(segments)} segments.")
216
 
217
  # Extract text segments for batch processing
218
  segment_texts = [seg[2] if seg[2] is not None else None for seg in segments]
219
 
220
  # Batch translate all segments at once
221
- translated_texts = audio_processor.translate_segments_batch(segment_texts, target_language)
222
 
223
  chunk_files = []
224
  chunk_idx = 0
225
-
226
  for (start, end, _), translated in zip(segments, translated_texts):
227
  duration = end - start
228
  chunk_idx += 1
229
-
230
  if translated is None:
231
  filename = f"chunk_{chunk_idx:03d}_pause.wav"
232
  generate_silence_wav(duration, filename)
233
  chunk_files.append(filename)
234
  else:
235
  print(f"🔤 {chunk_idx}: Translated: {translated}")
236
-
237
  # Synthesize TTS audio
238
  raw_tts = await synthesize_tts_to_wav(translated, voice, target_language)
239
-
240
  # Stretch the audio to match the target duration
241
  stretched = stretch_audio(raw_tts, duration)
242
-
243
  chunk_files.append(stretched)
244
  os.remove(raw_tts)
245
 
@@ -251,53 +231,52 @@ async def process_audio_chunks(input_audio_path, voice, target_language):
251
  background_music = AudioSegment.from_wav(background_path)
252
  background_music = background_music[:len(combined_tts)]
253
  final_mix = combined_tts.overlay(background_music)
254
-
255
  output_path = "final_translated_with_music.wav"
256
  final_mix.export(output_path, format="wav")
257
  print(f"✅ Output saved as: {output_path}")
258
 
259
  final_audio_path = output_path
260
- final_background_path = background_path
261
 
262
  cleanup_files(chunk_files)
263
  shutil.rmtree(temp_dir, ignore_errors=True)
 
264
  return final_audio_path, final_background_path
265
 
266
  # --- Gradio Interface ---
267
- def gradio_interface(video_file, voice, target_language):
268
  try:
 
269
  # Create temporary directory for processing
270
  temp_dir = Path(tempfile.mkdtemp())
271
  input_video_path = temp_dir / "input_video.mp4"
272
-
273
  # Check if file is a video
274
  if not os.path.splitext(video_file.name)[1].lower() in ['.mp4', '.mov', '.avi', '.mkv']:
275
  raise ValueError("Invalid file type. Please upload a video file.")
276
-
277
  # Save the uploaded file to the temporary directory
278
  shutil.copyfile(video_file.name, input_video_path)
279
 
280
  # Extract audio from video
 
281
  audio_path, audio_temp_dir = extract_audio_from_video(str(input_video_path))
282
  if not audio_path:
283
  return None
284
 
285
  # Process audio chunks
286
- audio_output_path, background_path = asyncio.run(process_audio_chunks(audio_path, voice, target_language))
287
-
288
  if audio_output_path is None or background_path is None:
289
  return None
290
 
291
  # Combine with original video
 
292
  output_video_path = temp_dir / "translated_video.mp4"
293
  success = combine_video_audio(str(input_video_path), audio_output_path, str(output_video_path))
294
-
295
  if success:
 
296
  # Return the path to the output video
297
  return str(output_video_path)
298
  else:
299
  return None
300
-
301
  except Exception as e:
302
  print(f"Error processing video: {e}")
303
  return None
@@ -311,17 +290,14 @@ def extract_audio_from_video(video_path):
311
  """Extract audio from video file using ffmpeg"""
312
  temp_dir = tempfile.mkdtemp()
313
  audio_path = os.path.join(temp_dir, "extracted_audio.wav")
314
-
315
  try:
316
  subprocess.run([
317
  "ffmpeg", "-y", "-i", video_path,
318
  "-vn", "-acodec", "pcm_s16le", "-ar", "44100", "-ac", "2",
319
  audio_path
320
  ], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
321
-
322
  if not os.path.exists(audio_path):
323
  raise FileNotFoundError("Audio extraction failed")
324
-
325
  return audio_path, temp_dir
326
  except Exception as e:
327
  print(f"Audio extraction error: {e}")
@@ -349,8 +325,7 @@ voice_options = {
349
  "hi-IN-SwaraNeural" # Female
350
  ],
351
  "English": [
352
- "en-US-GuyNeural", # Male
353
- "en-US-BenjaminRUS", # Male
354
  "en-US-ChristopherNeural", # Male
355
  "en-US-AriaNeural", # Female
356
  "en-US-JessaNeural", # Female
@@ -359,8 +334,7 @@ voice_options = {
359
  "Spanish": [
360
  "es-ES-AlvaroNeural", # Male
361
  "es-MX-JorgeNeural", # Male
362
- "es-US-AlonsoNeural", # Male
363
- "es-ES-ElviraNeural", # Female
364
  "es-MX-DaliaNeural", # Female
365
  "es-US-PalomaNeural" # Female
366
  ],
@@ -368,56 +342,241 @@ voice_options = {
368
  "fr-FR-HenriNeural", # Male
369
  "fr-FR-RemyMultilingualNeural", # Male
370
  "fr-CA-AntoineNeural", # Male
371
- "fr-FR-DeniseNeural", # Female
372
- "fr-FR-JulieNeural", # Female
373
  "fr-FR-VivienneMultilingualNeural" # Female
374
  ],
375
  "Japanese": [
376
- "ja-JP-KeitaNeural", # Male
377
- "ja-JP-DaichiNeural", # Male
378
- "ja-JP-RikuNeural", # Male
379
- "ja-JP-AoiNeural", # Female
380
- "ja-JP-NanamiNeural", # Female
381
- "ja-JP-ShioriNeural" # Female
382
  ],
383
  "Korean": [
384
  "ko-KR-InJoonNeural", # Male
385
  "ko-KR-SunHiNeural" # Female
386
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
387
  }
388
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
389
 
390
- # Create Gradio interface
391
- with gr.Blocks() as demo:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
392
  gr.Markdown("# DeepDub : Video Dubbing Application")
393
  gr.Markdown("Upload a video and get a dubbed version with translated audio")
394
 
395
  with gr.Row():
396
  video_input = gr.File(label="Upload Video", file_types=[".mp4", ".mov", ".avi", ".mkv"])
397
- language_dropdown = gr.Dropdown(
 
 
398
  list(voice_options.keys()),
399
- label="Translate to",
400
- value="Hindi"
 
401
  )
402
- voice_dropdown = gr.Dropdown(
 
 
403
  voice_options["Hindi"],
404
  label="Select Voice",
405
- value="hi-IN-MadhurNeural"
 
406
  )
407
-
408
  output_video = gr.Video(label="Dubbed Video")
409
-
410
  submit_btn = gr.Button("Start Dubbing")
411
 
412
  def update_voice_options(language):
 
413
  return gr.update(choices=voice_options[language], value=voice_options[language][0])
414
 
415
- language_dropdown.change(update_voice_options, inputs=[language_dropdown], outputs=[voice_dropdown])
 
 
 
 
 
416
 
417
  submit_btn.click(
418
  gradio_interface,
419
- inputs=[video_input, voice_dropdown, language_dropdown],
420
- outputs=output_video
 
421
  )
422
 
423
- demo.queue().launch(server_name="0.0.0.0", debug=True)
 
14
  import requests
15
 
16
  # --- Demucs-based vocal separation ---
17
+ def separate_vocals(input_path, progress=gr.Progress()):
18
  """Use Demucs to separate vocals and background music"""
19
+ progress(0.1, desc="Separating vocals and music (Demucs)")
20
  temp_dir = tempfile.mkdtemp()
21
  try:
22
  output_dir = os.path.join(temp_dir, "separated")
23
  os.makedirs(output_dir, exist_ok=True)
 
24
  from demucs.separate import main as demucs_main
25
  import sys
 
26
  original_argv = sys.argv
27
  sys.argv = [
28
  "demucs",
 
30
  "-o", output_dir,
31
  input_path
32
  ]
 
33
  try:
34
  demucs_main()
35
  finally:
36
  sys.argv = original_argv
 
37
  base_name = Path(input_path).stem
38
  vocals_path = os.path.join(output_dir, "htdemucs", base_name, "vocals.wav")
39
  noise_path = os.path.join(output_dir, "htdemucs", base_name, "no_vocals.wav")
 
40
  if not os.path.exists(vocals_path) or not os.path.exists(noise_path):
41
  raise FileNotFoundError("Demucs output missing")
42
+ progress(0.3, desc="Vocals separated")
43
  return vocals_path, noise_path, temp_dir
44
  except Exception as e:
45
  print(f"Demucs error: {e}")
 
60
  "X-Title": "Audio Translation App"
61
  })
62
  )
63
+ def transcribe_audio_with_pauses(self, audio_path, progress):
64
+ progress(0.35, desc="Transcribing audio (Whisper)")
65
  segments, _ = self.whisper_model.transcribe(audio_path, word_timestamps=True)
66
  previous_end = 0.0
67
  results = []
 
68
  for segment in segments:
69
  if segment.start > previous_end + 0.5:
70
  results.append((previous_end, segment.start, None))
71
  results.append((segment.start, segment.end, segment.text.strip()))
72
  previous_end = segment.end
 
73
  audio_duration = get_audio_duration(audio_path)
74
  if audio_duration and audio_duration > previous_end + 0.5:
75
  results.append((previous_end, audio_duration, None))
76
+ progress(0.5, desc="Transcription complete")
77
  return results
78
 
79
+ def translate_segments_batch(self, segments, target_language, progress):
80
  """Translate all text segments in a single batch request"""
81
+ progress(0.55, desc="Translating segments")
82
  try:
83
  # Filter out None segments (pauses)
84
  text_segments = [seg for seg in segments if seg is not None]
 
85
  if not text_segments:
86
  return segments # Return original if no text to translate
 
87
  print(f"Translating {len(text_segments)} segments in batch...")
 
88
  # Prepare the prompt with clear formatting instructions
89
  prompt = f"""Translate the following text segments to {target_language} while maintaining EXACTLY the same format and order:
90
  {chr(10).join(text_segments)}
 
94
  3. Use natural conversational {target_language}
95
  4. Preserve meaning/context
96
  5. Leave proper nouns unchanged
97
+ 6.Make sure the translated sentence is meaningful also
98
+ 7. Match original word count where possible
99
+ 8. Output ONLY the translations, one per line, no numbers or bullet points
100
+ 9. Do not add any additional text or explanations
101
  Example Input:
102
  Hello world
103
  How are you?
 
105
  नमस्ते दुनिया
106
  आप कैसे हैं?
107
  """
 
108
  completion = self.client.chat.completions.create(
109
  model="gpt-3.5-turbo",
110
  messages=[
 
120
  temperature=0.1, # Lower temperature for more consistent results
121
  max_tokens=2000
122
  )
 
123
  translated_text = completion.choices[0].message.content.strip()
124
  translations = translated_text.split('\n')
 
125
  # Reconstruct the segments with translations
126
  translated_segments = []
127
  translation_idx = 0
 
128
  for seg in segments:
129
  if seg is None:
130
  translated_segments.append(None)
 
134
  translation_idx += 1
135
  else:
136
  translated_segments.append(seg) # Fallback to original if missing translation
137
+ progress(0.7, desc="Translation complete")
138
  return translated_segments
 
139
  except Exception as e:
140
  print(f"Batch translation error: {e}")
141
  return segments # Return original segments if translation fails
 
154
  temp_mp3 = "temp_tts.mp3"
155
  communicate = edge_tts.Communicate(text, voice)
156
  await communicate.save(temp_mp3)
 
157
  audio = AudioSegment.from_file(temp_mp3)
158
  audio = audio.set_channels(1).set_frame_rate(22050)
159
  output_wav = "temp_tts.wav"
 
167
  files = {"file": f}
168
  data = {"target_duration": str(target_duration)}
169
  response = requests.post(api_url, files=files, data=data)
 
170
  # Check if the request was successful
171
  if response.status_code != 200:
172
  raise RuntimeError(f"API error: {response.status_code} - {response.text}")
 
173
  # Save the response content to a temporary file
174
  output_wav = tempfile.mkstemp(suffix=".wav")[1]
175
  with open(output_wav, "wb") as out:
176
  out.write(response.content)
 
177
  return output_wav
178
 
179
  def generate_silence_wav(duration_s, output_path, sample_rate=22050):
 
186
  os.remove(file)
187
 
188
  # --- Main Process Function ---
189
+ async def process_audio_chunks(input_audio_path, voice, target_language, progress):
190
  audio_processor = AudioProcessor()
 
191
  print("🔎 Separating vocals and music using Demucs...")
192
+ vocals_path, background_path, temp_dir = separate_vocals(input_audio_path, progress)
193
  if not vocals_path:
194
  return None, None
195
 
196
  print("🔎 Transcribing vocals...")
197
+ segments = audio_processor.transcribe_audio_with_pauses(vocals_path, progress)
198
  print(f"Transcribed {len(segments)} segments.")
199
 
200
  # Extract text segments for batch processing
201
  segment_texts = [seg[2] if seg[2] is not None else None for seg in segments]
202
 
203
  # Batch translate all segments at once
204
+ translated_texts = audio_processor.translate_segments_batch(segment_texts, target_language, progress)
205
 
206
  chunk_files = []
207
  chunk_idx = 0
208
+ total_segments = len(segments)
209
  for (start, end, _), translated in zip(segments, translated_texts):
210
  duration = end - start
211
  chunk_idx += 1
212
+ progress(0.7 + (chunk_idx / total_segments) * 0.15, desc=f"Processing chunk {chunk_idx}/{total_segments}")
213
  if translated is None:
214
  filename = f"chunk_{chunk_idx:03d}_pause.wav"
215
  generate_silence_wav(duration, filename)
216
  chunk_files.append(filename)
217
  else:
218
  print(f"🔤 {chunk_idx}: Translated: {translated}")
 
219
  # Synthesize TTS audio
220
  raw_tts = await synthesize_tts_to_wav(translated, voice, target_language)
 
221
  # Stretch the audio to match the target duration
222
  stretched = stretch_audio(raw_tts, duration)
 
223
  chunk_files.append(stretched)
224
  os.remove(raw_tts)
225
 
 
231
  background_music = AudioSegment.from_wav(background_path)
232
  background_music = background_music[:len(combined_tts)]
233
  final_mix = combined_tts.overlay(background_music)
 
234
  output_path = "final_translated_with_music.wav"
235
  final_mix.export(output_path, format="wav")
236
  print(f"✅ Output saved as: {output_path}")
237
 
238
  final_audio_path = output_path
239
+ final_background_path = background_path # Returned so the caller can check for success; the file itself lives under temp_dir, which is removed below
240
 
241
  cleanup_files(chunk_files)
242
  shutil.rmtree(temp_dir, ignore_errors=True)
243
+ progress(0.9, desc="Audio processing complete")
244
  return final_audio_path, final_background_path
245
 
246
  # --- Gradio Interface ---
247
+ def gradio_interface(video_file, voice, target_language, progress=gr.Progress()):
248
  try:
249
+ progress(0.05, desc="Starting video dubbing process")
250
  # Create temporary directory for processing
251
  temp_dir = Path(tempfile.mkdtemp())
252
  input_video_path = temp_dir / "input_video.mp4"
 
253
  # Check if file is a video
254
  if not os.path.splitext(video_file.name)[1].lower() in ['.mp4', '.mov', '.avi', '.mkv']:
255
  raise ValueError("Invalid file type. Please upload a video file.")
 
256
  # Save the uploaded file to the temporary directory
257
  shutil.copyfile(video_file.name, input_video_path)
258
 
259
  # Extract audio from video
260
+ progress(0.1, desc="Extracting audio from video")
261
  audio_path, audio_temp_dir = extract_audio_from_video(str(input_video_path))
262
  if not audio_path:
263
  return None
264
 
265
  # Process audio chunks
266
+ audio_output_path, background_path = asyncio.run(process_audio_chunks(audio_path, voice, target_language, progress))
 
267
  if audio_output_path is None or background_path is None:
268
  return None
269
 
270
  # Combine with original video
271
+ progress(0.95, desc="Combining video and new audio")
272
  output_video_path = temp_dir / "translated_video.mp4"
273
  success = combine_video_audio(str(input_video_path), audio_output_path, str(output_video_path))
 
274
  if success:
275
+ progress(1.0, desc="Dubbing complete!")
276
  # Return the path to the output video
277
  return str(output_video_path)
278
  else:
279
  return None
 
280
  except Exception as e:
281
  print(f"Error processing video: {e}")
282
  return None
 
290
  """Extract audio from video file using ffmpeg"""
291
  temp_dir = tempfile.mkdtemp()
292
  audio_path = os.path.join(temp_dir, "extracted_audio.wav")
 
293
  try:
294
  subprocess.run([
295
  "ffmpeg", "-y", "-i", video_path,
296
  "-vn", "-acodec", "pcm_s16le", "-ar", "44100", "-ac", "2",
297
  audio_path
298
  ], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
 
299
  if not os.path.exists(audio_path):
300
  raise FileNotFoundError("Audio extraction failed")
 
301
  return audio_path, temp_dir
302
  except Exception as e:
303
  print(f"Audio extraction error: {e}")
 
325
  "hi-IN-SwaraNeural" # Female
326
  ],
327
  "English": [
328
+ "en-US-GuyNeural", # Male
 
329
  "en-US-ChristopherNeural", # Male
330
  "en-US-AriaNeural", # Female
331
  "en-US-JessaNeural", # Female
 
334
  "Spanish": [
335
  "es-ES-AlvaroNeural", # Male
336
  "es-MX-JorgeNeural", # Male
337
+ "es-US-AlonsoNeural", # Male
 
338
  "es-MX-DaliaNeural", # Female
339
  "es-US-PalomaNeural" # Female
340
  ],
 
342
  "fr-FR-HenriNeural", # Male
343
  "fr-FR-RemyMultilingualNeural", # Male
344
  "fr-CA-AntoineNeural", # Male
345
+ "fr-FR-DeniseNeural",
 
346
  "fr-FR-VivienneMultilingualNeural" # Female
347
  ],
348
  "Japanese": [
349
+ "ja-JP-KeitaNeural",
350
+ "ja-JP-NanamiNeural"
 
 
 
 
351
  ],
352
  "Korean": [
353
  "ko-KR-InJoonNeural", # Male
354
  "ko-KR-SunHiNeural" # Female
355
+ ]}
356
+
357
+ custom_css = """
358
+ /* Overall Body Background - Deep & Vibrant Gradient */
359
+ body {
360
+ background: linear-gradient(135deg, #1A202C, #2D3748, #4A5568) !important; /* Dark blue-grey gradient */
361
+ font-family: 'Inter', sans-serif; /* Modern font, ensure it's available or use fallback */
362
+ color: #E2E8F0; /* Light text color for contrast */
363
+ overflow-x: hidden;
364
+ }
365
+
366
+ /* --- Core Gradio Block Blending --- */
367
+ /* Make Gradio's main container transparent to show body background */
368
+ .gradio-container {
369
+ background: transparent !important;
370
+ box-shadow: none !important;
371
+ border: none !important;
372
+ padding: 0 !important;
373
+ }
374
+
375
+ /* Specific Gradio block elements - subtle transparency */
376
+ .block {
377
+ background-color: hsla(210, 20%, 25%, 0.5) !important; /* Semi-transparent dark blue-grey */
378
+ backdrop-filter: blur(8px); /* Frosted glass effect */
379
+ border: 1px solid hsla(210, 20%, 35%, 0.6) !important; /* Subtle border */
380
+ border-radius: 20px !important; /* Rounded corners for the block */
381
+ box-shadow: 0 8px 30px hsla(0, 0%, 0%, 0.3) !important; /* Stronger shadow for depth */
382
+ margin-bottom: 25px !important;
383
+ padding: 25px !important; /* Add internal padding to blocks */
384
  }
385
 
386
+ /* Remove default Gradio layout wrappers' backgrounds */
387
+ .main-wrapper, .panel-container {
388
+ background: transparent !important;
389
+ box-shadow: none !important;
390
+ border: none !important;
391
+ }
392
+
393
+ /* --- Application Title and Description --- */
394
+ .gradio-header h1 {
395
+ color: #8D5BFC !important; /* Vibrant Purple for main title */
396
+ font-size: 3em !important;
397
+ text-shadow: 0 0 15px hsla(260, 90%, 70%, 0.5); /* Glowing effect */
398
+ margin-bottom: 10px !important;
399
+ font-weight: 700 !important;
400
+ text-align: center;
401
+ }
402
+
403
+ .gradio-markdown p {
404
+ color: #CBD5E0 !important; /* Lighter text for description */
405
+ font-size: 1.25em !important;
406
+ text-align: center;
407
+ margin-bottom: 40px !important;
408
+ font-weight: 300;
409
+ }
410
 
411
+ /* --- Input Components (File, Dropdowns) --- */
412
+ .gradio-file, .gradio-dropdown {
413
+ background-color: hsla(210, 20%, 18%, 0.7) !important; /* Darker, slightly transparent */
414
+ border: 1px solid hsla(240, 60%, 70%, 0.4) !important; /* Subtle blue border */
415
+ border-radius: 15px !important;
416
+ padding: 12px 18px !important;
417
+ color: #E2E8F0 !important; /* Light text for input */
418
+ font-size: 1.1em !important;
419
+ transition: all 0.3s ease;
420
+ box-shadow: 0 4px 15px hsla(0, 0%, 0%, 0.2);
421
+ }
422
+
423
+ .gradio-file input[type="file"] {
424
+ color: #E2E8F0 !important;
425
+ }
426
+
427
+ .gradio-file:hover, .gradio-dropdown:hover {
428
+ border-color: #A78BFA !important; /* Lighter purple on hover */
429
+ box-shadow: 0 6px 20px hsla(0, 0%, 0%, 0.3);
430
+ }
431
+
432
+ /* Focus state for inputs */
433
+ .gradio-dropdown.gr-text-input:focus,
434
+ .gradio-file input:focus {
435
+ border-color: #8D5BFC !important; /* Vibrant purple on focus */
436
+ box-shadow: 0 0 20px hsla(260, 90%, 70%, 0.5);
437
+ background-color: hsla(210, 20%, 20%, 0.9) !important; /* Slightly less transparent */
438
+ }
439
+
440
+ /* Labels for inputs */
441
+ .gradio-label {
442
+ color: #A78BFA !important; /* Soft purple for labels */
443
+ font-weight: 600 !important;
444
+ font-size: 1.15em !important;
445
+ margin-bottom: 8px !important;
446
+ text-align: left;
447
+ width: 100%;
448
+ }
449
+
450
+ /* --- Submit Button --- */
451
+ .gradio-button {
452
+ background: linear-gradient(90deg, #FF6B8B, #FF8E53) !important; /* Vibrant Pink to Orange gradient */
453
+ color: white !important;
454
+ border: none !important;
455
+ border-radius: 30px !important;
456
+ padding: 15px 35px !important;
457
+ font-size: 1.3em !important;
458
+ font-weight: bold !important;
459
+ cursor: pointer !important;
460
+ transition: all 0.3s ease !important;
461
+ box-shadow: 0 8px 25px hsla(0, 0%, 0%, 0.4) !important;
462
+ margin-top: 35px !important;
463
+ min-width: 220px;
464
+ align-self: center;
465
+ text-transform: uppercase; /* Make button text uppercase */
466
+ letter-spacing: 1px;
467
+ }
468
+
469
+ .gradio-button:hover {
470
+ background: linear-gradient(90deg, #FF4B7B, #FF7E43) !important;
471
+ box-shadow: 0 10px 30px hsla(0, 0%, 0%, 0.5) !important;
472
+ transform: translateY(-3px) !important;
473
+ }
474
+
475
+ /* --- Output Video Player --- */
476
+ .gradio-video {
477
+ background-color: hsla(210, 20%, 15%, 0.8) !important; /* Darker, more opaque background for video */
478
+ border: 2px solid #8D5BFC !important; /* Vibrant purple border for the video player */
479
+ border-radius: 20px !important;
480
+ padding: 15px !important;
481
+ box-shadow: 0 10px 40px hsla(0, 0%, 0%, 0.5) !important; /* Stronger shadow */
482
+ margin-top: 40px !important;
483
+ }
484
+
485
+ /* --- Translated Text Output --- */
486
+ .gradio-markdown-output, .gradio-textbox {
487
+ background-color: hsla(210, 20%, 18%, 0.7) !important;
488
+ border: 1px solid hsla(240, 60%, 70%, 0.4) !important;
489
+ border-radius: 15px !important;
490
+ padding: 20px !important;
491
+ color: #E2E8F0 !important;
492
+ font-size: 1.0em !important;
493
+ min-height: 200px; /* Give it some height */
494
+ overflow-y: auto; /* Enable scrolling for long text */
495
+ white-space: pre-wrap; /* Preserve line breaks */
496
+ box-shadow: 0 4px 15px hsla(0, 0%, 0%, 0.2);
497
+ }
498
+
499
+ /* Flexbox for the Row to control spacing and alignment */
500
+ .gradio-row {
501
+ display: flex;
502
+ justify-content: space-around; /* Distribute items with space around */
503
+ align-items: flex-start; /* Align items to the start of the cross-axis */
504
+ gap: 20px; /* Space between items in the row */
505
+ flex-wrap: wrap; /* Allow items to wrap on smaller screens */
506
+ }
507
+
508
+ /* Ensure individual components in a row take up appropriate space */
509
+ .gradio-row > .gradio-component {
510
+ flex: 1; /* Allow components to grow and shrink */
511
+ min-width: 250px; /* Minimum width for components in a row */
512
+ }
513
+
514
+ /* Adjust padding for gr.Blocks content */
515
+ .gr-box {
516
+ padding: 0 !important; /* Remove internal padding if present to let elements breathe */
517
+ background: transparent !important;
518
+ box-shadow: none !important;
519
+ }
520
+ """
521
+ # Create Gradio interface with radio buttons for both language and voice selection
522
+ with gr.Blocks(css=custom_css, theme=gr.themes.Soft(
523
+ primary_hue=gr.themes.Color(
524
+ c50='#e6e9ff', c100='#c2c9ff', c200='#9faaff', c300='#7c8bff', c400='#5a6bff',
525
+ c500='#384aff', c600='#2c38cc', c700='#202b99', c800='#141d66', c900='#080e33',
526
+ c950='#04071a'
527
+ ),
528
+ secondary_hue=gr.themes.Color(
529
+ c50='#fff0e6', c100='#ffe0cc', c200='#ffb380', c300='#ff8533', c400='#ff5700',
530
+ c500='#cc4600', c600='#993400', c700='#662200', c800='#331100', c900='#1a0900',
531
+ c950='#0d0500'
532
+ ),
533
+ neutral_hue=gr.themes.Color(
534
+ c50='#f8f8fa', c100='#f1f5f9', c200='#e2e8f0', c300='#cbd5e1', c400='#94a3b8',
535
+ c500='#64748b', c600='#475569', c700='#334155', c800='#1e293b', c900='#0f172a',
536
+ c950='#020617'
537
+ )
538
+ )) as demo:
539
  gr.Markdown("# DeepDub : Video Dubbing Application")
540
  gr.Markdown("Upload a video and get a dubbed version with translated audio")
541
 
542
  with gr.Row():
543
  video_input = gr.File(label="Upload Video", file_types=[".mp4", ".mov", ".avi", ".mkv"])
544
+
545
+ # Use Radio buttons for language selection
546
+ language_radio = gr.Radio(
547
  list(voice_options.keys()),
548
+ label="Target Language",
549
+ value="Hindi",
550
+ interactive=True
551
  )
552
+
553
+ # Use Radio buttons for voice selection
554
+ voice_radio = gr.Radio(
555
  voice_options["Hindi"],
556
  label="Select Voice",
557
+ value=voice_options["Hindi"][0],
558
+ interactive=True
559
  )
560
+
561
  output_video = gr.Video(label="Dubbed Video")
 
562
  submit_btn = gr.Button("Start Dubbing")
563
 
564
  def update_voice_options(language):
565
+ # Update voice radio buttons based on selected language
566
  return gr.update(choices=voice_options[language], value=voice_options[language][0])
567
 
568
+ # Update voice options when language changes
569
+ language_radio.change(
570
+ update_voice_options,
571
+ inputs=[language_radio],
572
+ outputs=[voice_radio]
573
+ )
574
 
575
  submit_btn.click(
576
  gradio_interface,
577
+ inputs=[video_input, voice_radio, language_radio],
578
+ outputs=output_video,
579
+ api_name="dub_video"
580
  )
581
 
582
+ demo.queue().launch(server_name="0.0.0.0", debug=True, share=True)