vasugo05 committed on
Commit
bb4dfcd
·
verified ·
1 Parent(s): 21b4fb4

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +265 -0
app.py ADDED
@@ -0,0 +1,265 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Main Gradio application interface for AudioDubb
3
+ Provides web UI for audio dubbing on Hugging Face Spaces
4
+ """
5
+
6
+ import logging
7
+ import gradio as gr
8
+ import os
9
+ import tempfile
10
+ from typing import Optional, Tuple, Union
11
+ from pathlib import Path
12
+ from src.core.pipeline import DubbingPipeline
13
+ from src.core.audio_processor import AudioProcessor
14
# Configure logging once at import time: INFO level, timestamped records
# that include the logger name and severity.
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Module-level service objects shared by the UI callbacks below.
pipeline = DubbingPipeline()
audio_processor = AudioProcessor()

# Markdown disclaimer rendered near the top of the interface.
DISCLAIMER = """
⚠️ **IMPORTANT DISCLAIMER** ⚠️

AudioDubb is designed for **educational and personal use only**.

**Please use responsibly:**
- Do not use this tool to create misleading or deceptive content
- Do not impersonate individuals without their explicit consent
- Respect intellectual property rights and privacy
- Follow all applicable laws and regulations in your jurisdiction

**Privacy Notice:**
- All audio files are processed in-memory only
- No audio files, transcriptions, or data are stored or logged
- All temporary files are deleted immediately after processing
- Your data is never transmitted to external servers beyond necessary model APIs

By using AudioDubb, you agree to use it responsibly and ethically.
"""
44
+
45
+
46
def _format_success_message(metadata: dict) -> str:
    """Build the Markdown status text shown after a successful dub.

    The text is assembled from unindented lines so that no leading
    whitespace ends up in the rendered Markdown (indented lines can be
    interpreted as a code block).

    Args:
        metadata: Mapping returned by the pipeline. This function reads
            ``metadata['stages']['transcription']['source_language']``,
            ``metadata['stages']['translation']['target_language']`` and
            ``metadata['output_duration_seconds']``.

    Returns:
        The Markdown-formatted success message.

    Raises:
        KeyError: If one of the keys listed above is missing.
    """
    stages = metadata['stages']
    lines = [
        "",
        "✅ **Dubbing Complete!**",
        "",
        "**Source Details:**",
        f"- Original Language: {stages['transcription']['source_language']}",
        f"- Duration: {metadata['output_duration_seconds']:.2f} seconds",
        "",
        "**Translation:**",
        f"- Target Language: {stages['translation']['target_language']}",
        "",
        "**Output:**",
        "- Download your dubbed audio below",
        "- Quality: High-fidelity voice cloning with emotion preservation",
        "",
    ]
    return "\n".join(lines)


def process_audio(
    input_audio: Union[str, Tuple],
    target_language: str,
    source_language: Optional[str] = None,
    speed: float = 1.0,
    emotion: str = "neutral"
) -> Tuple[Optional[str], str]:
    """
    Main processing function for the Gradio interface

    Args:
        input_audio: Uploaded audio, either a file path or a
            ``(sample_rate, audio_data)`` tuple from Gradio. ``None`` or an
            empty path is treated as "nothing uploaded".
        target_language: Target language for dubbing
        source_language: Source language, or None to let the pipeline decide
        speed: Speech speed multiplier
        emotion: Emotion to apply

    Returns:
        Tuple of (output_audio_path, status_message). The path is None
        whenever processing did not produce an output; the message then
        explains why.
    """
    # Guard: clicking the button without providing audio delivers no input.
    # Report that directly instead of letting it surface as an opaque
    # validation or internal error further down.
    if input_audio is None or (isinstance(input_audio, str) and not input_audio):
        return None, "❌ No audio provided. Please upload or record an audio file first."

    temp_files_to_cleanup = []

    try:
        # Handle Gradio audio input (can be tuple or file path)
        if isinstance(input_audio, tuple):
            sample_rate, audio_data = input_audio
            temp_input = audio_processor.create_temp_audio_file()
            # Register for cleanup before writing so a failed save is still
            # cleaned up by the finally block.
            temp_files_to_cleanup.append(temp_input)
            audio_processor.save_audio(audio_data, sample_rate, temp_input)
            input_audio_path = temp_input
        else:
            input_audio_path = input_audio

        logger.info(f"Processing audio with target language: {target_language}")

        # Validate audio file
        if not audio_processor.validate_audio_file(input_audio_path):
            return None, "❌ Unsupported audio format. Please use WAV, MP3, M4A, FLAC, or OGG."

        # Process through pipeline
        output_path, metadata = pipeline.dub_audio(
            input_audio_path=input_audio_path,
            target_language=target_language,
            source_language=source_language,
            speed=speed,
            emotion=emotion
        )

        status_msg = _format_success_message(metadata)

        logger.info("Audio processing completed successfully")
        return output_path, status_msg

    except Exception as e:
        # UI boundary: convert any failure into a status message for the user.
        # logger.exception records the traceback, which a bare str(e) loses.
        error_msg = f"❌ **Error Processing Audio**: {str(e)}\n\nPlease check your audio file and try again."
        logger.exception("Error in process_audio: %s", e)
        return None, error_msg

    finally:
        # Cleanup temporary files created by this call (runs on every exit path)
        for temp_file in temp_files_to_cleanup:
            audio_processor.cleanup_temp_file(temp_file)
122
+
123
+
124
def get_supported_languages():
    """Return the "languages" entry of the pipeline's supported-languages mapping."""
    return pipeline.get_supported_languages()["languages"]
128
+
129
+
130
def create_interface():
    """Assemble the AudioDubb Gradio Blocks UI and return it without launching.

    Layout: header and disclaimer on top, then a two-column row (inputs on
    the left, status and dubbed audio on the right), then a footer. The
    "Start Dubbing" button is wired to ``process_audio``.
    """
    # Sentinel shown in the source-language dropdown; mapped to None on click.
    auto_detect = "Auto-Detect"
    languages = get_supported_languages()

    with gr.Blocks(theme=gr.themes.Soft(), title="AudioDubb - Multilingual Audio Dubbing") as app:

        # --- Header ---------------------------------------------------------
        gr.Markdown("""
        # 🎙️ AudioDubb
        ## AI-Powered Multilingual Audio Dubbing

        Transform your audio into any language while preserving the original speaker's voice, emotion, and natural expression.
        """)

        # --- Disclaimer -----------------------------------------------------
        gr.Markdown(DISCLAIMER)

        # --- Main content: inputs (left) / outputs (right) ------------------
        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("### Input")

                # Audio can come from a file upload or the microphone.
                audio_in = gr.Audio(
                    label="Upload Audio File",
                    sources=["upload", "microphone"]
                )

                # Language to dub into.
                target_dd = gr.Dropdown(
                    choices=languages,
                    value="english",
                    label="Target Language",
                    interactive=True
                )

                # Optional tuning knobs, collapsed by default.
                with gr.Accordion("Advanced Options", open=False):
                    source_dd = gr.Dropdown(
                        choices=[auto_detect] + languages,
                        value=auto_detect,
                        label="Source Language",
                        interactive=True
                    )

                    speed_slider = gr.Slider(
                        minimum=0.5,
                        maximum=2.0,
                        value=1.0,
                        step=0.1,
                        label="Speech Speed"
                    )

                    emotion_dd = gr.Dropdown(
                        choices=["neutral", "happy", "sad", "angry", "surprised", "excited"],
                        value="neutral",
                        label="Emotion"
                    )

                # Button that triggers the dubbing run.
                start_btn = gr.Button(
                    "🎬 Start Dubbing",
                    variant="primary",
                    scale=1
                )

            with gr.Column(scale=1):
                gr.Markdown("### Output")

                # Status text; replaced by the message process_audio returns.
                status_md = gr.Markdown(
                    "Upload an audio file and select a target language to begin."
                )

                # Player for the dubbed result.
                audio_out = gr.Audio(
                    label="Dubbed Audio"
                )

                # Download hint (the audio widget provides the download control).
                gr.Markdown(
                    "Right-click the audio player to download, or use the download button in the audio widget."
                )

        # --- Examples -------------------------------------------------------
        gr.Markdown("### Examples")
        gr.Markdown(
            "Upload an audio file and select a target language to begin dubbing."
        )

        # --- Wiring ---------------------------------------------------------
        # The dropdown sentinel is translated to None before calling
        # process_audio; every other value is passed through unchanged.
        start_btn.click(
            fn=lambda clip, dst, src, rate, mood: process_audio(
                clip,
                dst,
                None if src == auto_detect else src,
                rate,
                mood
            ),
            inputs=[audio_in, target_dd, source_dd, speed_slider, emotion_dd],
            outputs=[audio_out, status_md]
        )

        # --- Footer ---------------------------------------------------------
        gr.Markdown("""
        ---
        **AudioDubb** | AI-Powered Multilingual Dubbing Engine
        - Built for Hugging Face Spaces
        - Supports 20+ languages
        - Privacy-first: No data storage or logging
        - Educational and personal use only
        """)

    return app
245
+
246
+
247
if __name__ == "__main__":
    # Launch options: listen on all interfaces at port 7860, no public share
    # link, errors surfaced in the UI, API page hidden, at most 10 threads.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "show_error": True,
        "show_api": False,
        "max_threads": 10,
    }
    try:
        # Build the interface, then start serving it.
        demo = create_interface()
        demo.launch(**launch_options)
    except Exception as e:
        # Log the failure and re-raise so the process still exits with an error.
        logger.error(f"Failed to launch interface: {str(e)}")
        raise