testingfaces committed on
Commit
9ea4293
·
verified ·
1 Parent(s): ceabe9e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +72 -153
app.py CHANGED
@@ -1,10 +1,16 @@
 
 
 
 
 
 
 
1
  """
2
  ClearWave AI - Cloud Audio Processing Pipeline
3
- Deployed on Hugging Face Spaces with ZeroGPU
4
  """
5
 
6
  import gradio as gr
7
- import spaces
8
  import os
9
  import time
10
  import tempfile
@@ -13,18 +19,15 @@ import shutil
13
  from denoiser import Denoiser
14
  from transcriber import Transcriber
15
  from translator import Translator
16
- # ─────────────────────────────────────────────
17
  # Init all 3 departments ONCE at startup
18
- # ─────────────────────────────────────────────
19
- print("🚀 ClearWave AI starting up...")
20
  denoiser = Denoiser()
21
  transcriber = Transcriber()
22
  translator = Translator()
23
- print("All 3 departments ready!")
24
 
25
- # ─────────────────────────────────────────────
26
- # Language mappings
27
- # ─────────────────────────────────────────────
28
  INPUT_LANG_MAP = {
29
  "Auto Detect": "auto",
30
  "English": "en",
@@ -43,218 +46,139 @@ OUTPUT_LANG_MAP = {
43
  }
44
 
45
  LANG_BADGES = {
46
- "en": "🇬🇧 English",
47
- "te": "🇮🇳 Telugu",
48
- "hi": "🇮🇳 Hindi",
49
- "ta": "🇮🇳 Tamil",
50
- "kn": "🇮🇳 Kannada",
51
- "auto": "🔍 Auto-detected",
52
  }
53
 
54
- # ─────────────────────────────────────────────
55
- # Core pipeline
56
- # ─────────────────────────────────────────────
57
- @spaces.GPU
58
  def process_audio(audio_path, input_lang_label, output_lang_label, progress=gr.Progress()):
59
  if audio_path is None:
60
- return None, "⚠️ Please upload an audio file.", "", "", "No audio uploaded"
61
 
62
  input_lang = INPUT_LANG_MAP.get(input_lang_label, "auto")
63
  output_lang = OUTPUT_LANG_MAP.get(output_lang_label, "te")
64
 
65
- temp_dir = tempfile.mkdtemp(prefix="clearwave_")
66
- timings = {}
67
  total_start = time.time()
68
 
69
  try:
70
- # ─── Dept 1: Denoise ─────────────────────────
71
- progress(0.05, desc="🎙️ Dept 1 Denoising audio with DeepFilterNet3…")
72
  t0 = time.time()
73
  denoised_path = denoiser.process(audio_path, temp_dir)
74
  timings["denoise"] = time.time() - t0
75
- progress(0.40, desc=f"Denoised in {timings['denoise']:.1f}s")
76
 
77
- # ─── Dept 2: Transcribe ───────────────────────
78
- progress(0.45, desc="📝 Dept 2 Transcribing with Groq Whisper large-v3…")
79
  t0 = time.time()
80
  transcript, detected_lang, tx_method = transcriber.transcribe(
81
  denoised_path, language=input_lang
82
  )
83
  timings["transcribe"] = time.time() - t0
84
- progress(0.75, desc=f"Transcribed in {timings['transcribe']:.1f}s [{tx_method}]")
85
 
86
- # ─── Dept 3: Translate ────────────────────────
87
- progress(0.80, desc="🌐 Dept 3 Translating with NLLB-200")
88
  t0 = time.time()
89
-
90
  effective_src = detected_lang if input_lang == "auto" else input_lang
91
  if effective_src == output_lang:
92
- translated = transcript
93
- tr_method = "skipped (same language)"
94
  else:
95
  translated, tr_method = translator.translate(
96
  transcript, src_lang=effective_src, tgt_lang=output_lang
97
  )
98
  timings["translate"] = time.time() - t0
99
- progress(0.95, desc=f"Translated in {timings['translate']:.1f}s [{tr_method}]")
100
 
101
  total_time = time.time() - total_start
102
 
103
- # ─── Format outputs ───────────────────────────
104
- src_badge = LANG_BADGES.get(effective_src, "🔍 Unknown")
105
- tgt_badge = LANG_BADGES.get(output_lang, "🌐")
106
 
107
  transcript_md = f"**{src_badge}**\n\n{transcript}"
108
  translated_md = f"**{tgt_badge}**\n\n{translated}"
109
 
110
  timing_md = (
111
- f"### ⏱️ Processing Times\n\n"
112
  f"| Department | Time | Method |\n"
113
  f"|---|---|---|\n"
114
- f"| 🎙️ Denoiser (Dept 1) | `{timings['denoise']:.1f}s` | DeepFilterNet3 |\n"
115
- f"| 📝 Transcriber (Dept 2) | `{timings['transcribe']:.1f}s` | {tx_method} |\n"
116
- f"| 🌐 Translator (Dept 3) | `{timings['translate']:.1f}s` | {tr_method} |\n"
117
- f"| **Total** | **`{total_time:.1f}s`** | 3-dept pipeline |\n\n"
118
- f"> Running on Hugging Face ZeroGPU (A10G 24GB) — 100% free"
119
  )
120
 
121
- progress(1.0, desc=f"🎉 Complete! {total_time:.1f}s total")
122
 
123
- # Copy denoised file to stable output path
124
  out_audio = os.path.join(temp_dir, "clearwave_denoised.wav")
125
  shutil.copy(denoised_path, out_audio)
126
 
127
- return (
128
- out_audio,
129
- transcript_md,
130
- translated_md,
131
- timing_md,
132
- f"✅ Pipeline complete in {total_time:.1f}s"
133
- )
134
 
135
  except Exception as e:
136
  import traceback
137
  err = traceback.format_exc()
138
- print(f"[ClearWave] Pipeline error:\n{err}")
139
- # Clean up temp on error
140
  shutil.rmtree(temp_dir, ignore_errors=True)
141
- return (
142
- None,
143
- f"❌ Error: {str(e)}",
144
- "",
145
- f"**Error details:**\n```\n{err}\n```",
146
- f"❌ Failed — {str(e)}"
147
- )
148
-
149
-
150
- # ─────────────────────────────────────────────
151
- # UI
152
- # ─────────────────────────────────────────────
153
- CSS = """
154
- body, .gradio-container { background:#0d1117 !important; color:#e6edf3 !important; }
155
-
156
- .header-wrap {
157
- background: linear-gradient(135deg,#161b22,#1c2128);
158
- border:1px solid #30363d; border-radius:12px;
159
- padding:28px 32px; margin-bottom:18px; text-align:center;
160
- }
161
- .header-wrap h1 {
162
- font-size:2.2em; font-weight:700; margin:0 0 6px;
163
- background:linear-gradient(90deg,#58a6ff,#3fb950,#f78166);
164
- -webkit-background-clip:text; -webkit-text-fill-color:transparent;
165
- }
166
- .header-wrap p { color:#8b949e; font-size:0.98em; margin:0; }
167
-
168
- .pipe-strip {
169
- display:flex; gap:8px; justify-content:center; flex-wrap:wrap; margin-bottom:14px;
170
- }
171
- .dept-pill {
172
- background:#21262d; border:1px solid #30363d;
173
- border-radius:20px; padding:5px 14px;
174
- font-size:0.82em; color:#8b949e;
175
- }
176
-
177
- .panel { background:#161b22 !important; border:1px solid #30363d !important; border-radius:10px !important; }
178
 
179
- footer { display:none !important; }
180
- """
181
 
182
- with gr.Blocks(css=CSS, title="ClearWave AI", theme=gr.themes.Base()) as demo:
 
183
 
184
- # Header
185
- gr.HTML("""
186
- <div class="header-wrap">
187
- <h1>🎵 ClearWave AI</h1>
188
- <p>Professional 3-Department Audio Processing Pipeline · ZeroGPU · 100% Free</p>
189
- </div>
190
- <div class="pipe-strip">
191
- <span class="dept-pill">🎙️ Dept 1 · DeepFilterNet3 Denoiser</span>
192
- <span class="dept-pill">📝 Dept 2 · Groq Whisper large-v3</span>
193
- <span class="dept-pill">🌐 Dept 3 · NLLB-200 Translator</span>
194
- </div>
195
  """)
196
 
197
- with gr.Row(equal_height=False):
198
-
199
- # ── Left: Input controls ──────────────────────
200
- with gr.Column(scale=1, min_width=280):
201
  audio_in = gr.Audio(
202
- label="🎤 Upload or Record Audio",
203
  type="filepath",
204
  sources=["upload", "microphone"],
205
  )
 
 
 
 
 
 
 
 
 
 
 
 
206
 
207
- with gr.Group():
208
- input_lang = gr.Dropdown(
209
- label="Input Language",
210
- choices=list(INPUT_LANG_MAP.keys()),
211
- value="Auto Detect",
212
- )
213
- output_lang = gr.Dropdown(
214
- label="Output Language",
215
- choices=list(OUTPUT_LANG_MAP.keys()),
216
- value="Telugu",
217
- )
218
-
219
- run_btn = gr.Button("⚡ Process Audio", variant="primary", size="lg")
220
- status_md = gr.Markdown("*Upload audio and press Process.*")
221
-
222
- # ── Right: Results ────────────────────────────
223
  with gr.Column(scale=2):
224
  with gr.Tabs():
225
- with gr.Tab("📝 Text Results"):
226
  with gr.Row():
227
  with gr.Column():
228
  gr.Markdown("#### Original Transcript")
229
- transcript_out = gr.Markdown("*Will appear here…*")
230
  with gr.Column():
231
  gr.Markdown("#### Translation")
232
- translation_out = gr.Markdown("*Will appear here…*")
233
 
234
- with gr.Tab("🎵 Clean Audio"):
235
  audio_out = gr.Audio(
236
- label="Denoised Audio (download)",
237
  type="filepath",
238
  interactive=False,
239
  )
240
- gr.Markdown(
241
- "*Noise-cancelled with DeepFilterNet3, "
242
- "normalized to EBU R128 broadcast standard.*"
243
- )
244
 
245
- with gr.Tab("⏱️ Timings"):
246
- timing_out = gr.Markdown("*Timings will appear after processing…*")
247
 
248
- # Footer
249
- gr.HTML("""
250
- <div style="text-align:center;padding:16px;color:#484f58;font-size:0.8em;
251
- border-top:1px solid #21262d;margin-top:16px;">
252
- ClearWave AI · DeepFilterNet3 + Groq Whisper large-v3 + NLLB-200-distilled-600M ·
253
- Hugging Face ZeroGPU (A10G 24GB)
254
- </div>
255
- """)
256
-
257
- # Wire up
258
  run_btn.click(
259
  fn=process_audio,
260
  inputs=[audio_in, input_lang, output_lang],
@@ -263,9 +187,4 @@ with gr.Blocks(css=CSS, title="ClearWave AI", theme=gr.themes.Base()) as demo:
263
  )
264
 
265
  if __name__ == "__main__":
266
- demo.launch(
267
- server_name="0.0.0.0",
268
- server_port=7860,
269
- show_error=True,
270
- max_file_size="100mb",
271
- )
 
1
+ # ── Python 3.13 compatibility patch: register an empty stand-in for the removed stdlib `audioop` module (must run before any import that needs it) ──
2
+ import sys
3
+ import types
4
+ _audioop = types.ModuleType('audioop')
5
+ sys.modules['audioop'] = _audioop
6
+ sys.modules['pyaudioop'] = _audioop
7
+
8
  """
9
  ClearWave AI - Cloud Audio Processing Pipeline
10
+ Deployed on Hugging Face Spaces
11
  """
12
 
13
  import gradio as gr
 
14
  import os
15
  import time
16
  import tempfile
 
19
  from denoiser import Denoiser
20
  from transcriber import Transcriber
21
  from translator import Translator
22
+
23
  # Init all 3 departments ONCE at startup
24
+ print("ClearWave AI starting up...")
 
25
  denoiser = Denoiser()
26
  transcriber = Transcriber()
27
  translator = Translator()
28
+ print("All 3 departments ready!")
29
 
30
+ # ── Language mappings ─────────────────────────────────
 
 
31
  INPUT_LANG_MAP = {
32
  "Auto Detect": "auto",
33
  "English": "en",
 
46
  }
47
 
48
  LANG_BADGES = {
49
+ "en": "English",
50
+ "te": "Telugu",
51
+ "hi": "Hindi",
52
+ "ta": "Tamil",
53
+ "kn": "Kannada",
54
+ "auto": "Auto-detected",
55
  }
56
 
57
+ # ── Core pipeline ─────────────────────────────────────
 
 
 
58
  def process_audio(audio_path, input_lang_label, output_lang_label, progress=gr.Progress()):
59
  if audio_path is None:
60
+ return None, "Please upload an audio file.", "", "", "No audio uploaded"
61
 
62
  input_lang = INPUT_LANG_MAP.get(input_lang_label, "auto")
63
  output_lang = OUTPUT_LANG_MAP.get(output_lang_label, "te")
64
 
65
+ temp_dir = tempfile.mkdtemp(prefix="clearwave_")
66
+ timings = {}
67
  total_start = time.time()
68
 
69
  try:
70
+ # Dept 1: Denoise
71
+ progress(0.05, desc="Dept 1 - Denoising audio...")
72
  t0 = time.time()
73
  denoised_path = denoiser.process(audio_path, temp_dir)
74
  timings["denoise"] = time.time() - t0
75
+ progress(0.40, desc=f"Denoised in {timings['denoise']:.1f}s")
76
 
77
+ # Dept 2: Transcribe
78
+ progress(0.45, desc="Dept 2 - Transcribing with Groq Whisper...")
79
  t0 = time.time()
80
  transcript, detected_lang, tx_method = transcriber.transcribe(
81
  denoised_path, language=input_lang
82
  )
83
  timings["transcribe"] = time.time() - t0
84
+ progress(0.75, desc=f"Transcribed in {timings['transcribe']:.1f}s [{tx_method}]")
85
 
86
+ # Dept 3: Translate
87
+ progress(0.80, desc="Dept 3 - Translating with NLLB-200...")
88
  t0 = time.time()
 
89
  effective_src = detected_lang if input_lang == "auto" else input_lang
90
  if effective_src == output_lang:
91
+ translated = transcript
92
+ tr_method = "skipped (same language)"
93
  else:
94
  translated, tr_method = translator.translate(
95
  transcript, src_lang=effective_src, tgt_lang=output_lang
96
  )
97
  timings["translate"] = time.time() - t0
98
+ progress(0.95, desc=f"Translated in {timings['translate']:.1f}s [{tr_method}]")
99
 
100
  total_time = time.time() - total_start
101
 
102
+ src_badge = LANG_BADGES.get(effective_src, "Unknown")
103
+ tgt_badge = LANG_BADGES.get(output_lang, "Unknown")
 
104
 
105
  transcript_md = f"**{src_badge}**\n\n{transcript}"
106
  translated_md = f"**{tgt_badge}**\n\n{translated}"
107
 
108
  timing_md = (
109
+ f"### Processing Times\n\n"
110
  f"| Department | Time | Method |\n"
111
  f"|---|---|---|\n"
112
+ f"| Denoiser (Dept 1) | `{timings['denoise']:.1f}s` | noisereduce |\n"
113
+ f"| Transcriber (Dept 2) | `{timings['transcribe']:.1f}s` | {tx_method} |\n"
114
+ f"| Translator (Dept 3) | `{timings['translate']:.1f}s` | {tr_method} |\n"
115
+ f"| **Total** | **`{total_time:.1f}s`** | 3-dept pipeline |"
 
116
  )
117
 
118
+ progress(1.0, desc=f"Complete! {total_time:.1f}s")
119
 
 
120
  out_audio = os.path.join(temp_dir, "clearwave_denoised.wav")
121
  shutil.copy(denoised_path, out_audio)
122
 
123
+ return out_audio, transcript_md, translated_md, timing_md, f"Done in {total_time:.1f}s"
 
 
 
 
 
 
124
 
125
  except Exception as e:
126
  import traceback
127
  err = traceback.format_exc()
128
+ print(f"Pipeline error:\n{err}")
 
129
  shutil.rmtree(temp_dir, ignore_errors=True)
130
+ return None, f"Error: {str(e)}", "", f"```\n{err}\n```", f"Failed: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
 
 
 
132
 
133
+ # ── Gradio UI ─────────────────────────────────────────
134
+ with gr.Blocks(title="ClearWave AI") as demo:
135
 
136
+ gr.Markdown("""
137
+ # ClearWave AI
138
+ **3-Department Audio Pipeline: Denoise → Transcribe → Translate**
 
 
 
 
 
 
 
 
139
  """)
140
 
141
+ with gr.Row():
142
+ with gr.Column(scale=1):
 
 
143
  audio_in = gr.Audio(
144
+ label="Upload or Record Audio",
145
  type="filepath",
146
  sources=["upload", "microphone"],
147
  )
148
+ input_lang = gr.Dropdown(
149
+ label="Input Language",
150
+ choices=list(INPUT_LANG_MAP.keys()),
151
+ value="Auto Detect",
152
+ )
153
+ output_lang = gr.Dropdown(
154
+ label="Output Language",
155
+ choices=list(OUTPUT_LANG_MAP.keys()),
156
+ value="Telugu",
157
+ )
158
+ run_btn = gr.Button("Process Audio", variant="primary", size="lg")
159
+ status_md = gr.Markdown("Upload audio and press Process.")
160
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
  with gr.Column(scale=2):
162
  with gr.Tabs():
163
+ with gr.Tab("Text Results"):
164
  with gr.Row():
165
  with gr.Column():
166
  gr.Markdown("#### Original Transcript")
167
+ transcript_out = gr.Markdown("Will appear here...")
168
  with gr.Column():
169
  gr.Markdown("#### Translation")
170
+ translation_out = gr.Markdown("Will appear here...")
171
 
172
+ with gr.Tab("Clean Audio"):
173
  audio_out = gr.Audio(
174
+ label="Denoised Audio",
175
  type="filepath",
176
  interactive=False,
177
  )
 
 
 
 
178
 
179
+ with gr.Tab("Timings"):
180
+ timing_out = gr.Markdown("Timings will appear after processing...")
181
 
 
 
 
 
 
 
 
 
 
 
182
  run_btn.click(
183
  fn=process_audio,
184
  inputs=[audio_in, input_lang, output_lang],
 
187
  )
188
 
189
  if __name__ == "__main__":
190
+ demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)