testingfaces commited on
Commit
7c3a40a
·
verified ·
1 Parent(s): 6c1060d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +97 -156
app.py CHANGED
@@ -1,190 +1,131 @@
1
- # ── Python 3.13 compatibility patch (must be first) ──
2
  import sys
3
  import types
4
- _audioop = types.ModuleType('audioop')
5
- sys.modules['audioop'] = _audioop
6
- sys.modules['pyaudioop'] = _audioop
7
-
8
- """
9
- ClearWave AI - Cloud Audio Processing Pipeline
10
- Deployed on Hugging Face Spaces
11
- """
12
 
13
  import gradio as gr
14
  import os
15
  import time
16
  import tempfile
17
  import shutil
 
 
18
 
19
- from denoiser import Denoiser
20
- from transcriber import Transcriber
21
- from translator import Translator
22
-
23
- # Init all 3 departments ONCE at startup
24
- print("ClearWave AI starting up...")
25
- denoiser = Denoiser()
26
- transcriber = Transcriber()
27
- translator = Translator()
28
- print("All 3 departments ready!")
29
 
30
- # ── Language mappings ─────────────────────────────────
31
- INPUT_LANG_MAP = {
32
- "Auto Detect": "auto",
33
- "English": "en",
34
- "Telugu": "te",
35
- "Hindi": "hi",
36
- "Tamil": "ta",
37
- "Kannada": "kn",
38
- }
39
 
40
- OUTPUT_LANG_MAP = {
41
- "Telugu": "te",
42
- "Hindi": "hi",
43
- "Tamil": "ta",
44
- "English": "en",
45
- "Kannada": "kn",
46
  }
47
 
48
- LANG_BADGES = {
49
- "en": "English",
50
- "te": "Telugu",
51
- "hi": "Hindi",
52
- "ta": "Tamil",
53
- "kn": "Kannada",
54
- "auto": "Auto-detected",
55
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
57
- # ── Core pipeline ─────────────────────────────────────
58
- def process_audio(audio_path, input_lang_label, output_lang_label, progress=gr.Progress()):
59
  if audio_path is None:
60
- return None, "Please upload an audio file.", "", "", "No audio uploaded"
61
-
62
- input_lang = INPUT_LANG_MAP.get(input_lang_label, "auto")
63
- output_lang = OUTPUT_LANG_MAP.get(output_lang_label, "te")
64
-
65
- temp_dir = tempfile.mkdtemp(prefix="clearwave_")
66
- timings = {}
67
- total_start = time.time()
68
-
69
  try:
70
- # Dept 1: Denoise
71
- progress(0.05, desc="Dept 1 - Denoising audio...")
72
  t0 = time.time()
73
- denoised_path = denoiser.process(audio_path, temp_dir)
74
- timings["denoise"] = time.time() - t0
75
- progress(0.40, desc=f"Denoised in {timings['denoise']:.1f}s")
76
-
77
- # Dept 2: Transcribe
78
- progress(0.45, desc="Dept 2 - Transcribing with Groq Whisper...")
79
  t0 = time.time()
80
- transcript, detected_lang, tx_method = transcriber.transcribe(
81
- denoised_path, language=input_lang
82
- )
83
- timings["transcribe"] = time.time() - t0
84
- progress(0.75, desc=f"Transcribed in {timings['transcribe']:.1f}s [{tx_method}]")
85
-
86
- # Dept 3: Translate
87
- progress(0.80, desc="Dept 3 - Translating with NLLB-200...")
88
  t0 = time.time()
89
- effective_src = detected_lang if input_lang == "auto" else input_lang
90
- if effective_src == output_lang:
91
- translated = transcript
92
- tr_method = "skipped (same language)"
93
- else:
94
- translated, tr_method = translator.translate(
95
- transcript, src_lang=effective_src, tgt_lang=output_lang
96
- )
97
- timings["translate"] = time.time() - t0
98
- progress(0.95, desc=f"Translated in {timings['translate']:.1f}s [{tr_method}]")
99
-
100
- total_time = time.time() - total_start
101
-
102
- src_badge = LANG_BADGES.get(effective_src, "Unknown")
103
- tgt_badge = LANG_BADGES.get(output_lang, "Unknown")
104
-
105
- transcript_md = f"**{src_badge}**\n\n{transcript}"
106
- translated_md = f"**{tgt_badge}**\n\n{translated}"
107
-
108
- timing_md = (
109
- f"### Processing Times\n\n"
110
- f"| Department | Time | Method |\n"
111
- f"|---|---|---|\n"
112
- f"| Denoiser (Dept 1) | `{timings['denoise']:.1f}s` | noisereduce |\n"
113
- f"| Transcriber (Dept 2) | `{timings['transcribe']:.1f}s` | {tx_method} |\n"
114
- f"| Translator (Dept 3) | `{timings['translate']:.1f}s` | {tr_method} |\n"
115
- f"| **Total** | **`{total_time:.1f}s`** | 3-dept pipeline |"
116
- )
117
-
118
- progress(1.0, desc=f"Complete! {total_time:.1f}s")
119
-
120
- out_audio = os.path.join(temp_dir, "clearwave_denoised.wav")
121
- shutil.copy(denoised_path, out_audio)
122
-
123
- return out_audio, transcript_md, translated_md, timing_md, f"Done in {total_time:.1f}s"
124
-
125
  except Exception as e:
126
  import traceback
127
- err = traceback.format_exc()
128
- print(f"Pipeline error:\n{err}")
129
- shutil.rmtree(temp_dir, ignore_errors=True)
130
- return None, f"Error: {str(e)}", "", f"```\n{err}\n```", f"Failed: {str(e)}"
131
-
132
 
133
- # ── Gradio UI ─────────────────────────────────────────
134
  with gr.Blocks(title="ClearWave AI") as demo:
135
-
136
- gr.Markdown("""
137
- # ClearWave AI
138
- **3-Department Audio Pipeline: Denoise → Transcribe → Translate**
139
- """)
140
-
141
  with gr.Row():
142
  with gr.Column(scale=1):
143
- audio_in = gr.Audio(
144
- label="Upload or Record Audio",
145
- type="filepath",
146
- sources=["upload", "microphone"],
147
- )
148
- input_lang = gr.Dropdown(
149
- label="Input Language",
150
- choices=list(INPUT_LANG_MAP.keys()),
151
- value="Auto Detect",
152
- )
153
- output_lang = gr.Dropdown(
154
- label="Output Language",
155
- choices=list(OUTPUT_LANG_MAP.keys()),
156
- value="Telugu",
157
- )
158
- run_btn = gr.Button("Process Audio", variant="primary", size="lg")
159
- status_md = gr.Markdown("Upload audio and press Process.")
160
-
161
  with gr.Column(scale=2):
162
  with gr.Tabs():
163
- with gr.Tab("Text Results"):
164
  with gr.Row():
165
  with gr.Column():
166
- gr.Markdown("#### Original Transcript")
167
- transcript_out = gr.Markdown("Will appear here...")
168
  with gr.Column():
169
  gr.Markdown("#### Translation")
170
- translation_out = gr.Markdown("Will appear here...")
171
-
172
  with gr.Tab("Clean Audio"):
173
- audio_out = gr.Audio(
174
- label="Denoised Audio",
175
- type="filepath",
176
- interactive=False,
177
- )
178
-
179
  with gr.Tab("Timings"):
180
- timing_out = gr.Markdown("Timings will appear after processing...")
181
-
182
- run_btn.click(
183
- fn=process_audio,
184
- inputs=[audio_in, input_lang, output_lang],
185
- outputs=[audio_out, transcript_out, translation_out, timing_out, status_md],
186
- show_progress=True,
187
- )
188
 
189
- if __name__ == "__main__":
190
- demo.launch()
 
1
+ # Fix pydub on Python 3.13
2
  import sys
3
  import types
4
+ _a = types.ModuleType('audioop')
5
+ sys.modules['audioop'] = _a
6
+ sys.modules['pyaudioop'] = _a
 
 
 
 
 
7
 
8
  import gradio as gr
9
  import os
10
  import time
11
  import tempfile
12
  import shutil
13
+ import subprocess
14
+ import numpy as np
15
 
16
+ print("ClearWave AI starting...")
 
 
 
 
 
 
 
 
 
17
 
18
# Display labels for the two UI dropdowns.
INPUT_LANGS = ["Auto Detect", "English", "Telugu", "Hindi", "Tamil", "Kannada"]
OUTPUT_LANGS = ["Telugu", "Hindi", "Tamil", "English", "Kannada"]

# UI label -> ISO-639-1 code ("auto" = let Whisper detect the language).
LANG_CODES = {
    "Auto Detect": "auto",
    "English": "en",
    "Telugu": "te",
    "Hindi": "hi",
    "Tamil": "ta",
    "Kannada": "kn",
}
25
 
26
def denoise(audio_path, out_dir):
    """Convert *audio_path* to 16 kHz mono WAV, spectrally denoise it, and
    peak-normalize with 10% headroom.

    Returns the path of the denoised WAV written inside *out_dir*.
    Raises RuntimeError if ffmpeg cannot decode the input file.
    """
    import soundfile as sf
    import noisereduce as nr
    wav = os.path.join(out_dir, "input.wav")
    # Resample/downmix to 16 kHz mono — the format the transcriber expects.
    result = subprocess.run(
        ["ffmpeg", "-y", "-i", audio_path, "-ar", "16000", "-ac", "1", "-f", "wav", wav],
        capture_output=True,
    )
    if result.returncode != 0:
        # Fail loudly here instead of letting sf.read() die later on a
        # missing/empty file with a confusing message.
        detail = result.stderr.decode(errors="replace")[-500:]
        raise RuntimeError(f"ffmpeg failed to decode audio: {detail}")
    data, sr = sf.read(wav)
    data = data.astype(np.float32)
    # Spectral-gating noise reduction over the whole clip.
    cleaned = nr.reduce_noise(y=data, sr=sr).astype(np.float32)
    # Peak-normalize to 0.9; skip silent clips to avoid division by zero.
    peak = np.abs(cleaned).max()
    if peak > 0:
        cleaned = cleaned / peak * 0.9
    out = os.path.join(out_dir, "denoised.wav")
    sf.write(out, cleaned, sr)
    return out
40
+
41
def transcribe(audio_path, language="auto"):
    """Transcribe *audio_path* with the Groq Whisper large-v3 API.

    Returns (text, iso_lang_code, method_label). When GROQ_API_KEY is
    not set, returns a placeholder message without calling the API.
    """
    api_key = os.environ.get("GROQ_API_KEY", "")
    if not api_key:
        return "No GROQ_API_KEY set.", "en", "no key"
    from groq import Groq
    request = {
        "model": "whisper-large-v3",
        "response_format": "verbose_json",
        "temperature": 0.0,
    }
    # Only pin the language when the caller did not ask for auto-detect.
    if language and language != "auto":
        request["language"] = language
    with open(audio_path, "rb") as audio_file:
        resp = Groq(api_key=api_key).audio.transcriptions.create(file=audio_file, **request)
    text = resp.text.strip()
    # Whisper reports a language name (e.g. "english"); map known names to
    # ISO codes, otherwise fall back to the first two letters.
    raw_lang = getattr(resp, "language", language or "en") or "en"
    by_name = {"english": "en", "telugu": "te", "hindi": "hi", "tamil": "ta", "kannada": "kn"}
    if raw_lang.lower() in by_name:
        lang = by_name[raw_lang.lower()]
    elif len(raw_lang) >= 2:
        lang = raw_lang[:2].lower()
    else:
        lang = raw_lang
    return text, lang, "Groq Whisper large-v3"
57
+
58
def translate(text, src, tgt):
    """Translate *text* from *src* to *tgt* (ISO codes) via Google Translate.

    Returns (translated_text, method_label). Translation is skipped when
    the languages match or the text is blank; failures are reported
    in-band in the returned tuple rather than raised.
    """
    if src == tgt or not text.strip():
        return text, "skipped"
    try:
        from deep_translator import GoogleTranslator
        translator = GoogleTranslator(source=src, target=tgt)
        return translator.translate(text), "Google Translate"
    except Exception as e:
        return f"Translation failed: {e}", "error"
67
 
68
def process(audio_path, in_lang_label, out_lang_label, progress=gr.Progress()):
    """Run the full denoise → transcribe → translate pipeline.

    Returns the 5-tuple wired to the Gradio outputs:
    (denoised_audio_path, transcript, translation, timing_markdown, status).
    On failure, the temp dir is removed and the traceback is shown (fenced)
    in the timing slot.
    """
    if audio_path is None:
        return None, "Please upload audio.", "", "", "No audio"
    in_lang = LANG_CODES.get(in_lang_label, "auto")
    out_lang = LANG_CODES.get(out_lang_label, "te")
    tmp = tempfile.mkdtemp()
    t_total = time.time()
    try:
        progress(0.1, desc="Denoising...")
        t0 = time.time()
        clean = denoise(audio_path, tmp)
        t1 = time.time() - t0
        progress(0.4, desc="Transcribing with Groq...")
        t0 = time.time()
        transcript, detected, tx_m = transcribe(clean, in_lang)
        t2 = time.time() - t0
        progress(0.75, desc="Translating...")
        t0 = time.time()
        # With auto-detect, trust Whisper's detected language as the source.
        src = detected if in_lang == "auto" else in_lang
        translated, tr_m = translate(transcript, src, out_lang)
        t3 = time.time() - t0
        total = time.time() - t_total
        progress(1.0, desc=f"Done in {total:.1f}s!")
        timing = (f"| Step | Time | Method |\n|---|---|---|\n"
                  f"| Denoise | {t1:.1f}s | noisereduce |\n"
                  f"| Transcribe | {t2:.1f}s | {tx_m} |\n"
                  f"| Translate | {t3:.1f}s | {tr_m} |\n"
                  f"| **Total** | **{total:.1f}s** | |")
        out_audio = os.path.join(tmp, "output.wav")
        shutil.copy(clean, out_audio)
        return out_audio, transcript, translated, timing, f"Done in {total:.1f}s"
    except Exception as e:
        import traceback
        err = traceback.format_exc()
        # Log server-side, then remove the temp dir — its contents are
        # only needed on success (the denoised audio handed to Gradio).
        print(f"Pipeline error:\n{err}")
        shutil.rmtree(tmp, ignore_errors=True)
        # Fence the traceback so it renders readably in the Markdown tab.
        return None, f"Error: {e}", "", f"```\n{err}\n```", "Failed"
 
 
 
 
102
 
 
103
# ── Gradio UI ──
with gr.Blocks(title="ClearWave AI") as demo:
    gr.Markdown("# ClearWave AI\n**Denoise → Transcribe → Translate**")
    with gr.Row():
        with gr.Column(scale=1):
            audio_in = gr.Audio(label="Upload Audio", type="filepath", sources=["upload", "microphone"])
            in_lang = gr.Dropdown(INPUT_LANGS, value="Auto Detect", label="Input Language")
            out_lang = gr.Dropdown(OUTPUT_LANGS, value="Telugu", label="Output Language")
            run_btn = gr.Button("Process Audio", variant="primary", size="lg")
            status = gr.Markdown("Upload audio and click Process.")
        with gr.Column(scale=2):
            with gr.Tabs():
                with gr.Tab("Text"):
                    with gr.Row():
                        with gr.Column():
                            gr.Markdown("#### Transcript")
                            transcript_out = gr.Markdown("...")
                        with gr.Column():
                            gr.Markdown("#### Translation")
                            translation_out = gr.Markdown("...")
                with gr.Tab("Clean Audio"):
                    audio_out = gr.Audio(label="Denoised", type="filepath", interactive=False)
                with gr.Tab("Timings"):
                    timing_out = gr.Markdown("...")
    run_btn.click(fn=process, inputs=[audio_in, in_lang, out_lang],
                  outputs=[audio_out, transcript_out, translation_out, timing_out, status],
                  show_progress=True)

print("ClearWave AI ready!")
# Guard the launch so importing this module (e.g. from tests) does not
# start the server. HF Spaces executes app.py as __main__, so the Space
# behaves exactly as before.
if __name__ == "__main__":
    demo.launch()