Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -8,6 +8,7 @@ import json
|
|
| 8 |
import base64
|
| 9 |
import tempfile
|
| 10 |
import logging
|
|
|
|
| 11 |
import threading
|
| 12 |
import time
|
| 13 |
import requests
|
|
@@ -36,6 +37,40 @@ LANGUAGES_DISPLAY = {
|
|
| 36 |
}
|
| 37 |
OUT_LANGS = {k: v for k, v in LANGUAGES_DISPLAY.items() if k != "Auto Detect"}
|
| 38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
|
| 40 |
|
| 41 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
@@ -121,7 +156,7 @@ def run_pipeline(audio_path, src_lang="auto", tgt_lang="te",
|
|
| 121 |
|
| 122 |
|
| 123 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 124 |
-
# GRADIO UI
|
| 125 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 126 |
def process_audio_gradio(audio_path, in_lang_name, out_lang_name,
|
| 127 |
opt_fillers, opt_stutters, opt_silences,
|
|
@@ -129,6 +164,10 @@ def process_audio_gradio(audio_path, in_lang_name, out_lang_name,
|
|
| 129 |
if audio_path is None:
|
| 130 |
yield ("β Please upload an audio file.", "", "", None, "", "")
|
| 131 |
return
|
|
|
|
|
|
|
|
|
|
|
|
|
| 132 |
src_lang = LANGUAGES_DISPLAY.get(in_lang_name, "auto")
|
| 133 |
tgt_lang = LANGUAGES_DISPLAY.get(out_lang_name, "te")
|
| 134 |
for result in run_pipeline(audio_path, src_lang, tgt_lang,
|
|
@@ -159,8 +198,11 @@ with gr.Blocks(title="ClearWave AI") as demo:
|
|
| 159 |
gr.Markdown("# π΅ ClearWave AI\n### Professional Audio Enhancement")
|
| 160 |
with gr.Row():
|
| 161 |
with gr.Column(scale=1):
|
| 162 |
-
audio_in = gr.Audio(
|
| 163 |
-
|
|
|
|
|
|
|
|
|
|
| 164 |
with gr.Row():
|
| 165 |
in_lang = gr.Dropdown(label="Input Language",
|
| 166 |
choices=list(LANGUAGES_DISPLAY.keys()),
|
|
@@ -215,6 +257,9 @@ async def api_health():
|
|
| 215 |
@demo.app.post("/api/process-url")
|
| 216 |
async def api_process_url(request: _Request):
|
| 217 |
data = await request.json()
|
|
|
|
|
|
|
|
|
|
| 218 |
audio_url = data.get("audioUrl")
|
| 219 |
audio_id = data.get("audioId", "")
|
| 220 |
src_lang = data.get("srcLang", "auto")
|
|
@@ -240,7 +285,16 @@ async def api_process_url(request: _Request):
|
|
| 240 |
try:
|
| 241 |
resp = requests.get(audio_url, timeout=60, stream=True)
|
| 242 |
resp.raise_for_status()
|
| 243 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 244 |
tmp = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
|
| 245 |
downloaded = 0
|
| 246 |
total = int(resp.headers.get("content-length", 0))
|
|
@@ -250,13 +304,17 @@ async def api_process_url(request: _Request):
|
|
| 250 |
downloaded += len(chunk)
|
| 251 |
if total:
|
| 252 |
pct = int(downloaded * 100 / total)
|
| 253 |
-
yield sse({"status": "processing", "step": 0,
|
|
|
|
| 254 |
tmp.close()
|
| 255 |
except Exception as e:
|
| 256 |
yield sse({"status": "error", "message": "Download failed: " + str(e)})
|
| 257 |
return
|
| 258 |
|
| 259 |
-
|
|
|
|
|
|
|
|
|
|
| 260 |
opt_fillers, opt_stutters, opt_silences,
|
| 261 |
opt_breaths, opt_mouth):
|
| 262 |
result["audioId"] = audio_id
|
|
@@ -264,6 +322,8 @@ async def api_process_url(request: _Request):
|
|
| 264 |
|
| 265 |
try:
|
| 266 |
os.unlink(tmp.name)
|
|
|
|
|
|
|
| 267 |
except Exception:
|
| 268 |
pass
|
| 269 |
|
|
@@ -276,7 +336,7 @@ async def api_process_url(request: _Request):
|
|
| 276 |
logger.info("β
/api/health and /api/process-url registered on demo.app")
|
| 277 |
|
| 278 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 279 |
-
# LAUNCH
|
| 280 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 281 |
if __name__ == "__main__":
|
| 282 |
demo.launch()
|
|
|
|
| 8 |
import base64
|
| 9 |
import tempfile
|
| 10 |
import logging
|
| 11 |
+
import subprocess
|
| 12 |
import threading
|
| 13 |
import time
|
| 14 |
import requests
|
|
|
|
| 37 |
}
|
| 38 |
OUT_LANGS = {k: v for k, v in LANGUAGES_DISPLAY.items() if k != "Auto Detect"}
|
| 39 |
|
| 40 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 41 |
+
# AUDIO FORMAT CONVERTER — supports .mpeg, .mp4, .m4a etc.
|
| 42 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 43 |
+
def convert_to_wav(audio_path: str) -> str:
    """Convert an audio/video container to 16 kHz mono 16-bit PCM WAV via ffmpeg.

    Files whose extension is .wav, .mp3, .flac, .ogg or .aac are treated as
    already usable and are returned untouched. Anything else (.mpeg, .mp4,
    .m4a, .wma, .amr, ...) is transcoded into a sibling file named
    ``<audio_path>_converted.wav``.

    Args:
        audio_path: Path of the input file. ``None`` or an empty string is
            returned unchanged so callers can pass through "no upload".

    Returns:
        The path of the converted .wav file on success; otherwise the
        original ``audio_path`` (pass-through extension, ffmpeg missing, or
        ffmpeg reporting an error). Conversion problems are logged as
        warnings and never raised to the caller.

    Note:
        When a new file is produced, the caller owns it and is responsible
        for deleting it once processing is finished.
    """
    # Nothing to convert: covers both None and "".
    if not audio_path:
        return audio_path

    ext = os.path.splitext(audio_path)[1].lower()
    # Already a safe format — no conversion needed.
    if ext in (".wav", ".mp3", ".flac", ".ogg", ".aac"):
        return audio_path

    converted = audio_path + "_converted.wav"

    def _discard_partial_output() -> None:
        # A failed ffmpeg run can leave a truncated file behind; remove it so
        # it neither leaks disk space nor gets mistaken for a valid result.
        try:
            os.unlink(converted)
        except OSError:
            pass  # never created, or already gone

    # Convert .mpeg / .mp4 / .m4a / .wma / .amr etc. → .wav
    try:
        result = subprocess.run(
            [
                "ffmpeg", "-y", "-i", audio_path,
                "-ar", "16000",          # 16 kHz sample rate
                "-ac", "1",              # mono
                "-acodec", "pcm_s16le",  # 16-bit little-endian PCM
                converted,
            ],
            # Do not let ffmpeg inherit (and read from) the server's stdin.
            stdin=subprocess.DEVNULL,
            capture_output=True,
        )
    except (OSError, ValueError, subprocess.SubprocessError) as e:
        # Typically: ffmpeg is not installed or the path is not usable.
        logger.warning(f"Conversion error: {e}")
        _discard_partial_output()
        return audio_path

    if result.returncode == 0 and os.path.exists(converted):
        logger.info(f"Converted {ext} → .wav successfully")
        return converted

    # ffmpeg's stderr is not guaranteed to be valid UTF-8; decode leniently so
    # the real error message is logged instead of a UnicodeDecodeError.
    logger.warning(f"Conversion failed: {result.stderr.decode(errors='replace')}")
    _discard_partial_output()
    return audio_path
|
| 74 |
|
| 75 |
|
| 76 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
|
|
| 156 |
|
| 157 |
|
| 158 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 159 |
+
# GRADIO UI
|
| 160 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 161 |
def process_audio_gradio(audio_path, in_lang_name, out_lang_name,
|
| 162 |
opt_fillers, opt_stutters, opt_silences,
|
|
|
|
| 164 |
if audio_path is None:
|
| 165 |
yield ("β Please upload an audio file.", "", "", None, "", "")
|
| 166 |
return
|
| 167 |
+
|
| 168 |
+
# ✅ Auto-convert .mpeg / .mp4 / .m4a and any unsupported format → .wav
|
| 169 |
+
audio_path = convert_to_wav(audio_path)
|
| 170 |
+
|
| 171 |
src_lang = LANGUAGES_DISPLAY.get(in_lang_name, "auto")
|
| 172 |
tgt_lang = LANGUAGES_DISPLAY.get(out_lang_name, "te")
|
| 173 |
for result in run_pipeline(audio_path, src_lang, tgt_lang,
|
|
|
|
| 198 |
gr.Markdown("# π΅ ClearWave AI\n### Professional Audio Enhancement")
|
| 199 |
with gr.Row():
|
| 200 |
with gr.Column(scale=1):
|
| 201 |
+
audio_in = gr.Audio(
|
| 202 |
+
label="π Upload Audio (MP3, WAV, MPEG, MP4, AAC, OGG, FLAC, AMR...)",
|
| 203 |
+
type="filepath",
|
| 204 |
+
sources=["upload", "microphone"],
|
| 205 |
+
)
|
| 206 |
with gr.Row():
|
| 207 |
in_lang = gr.Dropdown(label="Input Language",
|
| 208 |
choices=list(LANGUAGES_DISPLAY.keys()),
|
|
|
|
| 257 |
@demo.app.post("/api/process-url")
|
| 258 |
async def api_process_url(request: _Request):
|
| 259 |
data = await request.json()
|
| 260 |
+
# Handle both plain JSON and Gradio-wrapped {"data": {...}}
|
| 261 |
+
if "data" in data and isinstance(data["data"], dict):
|
| 262 |
+
data = data["data"]
|
| 263 |
audio_url = data.get("audioUrl")
|
| 264 |
audio_id = data.get("audioId", "")
|
| 265 |
src_lang = data.get("srcLang", "auto")
|
|
|
|
| 285 |
try:
|
| 286 |
resp = requests.get(audio_url, timeout=60, stream=True)
|
| 287 |
resp.raise_for_status()
|
| 288 |
+
# ✅ Detect correct suffix from URL
|
| 289 |
+
url_lower = audio_url.lower()
|
| 290 |
+
if "wav" in url_lower:
|
| 291 |
+
suffix = ".wav"
|
| 292 |
+
elif "mpeg" in url_lower:
|
| 293 |
+
suffix = ".mpeg"
|
| 294 |
+
elif "mp4" in url_lower:
|
| 295 |
+
suffix = ".mp4"
|
| 296 |
+
else:
|
| 297 |
+
suffix = ".mp3"
|
| 298 |
tmp = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
|
| 299 |
downloaded = 0
|
| 300 |
total = int(resp.headers.get("content-length", 0))
|
|
|
|
| 304 |
downloaded += len(chunk)
|
| 305 |
if total:
|
| 306 |
pct = int(downloaded * 100 / total)
|
| 307 |
+
yield sse({"status": "processing", "step": 0,
|
| 308 |
+
"message": "Downloading... " + str(pct) + "%"})
|
| 309 |
tmp.close()
|
| 310 |
except Exception as e:
|
| 311 |
yield sse({"status": "error", "message": "Download failed: " + str(e)})
|
| 312 |
return
|
| 313 |
|
| 314 |
+
# ✅ Convert to wav if needed
|
| 315 |
+
converted_path = convert_to_wav(tmp.name)
|
| 316 |
+
|
| 317 |
+
for result in run_pipeline(converted_path, src_lang, tgt_lang,
|
| 318 |
opt_fillers, opt_stutters, opt_silences,
|
| 319 |
opt_breaths, opt_mouth):
|
| 320 |
result["audioId"] = audio_id
|
|
|
|
| 322 |
|
| 323 |
try:
|
| 324 |
os.unlink(tmp.name)
|
| 325 |
+
if converted_path != tmp.name:
|
| 326 |
+
os.unlink(converted_path)
|
| 327 |
except Exception:
|
| 328 |
pass
|
| 329 |
|
|
|
|
| 336 |
logger.info("β
/api/health and /api/process-url registered on demo.app")
|
| 337 |
|
| 338 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 339 |
+
# LAUNCH
|
| 340 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 341 |
if __name__ == "__main__":
|
| 342 |
demo.launch()
|