"""
ClearWave AI — API Space (FastAPI only)
Handles /api/health and /api/process-url
No Gradio, no routing conflicts.
"""
| import os | |
| import json | |
| import tempfile | |
| import logging | |
| import requests | |
| import numpy as np | |
| import cloudinary | |
| import cloudinary.uploader | |
| from fastapi import FastAPI, Request | |
| from fastapi.responses import StreamingResponse, JSONResponse | |
| from fastapi.middleware.cors import CORSMiddleware | |
# Cloudinary config — set these in your HF Space secrets
cloudinary.config(
    cloud_name = os.environ.get("CLOUD_NAME"),
    api_key = os.environ.get("API_KEY"),
    api_secret = os.environ.get("API_SECRET"),
)

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Project-local pipeline stages.
# NOTE(review): instantiated eagerly at import time — if these constructors
# load ML models, app startup pays that cost up front; confirm intended.
from denoiser import Denoiser
from transcriber import Transcriber
from translator import Translator

denoiser = Denoiser()
transcriber = Transcriber()
translator = Translator()

app = FastAPI(title="ClearWave AI API")

# Wide-open CORS: the browser frontend is served from a different origin
# than this Space, so all origins/methods/headers are allowed.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # PIPELINE | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
def run_pipeline(audio_path, src_lang="auto", tgt_lang="te",
                 opt_fillers=True, opt_stutters=True, opt_silences=True,
                 opt_breaths=True, opt_mouth=True):
    """Run the full enhancement pipeline on a local audio file.

    Generator of SSE-ready dict events:
      * {"status": "processing", "step": 1..5, "message": ...} progress events
      * one final {"status": "done", ...} event carrying transcript,
        translation, summary, the Cloudinary URL of the enhanced audio,
        and a stats dict
      * {"status": "error", "message": ...} if any stage raises

    Args:
        audio_path: path to the input audio file (WAV or MP3).
        src_lang:   source language code, or "auto" to detect.
        tgt_lang:   target language code for translation ("auto" disables).
        opt_*:      per-artifact cleanup toggles forwarded to the denoiser.
    """
    import shutil  # local import: only needed for final cleanup

    out_dir = tempfile.mkdtemp()
    word_segs = None  # defined up-front so the final stats block can't see an unbound name
    try:
        # Step 1 — broadband denoise plus optional silence/breath/mouth cleanup.
        # Filler/stutter removal is deferred to step 3 (it needs word timings).
        yield {"status": "processing", "step": 1, "message": "Step 1/5 — Denoising..."}
        denoise1 = denoiser.process(
            audio_path, out_dir,
            remove_fillers=False, remove_stutters=False,
            remove_silences=opt_silences, remove_breaths=opt_breaths,
            remove_mouth_sounds=opt_mouth, word_segments=None,
        )
        clean1 = denoise1["audio_path"]
        stats = denoise1["stats"]

        # Step 2 — transcription; also yields detected language + method used.
        yield {"status": "processing", "step": 2, "message": "Step 2/5 — Transcribing..."}
        transcript, detected_lang, t_method = transcriber.transcribe(clean1, src_lang)
        # NOTE(review): reaches into Transcriber's private attribute for word
        # timings; may legitimately be None when no word-level segments exist.
        word_segs = transcriber._last_segments

        if (opt_fillers or opt_stutters) and word_segs:
            # Step 3 — cut fillers/stutters using the word-level timings.
            yield {"status": "processing", "step": 3, "message": "Step 3/5 — Removing fillers & stutters..."}
            import soundfile as sf
            # soundfile can read both WAV and MP3 here.
            audio_data, sr = sf.read(clean1)
            if audio_data.ndim == 2:
                audio_data = audio_data.mean(axis=1)  # downmix stereo to mono
            audio_data = audio_data.astype(np.float32)
            if opt_fillers:
                audio_data, n_f = denoiser._remove_fillers(audio_data, sr, word_segs)
                stats["fillers_removed"] = n_f
                transcript = denoiser.clean_transcript_fillers(transcript)
            if opt_stutters:
                audio_data, n_s = denoiser._remove_stutters(audio_data, sr, word_segs)
                stats["stutters_removed"] = n_s
            # Write to a fresh .wav — PCM_24 is WAV-only, never write to a .mp3 path.
            clean_wav = os.path.join(out_dir, "clean_step3.wav")
            sf.write(clean_wav, audio_data, sr, format="WAV", subtype="PCM_24")
            clean1 = clean_wav  # downstream steps (Cloudinary upload) use this
        else:
            stats["fillers_removed"] = 0
            stats["stutters_removed"] = 0

        # Step 4 — translate only when a concrete, different target is requested.
        translation = transcript
        tl_method = "same language"
        if tgt_lang != "auto" and detected_lang != tgt_lang:
            yield {"status": "processing", "step": 4, "message": "Step 4/5 — Translating..."}
            translation, tl_method = translator.translate(transcript, detected_lang, tgt_lang)

        # Step 5 — summarize the (source-language) transcript.
        yield {"status": "processing", "step": 5, "message": "Step 5/5 — Summarizing..."}
        summary = translator.summarize(transcript)

        # Upload enhanced audio to Cloudinary — returns a URL instead of base64.
        # This keeps the done SSE event tiny (~200 bytes) instead of ~700KB,
        # which was causing the JSON to be split across 85+ TCP chunks.
        try:
            upload_result = cloudinary.uploader.upload(
                clean1,
                resource_type="video",  # Cloudinary uses "video" for audio
                folder="clearwave_enhanced",
            )
            enhanced_url = upload_result["secure_url"]
            logger.info("Enhanced audio uploaded: %s", enhanced_url)
        except Exception as e:
            # Best-effort: a failed upload degrades the response (no audio URL)
            # rather than failing the whole pipeline.
            logger.error(f"Cloudinary upload failed: {e}")
            enhanced_url = None

        yield {
            "status": "done",
            "step": 5,
            "message": "Done!",
            "transcript": transcript,
            "translation": translation,
            "summary": summary,
            "enhancedAudio": enhanced_url,
            "stats": {
                # assumes transcriber always returns a language string — TODO confirm
                "language": detected_lang.upper(),
                "noise_method": stats.get("noise_method", "noisereduce"),
                "fillers_removed": stats.get("fillers_removed", 0),
                "stutters_removed": stats.get("stutters_removed", 0),
                "silences_removed_sec": stats.get("silences_removed_sec", 0),
                "breaths_reduced": stats.get("breaths_reduced", False),
                "mouth_sounds_removed": stats.get("mouth_sounds_removed", 0),
                "transcription_method": t_method,
                "translation_method": tl_method,
                "processing_sec": stats.get("processing_sec", 0),
                # fix: word_segs may be None (len(None) raised TypeError when
                # the step-3 branch was skipped because no segments existed)
                "word_segments": len(word_segs) if word_segs else 0,
                "transcript_words": len(transcript.split()),
            },
        }
    except Exception as e:
        logger.error(f"Pipeline failed: {e}", exc_info=True)
        yield {"status": "error", "message": f"Error: {str(e)}"}
    finally:
        # fix: the temp working dir was leaked on every request; by the time
        # the generator finishes, all needed output lives on Cloudinary.
        shutil.rmtree(out_dir, ignore_errors=True)
# ──────────────────────────────────────────────────────────────────────
# ROUTES
# ──────────────────────────────────────────────────────────────────────
@app.get("/api/health")  # fix: decorator was missing, so the route promised by the module docstring was never registered
async def health():
    """Liveness probe: report that the API service is up."""
    return JSONResponse({"status": "ok", "service": "ClearWave AI API"})
@app.post("/api/process-url")  # fix: decorator was missing, so the route was never registered
async def process_url(request: Request):
    """Download a remote audio file and stream pipeline progress as SSE.

    Expects a JSON body with:
        audioUrl (required), audioId, srcLang, tgtLang,
        optFillers / optStutters / optSilences / optBreaths / optMouth flags.

    Returns a text/event-stream of `data: {...}` JSON events: a step-0
    download phase, then every event produced by run_pipeline (each tagged
    with the caller's audioId).
    """
    data = await request.json()
    audio_url = data.get("audioUrl")
    audio_id = data.get("audioId", "")
    src_lang = data.get("srcLang", "auto")
    tgt_lang = data.get("tgtLang", "te")
    opt_fillers = data.get("optFillers", True)
    opt_stutters = data.get("optStutters", True)
    opt_silences = data.get("optSilences", True)
    opt_breaths = data.get("optBreaths", True)
    opt_mouth = data.get("optMouth", True)
    if not audio_url:
        return JSONResponse({"error": "audioUrl is required"}, status_code=400)

    async def generate():
        def sse(obj):
            """Encode one object as a Server-Sent-Events data frame."""
            return "data: " + json.dumps(obj) + "\n\n"

        yield sse({"status": "processing", "step": 0, "message": "Downloading audio..."})

        # NOTE(review): requests.get and run_pipeline are synchronous and run
        # on the event loop; fine for a single-user Space, but consider
        # run_in_executor if this must serve concurrent requests.
        tmp_path = None
        try:
            resp = requests.get(audio_url, timeout=60, stream=True)
            resp.raise_for_status()
            # Pick the temp suffix from the URL so downstream readers see the
            # right container format.
            suffix = ".wav" if "wav" in audio_url.lower() else ".mp3"
            with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
                tmp_path = tmp.name
                downloaded = 0
                total = int(resp.headers.get("content-length", 0))
                last_pct = -1
                for chunk in resp.iter_content(chunk_size=65536):
                    if chunk:
                        tmp.write(chunk)
                        downloaded += len(chunk)
                        if total:
                            pct = int(downloaded * 100 / total)
                            # fix: only emit an event when the percentage
                            # changes (was one SSE frame per 64 KB chunk)
                            if pct != last_pct:
                                last_pct = pct
                                yield sse({"status": "processing", "step": 0,
                                           "message": "Downloading... " + str(pct) + "%"})
        except Exception as e:
            yield sse({"status": "error", "message": "Download failed: " + str(e)})
            # fix: the partially-written temp file was leaked on this path
            if tmp_path:
                try:
                    os.unlink(tmp_path)
                except OSError:
                    pass
            return

        try:
            for result in run_pipeline(tmp_path, src_lang, tgt_lang,
                                       opt_fillers, opt_stutters, opt_silences,
                                       opt_breaths, opt_mouth):
                result["audioId"] = audio_id
                yield sse(result)
        finally:
            # fix: temp file is now removed even if the client disconnects
            # mid-stream (generator close), not only on clean completion.
            try:
                os.unlink(tmp_path)
            except OSError:
                pass

    return StreamingResponse(
        generate(),
        media_type="text/event-stream",
        headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"},
    )