# clearwave-api / main.py  (Hugging Face Space by Clearwave48)
# Commit a960e11 (verified): "Update main.py"
"""
ClearWave AI — API Space (FastAPI only)
Handles /api/health and /api/process-url
No Gradio, no routing conflicts.
"""
import os
import json
import tempfile
import logging
import requests
import numpy as np
import cloudinary
import cloudinary.uploader
from fastapi import FastAPI, Request
from fastapi.responses import StreamingResponse, JSONResponse
from fastapi.middleware.cors import CORSMiddleware
# Cloudinary credentials come from the environment — define CLOUD_NAME,
# API_KEY and API_SECRET as secrets of the HF Space. A variable that is not
# set is passed through as None.
cloudinary.config(
    **{
        option: os.getenv(env_var)
        for option, env_var in (
            ("cloud_name", "CLOUD_NAME"),
            ("api_key", "API_KEY"),
            ("api_secret", "API_SECRET"),
        )
    }
)
# Configure root logging at INFO level and create this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Project-local pipeline components.
# NOTE(review): these imports sit below the logging setup instead of with the
# other imports at the top of the file — presumably so logging is configured
# before the modules load; confirm before moving them.
from denoiser import Denoiser
from transcriber import Transcriber
from translator import Translator
# One instance of each component is created at import time and reused by every
# run_pipeline() call.
denoiser = Denoiser()
transcriber = Transcriber()
translator = Translator()
# The ASGI application object that the route decorators below attach to.
app = FastAPI(title="ClearWave AI API")

# CORS is fully permissive: every origin, HTTP method and request header is
# accepted. Each option gets its own ["*"] list.
app.add_middleware(
    CORSMiddleware,
    **{
        option: ["*"]
        for option in ("allow_origins", "allow_methods", "allow_headers")
    },
)
# ══════════════════════════════════════════════════════════════════════
# PIPELINE
# ══════════════════════════════════════════════════════════════════════
def run_pipeline(audio_path, src_lang="auto", tgt_lang="te",
                 opt_fillers=True, opt_stutters=True, opt_silences=True,
                 opt_breaths=True, opt_mouth=True):
    """Run the enhancement pipeline on one audio file, yielding progress events.

    This is a generator: each step yields a ``{"status": "processing", ...}``
    dict before it runs, and the last item is either a ``"done"`` dict with the
    results or an ``"error"`` dict if any step raised.

    Args:
        audio_path: Path of the input audio file, handed to ``denoiser.process``.
        src_lang: Source language passed to ``transcriber.transcribe``.
        tgt_lang: Target language. Translation is skipped when this is
            ``"auto"`` or equals the detected language.
        opt_fillers: Remove filler words from audio and transcript (step 3).
        opt_stutters: Remove stutters from the audio (step 3).
        opt_silences: Forwarded to the denoiser as ``remove_silences``.
        opt_breaths: Forwarded to the denoiser as ``remove_breaths``.
        opt_mouth: Forwarded to the denoiser as ``remove_mouth_sounds``.

    Yields:
        dict: Progress events with ``status``, ``step`` and ``message``. The
        final ``"done"`` event also carries ``transcript``, ``translation``,
        ``summary``, ``enhancedAudio`` (a Cloudinary URL, or ``None`` if the
        upload failed) and a ``stats`` dict.

    The scratch directory created for intermediate audio is deleted when the
    generator finishes or is closed early.
    """
    import shutil  # local import: only needed for the scratch-dir cleanup

    out_dir = tempfile.mkdtemp()
    try:
        yield {"status": "processing", "step": 1, "message": "Step 1/5 — Denoising..."}
        # Fillers and stutters need word timings, which only exist after
        # transcription, so the first pass leaves them alone.
        denoise1 = denoiser.process(
            audio_path, out_dir,
            remove_fillers=False, remove_stutters=False,
            remove_silences=opt_silences, remove_breaths=opt_breaths,
            remove_mouth_sounds=opt_mouth, word_segments=None,
        )
        clean1 = denoise1["audio_path"]
        stats = denoise1["stats"]

        yield {"status": "processing", "step": 2, "message": "Step 2/5 — Transcribing..."}
        transcript, detected_lang, t_method = transcriber.transcribe(clean1, src_lang)
        # NOTE(review): normalised to a list so len() in the final event cannot
        # fail if the transcriber leaves _last_segments unset — confirm whether
        # None is actually possible.
        word_segs = transcriber._last_segments or []

        if (opt_fillers or opt_stutters) and word_segs:
            yield {"status": "processing", "step": 3,
                   "message": "Step 3/5 — Removing fillers & stutters..."}
            import soundfile as sf

            # Read the denoised audio — soundfile can read both WAV and MP3.
            audio_data, sr = sf.read(clean1)
            if audio_data.ndim == 2:
                # Down-mix multi-channel audio to mono by averaging channels.
                audio_data = audio_data.mean(axis=1)
            audio_data = audio_data.astype(np.float32)

            if opt_fillers:
                audio_data, n_f = denoiser._remove_fillers(audio_data, sr, word_segs)
                stats["fillers_removed"] = n_f
                transcript = denoiser.clean_transcript_fillers(transcript)
            if opt_stutters:
                audio_data, n_s = denoiser._remove_stutters(audio_data, sr, word_segs)
                stats["stutters_removed"] = n_s

            # Write to a fresh .wav — PCM_24 is WAV-only, never write to an
            # .mp3 path.
            clean_wav = os.path.join(out_dir, "clean_step3.wav")
            sf.write(clean_wav, audio_data, sr, format="WAV", subtype="PCM_24")
            clean1 = clean_wav  # the Cloudinary upload below uses this file
        else:
            stats["fillers_removed"] = 0
            stats["stutters_removed"] = 0

        translation = transcript
        tl_method = "same language"
        if tgt_lang != "auto" and detected_lang != tgt_lang:
            yield {"status": "processing", "step": 4, "message": "Step 4/5 — Translating..."}
            translation, tl_method = translator.translate(transcript, detected_lang, tgt_lang)

        yield {"status": "processing", "step": 5, "message": "Step 5/5 — Summarizing..."}
        summary = translator.summarize(transcript)

        # Upload the enhanced audio to Cloudinary and return its URL instead of
        # base64. This keeps the "done" SSE event tiny (~200 bytes) instead of
        # ~700KB, which was causing the JSON to be split across 85+ TCP chunks.
        # An upload failure is logged and reported as enhancedAudio=None
        # without failing the pipeline.
        try:
            upload_result = cloudinary.uploader.upload(
                clean1,
                resource_type="video",  # Cloudinary uses "video" for audio
                folder="clearwave_enhanced",
            )
            enhanced_url = upload_result["secure_url"]
            logger.info("Enhanced audio uploaded: %s", enhanced_url)
        except Exception as e:
            logger.error("Cloudinary upload failed: %s", e)
            enhanced_url = None

        yield {
            "status": "done",
            "step": 5,
            "message": "Done!",
            "transcript": transcript,
            "translation": translation,
            "summary": summary,
            "enhancedAudio": enhanced_url,
            "stats": {
                "language": detected_lang.upper(),
                "noise_method": stats.get("noise_method", "noisereduce"),
                "fillers_removed": stats.get("fillers_removed", 0),
                "stutters_removed": stats.get("stutters_removed", 0),
                "silences_removed_sec": stats.get("silences_removed_sec", 0),
                "breaths_reduced": stats.get("breaths_reduced", False),
                "mouth_sounds_removed": stats.get("mouth_sounds_removed", 0),
                "transcription_method": t_method,
                "translation_method": tl_method,
                "processing_sec": stats.get("processing_sec", 0),
                "word_segments": len(word_segs),
                "transcript_words": len(transcript.split()),
            },
        }
    except Exception as e:
        # Top-level boundary: report the failure to the client as an event
        # instead of letting the exception escape the stream.
        logger.exception("Pipeline failed: %s", e)
        yield {"status": "error", "message": f"Error: {e}"}
    finally:
        # Runs on normal completion, on error, and when the consumer closes the
        # generator early; without it every request leaked a temp directory.
        shutil.rmtree(out_dir, ignore_errors=True)
# ══════════════════════════════════════════════════════════════════════
# ROUTES
# ══════════════════════════════════════════════════════════════════════
@app.get("/api/health")
async def health():
    """Health-check endpoint: always replies with a fixed JSON status body."""
    payload = {"status": "ok", "service": "ClearWave AI API"}
    return JSONResponse(payload)
def _audio_suffix_from_url(audio_url):
    """Return ".wav" if the URL's path ends in .wav, otherwise ".mp3".

    Only the path component is inspected, so "wav" appearing elsewhere in the
    URL (for example inside "clearwave") no longer forces a .wav suffix.
    """
    from urllib.parse import urlparse

    extension = os.path.splitext(urlparse(audio_url).path)[1].lower()
    return ".wav" if extension == ".wav" else ".mp3"


@app.post("/api/process-url")
async def process_url(request: Request):
    """Download the audio at ``audioUrl`` and stream pipeline progress as SSE.

    The JSON request body may contain:
        audioUrl (required), audioId, srcLang, tgtLang, optFillers,
        optStutters, optSilences, optBreaths, optMouth.

    Returns:
        A ``text/event-stream`` response in which every event is
        ``data: <json>\\n\\n``. Download progress is reported as step 0, then
        each event produced by ``run_pipeline`` is forwarded with ``audioId``
        added. A 400 JSON response is returned when the body is not a JSON
        object or ``audioUrl`` is missing or not a string.
    """
    try:
        data = await request.json()
    except ValueError:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors.
        return JSONResponse({"error": "Request body must be valid JSON"}, status_code=400)
    if not isinstance(data, dict):
        return JSONResponse({"error": "Request body must be a JSON object"}, status_code=400)

    audio_url = data.get("audioUrl")
    audio_id = data.get("audioId", "")
    src_lang = data.get("srcLang", "auto")
    tgt_lang = data.get("tgtLang", "te")
    opt_fillers = data.get("optFillers", True)
    opt_stutters = data.get("optStutters", True)
    opt_silences = data.get("optSilences", True)
    opt_breaths = data.get("optBreaths", True)
    opt_mouth = data.get("optMouth", True)

    if not audio_url:
        return JSONResponse({"error": "audioUrl is required"}, status_code=400)
    if not isinstance(audio_url, str):
        return JSONResponse({"error": "audioUrl must be a string"}, status_code=400)

    # NOTE(review): audio_url is caller-supplied and fetched server-side with
    # no host restriction (SSRF risk) and no size limit — consider an
    # allow-list and a download cap.
    # NOTE(review): requests.get() and run_pipeline() are blocking calls made
    # inside this async generator, so the event loop is blocked while a job
    # runs. Left as-is because moving it to a thread would let concurrent
    # requests share the pipeline singletons.
    async def generate():
        import sys

        def sse(obj):
            # Flushes buffered stdout before each event; this has no effect on
            # the HTTP stream itself.
            sys.stdout.flush()
            return "data: " + json.dumps(obj) + "\n\n"

        yield sse({"status": "processing", "step": 0, "message": "Downloading audio..."})

        tmp_path = None
        pipeline = None
        try:
            try:
                # Context managers close the HTTP connection and the temp file
                # handle even if the download fails midway.
                with requests.get(audio_url, timeout=60, stream=True) as resp:
                    resp.raise_for_status()
                    suffix = _audio_suffix_from_url(audio_url)
                    total = int(resp.headers.get("content-length", 0))
                    downloaded = 0
                    last_pct = -1
                    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
                        tmp_path = tmp.name
                        for chunk in resp.iter_content(chunk_size=65536):
                            if not chunk:
                                continue
                            tmp.write(chunk)
                            downloaded += len(chunk)
                            if total:
                                # Decoded bytes can exceed Content-Length for
                                # compressed responses, so clamp to 100.
                                pct = min(100, int(downloaded * 100 / total))
                                # Emit only when the percentage changes rather
                                # than once per 64 KB chunk.
                                if pct != last_pct:
                                    last_pct = pct
                                    yield sse({"status": "processing", "step": 0,
                                               "message": "Downloading... " + str(pct) + "%"})
            except Exception as e:
                yield sse({"status": "error", "message": "Download failed: " + str(e)})
                return

            pipeline = run_pipeline(tmp_path, src_lang, tgt_lang,
                                    opt_fillers, opt_stutters, opt_silences,
                                    opt_breaths, opt_mouth)
            for result in pipeline:
                result["audioId"] = audio_id
                yield sse(result)
        finally:
            # Runs on success, on download failure and when the client
            # disconnects mid-stream, so the temp file is always removed.
            if pipeline is not None:
                pipeline.close()
            if tmp_path is not None:
                try:
                    os.unlink(tmp_path)
                except OSError:
                    pass  # best-effort cleanup; the file may already be gone

    return StreamingResponse(
        generate(),
        media_type="text/event-stream",
        headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"},
    )