Spaces:

Clearwave48
/

clearwave-api

Sleeping

App Files Files Community

Clearwave48 committed on Mar 14

Commit

0a1c5fe

verified ·

1 Parent(s): dcc5fdf

Upload 3 files

Browse files

Files changed (3) hide show

API_README.md +17 -0
Dockerfile +34 -0
main.py +188 -0

API_README.md ADDED Viewed

	@@ -0,0 +1,17 @@

+---
+title: ClearWave AI API
+emoji: 🎵
+colorFrom: red
+colorTo: purple
+sdk: docker
+app_port: 7860
+pinned: false
+license: mit
+---
+# 🎵 ClearWave AI — API
+FastAPI backend for ClearWave AI audio processing pipeline.
+## Endpoints
+- `GET /api/health` — Health check
+- `POST /api/process-url` — Process audio from URL (SSE stream)

Dockerfile ADDED Viewed

	@@ -0,0 +1,34 @@

+FROM python:3.10-slim
+# System tools: ffmpeg for audio decoding, git/curl for fetching resources.
+RUN apt-get update && apt-get install -y \
+    ffmpeg git curl \
+    && rm -rf /var/lib/apt/lists/*
+WORKDIR /app
+# Install PyTorch CPU first
+RUN pip install --no-cache-dir torch torchaudio \
+    --index-url https://download.pytorch.org/whl/cpu
+# Install all other dependencies
+RUN pip install --no-cache-dir \
+    fastapi uvicorn \
+    requests \
+    groq \
+    deep-translator transformers tokenizers \
+    huggingface_hub sentencepiece sacremoses \
+    soundfile noisereduce numpy pyloudnorm \
+    librosa ffmpeg-python faster-whisper
+COPY . .
+# Create the non-root runtime user and hand it ownership of /app, including
+# the Hugging Face cache directory configured below. Without this, /app stays
+# root-owned and the process (uid 1000) cannot create /app/.cache at runtime.
+RUN useradd -m -u 1000 user \
+    && mkdir -p /app/.cache/huggingface \
+    && chown -R user:user /app
+USER user
+ENV HF_HOME=/app/.cache/huggingface
+ENV TRANSFORMERS_CACHE=/app/.cache/huggingface
+ENV HOME=/home/user
+EXPOSE 7860
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]

main.py ADDED Viewed

	@@ -0,0 +1,188 @@

+"""
+ClearWave AI — API Space (FastAPI only)
+Handles /api/health and /api/process-url
+No Gradio, no routing conflicts.
+"""
+import os
+import json
+import base64
+import tempfile
+import logging
+import time
+import requests
+import numpy as np
+from fastapi import FastAPI, Request
+from fastapi.responses import StreamingResponse, JSONResponse
+from fastapi.middleware.cors import CORSMiddleware
+# Configure root logging before the pipeline modules below are imported.
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# Project-local pipeline stages (defined in sibling modules, not shown here).
+from denoiser    import Denoiser
+from transcriber import Transcriber
+from translator  import Translator
+# One instance of each stage is created at import time and shared by all
+# requests through these module-level names.
+denoiser    = Denoiser()
+transcriber = Transcriber()
+translator  = Translator()
+app = FastAPI(title="ClearWave AI API")
+# CORS is fully open: any origin, any method and any header is accepted.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# ══════════════════════════════════════════════════════════════════════
+# PIPELINE
+# ══════════════════════════════════════════════════════════════════════
+def run_pipeline(audio_path, src_lang="auto", tgt_lang="te",
+                 opt_fillers=True, opt_stutters=True, opt_silences=True,
+                 opt_breaths=True, opt_mouth=True):
+    out_dir = tempfile.mkdtemp()
+    try:
+        yield {"status": "processing", "step": 1, "message": "Step 1/5 — Denoising..."}
+        denoise1 = denoiser.process(
+            audio_path, out_dir,
+            remove_fillers=False, remove_stutters=False,
+            remove_silences=opt_silences, remove_breaths=opt_breaths,
+            remove_mouth_sounds=opt_mouth, word_segments=None,
+        )
+        clean1 = denoise1["audio_path"]
+        stats  = denoise1["stats"]
+        yield {"status": "processing", "step": 2, "message": "Step 2/5 — Transcribing..."}
+        transcript, detected_lang, t_method = transcriber.transcribe(clean1, src_lang)
+        word_segs = transcriber._last_segments
+        if (opt_fillers or opt_stutters) and word_segs:
+            yield {"status": "processing", "step": 3, "message": "Step 3/5 — Removing fillers & stutters..."}
+            import soundfile as sf
+            audio_data, sr = sf.read(clean1)
+            if audio_data.ndim == 2:
+                audio_data = audio_data.mean(axis=1)
+            audio_data = audio_data.astype(np.float32)
+            if opt_fillers:
+                audio_data, n_f = denoiser._remove_fillers(audio_data, sr, word_segs)
+                stats["fillers_removed"] = n_f
+                transcript = denoiser.clean_transcript_fillers(transcript)
+            if opt_stutters:
+                audio_data, n_s = denoiser._remove_stutters(audio_data, sr, word_segs)
+                stats["stutters_removed"] = n_s
+            sf.write(clean1, audio_data, sr, subtype="PCM_24")
+        else:
+            stats["fillers_removed"]  = 0
+            stats["stutters_removed"] = 0
+        translation = transcript
+        tl_method   = "same language"
+        if tgt_lang != "auto" and detected_lang != tgt_lang:
+            yield {"status": "processing", "step": 4, "message": "Step 4/5 — Translating..."}
+            translation, tl_method = translator.translate(transcript, detected_lang, tgt_lang)
+        yield {"status": "processing", "step": 5, "message": "Step 5/5 — Summarizing..."}
+        summary = translator.summarize(transcript)
+        with open(clean1, "rb") as f:
+            enhanced_b64 = base64.b64encode(f.read()).decode("utf-8")
+        yield {
+            "status":        "done",
+            "step":          5,
+            "message":       "Done!",
+            "transcript":    transcript,
+            "translation":   translation,
+            "summary":       summary,
+            "enhancedAudio": enhanced_b64,
+            "stats": {
+                "language":             detected_lang.upper(),
+                "noise_method":         stats.get("noise_method", "noisereduce"),
+                "fillers_removed":      stats.get("fillers_removed", 0),
+                "stutters_removed":     stats.get("stutters_removed", 0),
+                "silences_removed_sec": stats.get("silences_removed_sec", 0),
+                "breaths_reduced":      stats.get("breaths_reduced", False),
+                "mouth_sounds_removed": stats.get("mouth_sounds_removed", 0),
+                "transcription_method": t_method,
+                "translation_method":   tl_method,
+                "processing_sec":       stats.get("processing_sec", 0),
+                "word_segments":        len(word_segs),
+                "transcript_words":     len(transcript.split()),
+            },
+        }
+    except Exception as e:
+        logger.error(f"Pipeline failed: {e}", exc_info=True)
+        yield {"status": "error", "message": f"Error: {str(e)}"}
+# ══════════════════════════════════════════════════════════════════════
+# ROUTES
+# ══════════════════════════════════════════════════════════════════════
+@app.get("/api/health")
+async def health():
+    return JSONResponse({"status": "ok", "service": "ClearWave AI API"})
+@app.post("/api/process-url")
+async def process_url(request: Request):
+    data         = await request.json()
+    audio_url    = data.get("audioUrl")
+    audio_id     = data.get("audioId",     "")
+    src_lang     = data.get("srcLang",     "auto")
+    tgt_lang     = data.get("tgtLang",     "te")
+    opt_fillers  = data.get("optFillers",  True)
+    opt_stutters = data.get("optStutters", True)
+    opt_silences = data.get("optSilences", True)
+    opt_breaths  = data.get("optBreaths",  True)
+    opt_mouth    = data.get("optMouth",    True)
+    if not audio_url:
+        return JSONResponse({"error": "audioUrl is required"}, status_code=400)
+    async def generate():
+        import sys
+        def sse(obj):
+            sys.stdout.flush()
+            return "data: " + json.dumps(obj) + "\n\n"
+        yield sse({"status": "processing", "step": 0, "message": "Downloading audio..."})
+        try:
+            resp = requests.get(audio_url, timeout=60, stream=True)
+            resp.raise_for_status()
+            suffix = ".wav" if "wav" in audio_url.lower() else ".mp3"
+            tmp = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
+            downloaded = 0
+            total = int(resp.headers.get("content-length", 0))
+            for chunk in resp.iter_content(chunk_size=65536):
+                if chunk:
+                    tmp.write(chunk)
+                    downloaded += len(chunk)
+                    if total:
+                        pct = int(downloaded * 100 / total)
+                        yield sse({"status": "processing", "step": 0,
+                                   "message": "Downloading... " + str(pct) + "%"})
+            tmp.close()
+        except Exception as e:
+            yield sse({"status": "error", "message": "Download failed: " + str(e)})
+            return
+        for result in run_pipeline(tmp.name, src_lang, tgt_lang,
+                                   opt_fillers, opt_stutters, opt_silences,
+                                   opt_breaths, opt_mouth):
+            result["audioId"] = audio_id
+            yield sse(result)
+        try:
+            os.unlink(tmp.name)
+        except Exception:
+            pass
+    return StreamingResponse(
+        generate(),
+        media_type="text/event-stream",
+        headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"},
+    )