Spaces:

Clearwave48
/

clearwave-api

Running

File size: 11,762 Bytes

0a1c5fe
2fcb053
 
 
 
 
 
 
 
 
 
 
 
 
 
0a1c5fe
 
 
 
785a835
0a1c5fe
 
 
f964d49
 
0a1c5fe
 
 
 
2fcb053
 
 
 
 
f964d49
 
 
 
 
 
0a1c5fe
 
 
2fcb053
 
0a1c5fe
 
 
 
 
 
 
 
 
 
 
 
785a835
 
0a1c5fe
785a835
 
0a1c5fe
 
 
2fcb053
 
 
 
 
 
785a835
 
0a1c5fe
785a835
 
 
 
2fcb053
785a835
2fcb053
 
 
 
 
785a835
 
2fcb053
 
 
 
 
 
 
 
785a835
 
 
 
 
 
 
 
 
 
 
 
 
0a1c5fe
785a835
 
 
0a1c5fe
 
2fcb053
 
 
 
 
785a835
0a1c5fe
785a835
0a1c5fe
 
 
785a835
 
0a1c5fe
785a835
 
 
 
0a1c5fe
785a835
 
 
0a1c5fe
 
2fcb053
f964d49
 
 
785a835
 
f964d49
 
785a835
f964d49
785a835
0a1c5fe
785a835
0a1c5fe
 
785a835
0a1c5fe
 
 
 
f964d49
0a1c5fe
 
785a835
0a1c5fe
 
 
 
 
 
 
 
 
 
 
785a835
0a1c5fe
2fcb053
0a1c5fe
 
 
785a835
0a1c5fe
785a835
 
0a1c5fe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2fcb053
0a1c5fe
 
 
2fcb053
 
 
 
 
 
 
 
 
 
 
0a1c5fe
2fcb053
0a1c5fe
 
 
 
 
 
 
2fcb053
0a1c5fe
 
2fcb053
0a1c5fe
 
2fcb053
0a1c5fe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f964d49

"""
ClearWave AI — API Space (FastAPI)
===================================
Endpoints: /api/health  |  /api/process-url

Pipeline:
  1. Download audio from URL
  2. Denoise / enhance  → Denoiser (Cleanvoice SDK)
  3. Transcribe         → Groq Whisper large-v3 (primary) / faster-whisper (fallback)
  4. Translate          → NLLB-200-1.3B (primary) / Google Translate (fallback)
  5. Summarize          → Extractive (position-scored)
  6. Upload result      → Cloudinary

All secrets read from HF Space environment variables:
  CLEANVOICE_API_KEY, CLOUD_NAME, API_KEY, API_SECRET, GROQ_API_KEY
"""

import os
import json
import time
import tempfile
import logging
import requests
import cloudinary
import cloudinary.uploader
from fastapi import FastAPI, Request
from fastapi.responses import StreamingResponse, JSONResponse
from fastapi.middleware.cors import CORSMiddleware

from denoiser    import Denoiser
from transcriber import Transcriber
from translator  import Translator

# ── Cloudinary credentials (read from environment variables) ──────────────────
# A variable that is not set resolves to None and is passed through as-is.
cloudinary.config(
    cloud_name=os.getenv("CLOUD_NAME"),
    api_key=os.getenv("API_KEY"),
    api_secret=os.getenv("API_SECRET"),
)

# ── Logging: INFO level on the root logger, module-scoped logger for this file ─
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# ── Shared pipeline components: one instance each, created at import time ─────
denoiser = Denoiser()
transcriber = Transcriber()
translator = Translator()

# ── FastAPI application ───────────────────────────────────────────────────────
app = FastAPI(title="ClearWave AI API")

# CORS is fully open: any origin, any method, any header.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)


# ══════════════════════════════════════════════════════════════════════════════
# PIPELINE
# ══════════════════════════════════════════════════════════════════════════════

def run_pipeline(audio_path, src_lang="auto", tgt_lang="te",
                 opt_fillers=True, opt_stutters=True, opt_silences=True,
                 opt_breaths=True, opt_mouth=True):
    """
    Run enhance → transcribe → translate → summarize/upload on one audio file.

    Generator — yields SSE-style dicts at each step. The caller is expected to
    serialise each dict as a "data: <json>" line followed by a blank line.

    Args:
        audio_path:   Path of the local audio file to process.
        src_lang:     Source language passed to the transcriber ("auto" by default).
        tgt_lang:     Target language code; translation is skipped when it is
                      "auto" or equals the detected language.
        opt_fillers:  Ask the denoiser to remove fillers; also strips filler
                      words from the transcript text.
        opt_stutters: Ask the denoiser to remove stutters.
        opt_silences: Ask the denoiser to remove long silences.
        opt_breaths:  Ask the denoiser to reduce breaths.
        opt_mouth:    Ask the denoiser to remove mouth sounds.

    Yields:
        Progress dicts with "status": "processing" and a "step"/"message",
        then either one final dict with "status": "done" (transcript,
        translation, summary, enhancedAudio URL or None, and stats) or one
        dict with "status": "error" if any non-recoverable step raised.

    Notes:
        * A denoiser failure is not fatal: the original audio is used instead.
        * A Cloudinary upload failure is not fatal: "enhancedAudio" is None.
        * The scratch directory created here is always removed when the
          generator finishes or is closed early by the consumer.
    """
    # Local import: only needed for scratch-directory cleanup in `finally`.
    import shutil

    out_dir   = tempfile.mkdtemp()
    stats     = {}
    word_segs = []

    try:
        # ── Step 1: Cleanvoice — full audio enhancement ───────────────────────
        yield {"status": "processing", "step": 1,
               "message": "Step 1/4 — Enhancing audio with Cleanvoice..."}
        try:
            result = denoiser.process(
                audio_path, out_dir,
                fillers=opt_fillers,
                stutters=opt_stutters,
                long_silences=opt_silences,
                breaths=opt_breaths,
                mouth_sounds=opt_mouth,
            )
            clean1 = result["audio_path"]
            # These values echo the requested options; they are not measured
            # counts reported by the denoiser.
            stats  = {
                "noise_method":         "Cleanvoice API",
                "fillers_removed":      "yes" if opt_fillers  else "no",
                "stutters_removed":     "yes" if opt_stutters else "no",
                "silences_removed_sec": "yes" if opt_silences else "no",
                "breaths_reduced":      opt_breaths,
                "mouth_sounds_removed": "yes" if opt_mouth    else "no",
            }
            logger.info("[Pipeline] Cleanvoice enhancement complete")
        except Exception as e:
            # Cleanvoice failed — log it and continue with original audio
            logger.error(f"[Pipeline] Cleanvoice failed: {e} — using original audio")
            clean1 = audio_path
            # NOTE(review): value types differ from the success branch
            # (ints here, "yes"/"no" strings there); kept as-is because
            # clients may already depend on this shape.
            stats  = {
                "noise_method":         f"Cleanvoice failed: {e}",
                "fillers_removed":      0,
                "stutters_removed":     0,
                "silences_removed_sec": 0,
                "breaths_reduced":      False,
                "mouth_sounds_removed": 0,
            }

        # ── Step 2: Transcribe ────────────────────────────────────────────────
        yield {"status": "processing", "step": 2,
               "message": "Step 2/4 — Transcribing..."}
        transcript, detected_lang, t_method = transcriber.transcribe(clean1, src_lang)
        # Read immediately after transcribe(): the segments live on the shared
        # transcriber instance, so nothing may run in between.
        word_segs = transcriber._last_segments

        # Clean filler words from transcript text too
        if opt_fillers:
            transcript = denoiser.clean_transcript_fillers(transcript)

        logger.info(f"[Pipeline] Transcription done: {len(transcript.split())} words, lang={detected_lang}")

        # ── Step 3: Translate ─────────────────────────────────────────────────
        # Defaults describe the "no translation needed" case.
        translation = transcript
        tl_method   = "same language"
        if tgt_lang != "auto" and detected_lang != tgt_lang:
            yield {"status": "processing", "step": 3,
                   "message": "Step 3/4 — Translating..."}
            translation, tl_method = translator.translate(transcript, detected_lang, tgt_lang)
            logger.info(f"[Pipeline] Translation done via {tl_method}")
        else:
            yield {"status": "processing", "step": 3,
                   "message": "Step 3/4 — Skipping translation (same language)..."}

        # ── Step 4: Summarize + upload to Cloudinary ──────────────────────────
        yield {"status": "processing", "step": 4,
               "message": "Step 4/4 — Summarizing & uploading..."}
        # The summary is built from the transcript, not from the translation.
        summary = translator.summarize(transcript)

        enhanced_url = None
        try:
            upload_result = cloudinary.uploader.upload(
                clean1,
                resource_type="video",   # Cloudinary uses "video" for audio files
                folder="clearwave_enhanced",
            )
            enhanced_url = upload_result["secure_url"]
            logger.info(f"[Pipeline] Cloudinary upload done: {enhanced_url}")
        except Exception as e:
            # Best effort: the text results are still returned without a URL.
            logger.error(f"[Pipeline] Cloudinary upload failed: {e}")

        # ── Done ──────────────────────────────────────────────────────────────
        yield {
            "status":        "done",
            "step":          4,
            "message":       "Done!",
            "transcript":    transcript,
            "translation":   translation,
            "summary":       summary,
            "enhancedAudio": enhanced_url,
            "stats": {
                "language":             detected_lang.upper(),
                "noise_method":         stats.get("noise_method", "Cleanvoice API"),
                "fillers_removed":      stats.get("fillers_removed", 0),
                "stutters_removed":     stats.get("stutters_removed", 0),
                "silences_removed_sec": stats.get("silences_removed_sec", 0),
                "breaths_reduced":      stats.get("breaths_reduced", False),
                "mouth_sounds_removed": stats.get("mouth_sounds_removed", 0),
                "transcription_method": t_method,
                "translation_method":   tl_method,
                "word_segments":        len(word_segs),
                "transcript_words":     len(transcript.split()),
            },
        }

    except Exception as e:
        logger.error(f"[Pipeline] Fatal error: {e}", exc_info=True)
        yield {"status": "error", "message": f"Error: {str(e)}"}

    finally:
        # Remove the scratch directory (and any enhanced audio written into
        # it). The upload above has already happened by now. This also runs
        # when the consumer closes the generator before it is exhausted.
        shutil.rmtree(out_dir, ignore_errors=True)


# ══════════════════════════════════════════════════════════════════════════════
# ROUTES
# ══════════════════════════════════════════════════════════════════════════════

@app.get("/api/health")
async def health():
    """Health-check endpoint: return a fixed JSON body naming the service."""
    payload = {"status": "ok", "service": "ClearWave AI API"}
    return JSONResponse(payload)


@app.post("/api/process-url")
async def process_url(request: Request):
    """
    Download audio from a URL, run the pipeline, and stream progress as SSE.

    Expects a JSON object body with:
        audioUrl    (required) URL of the audio file to download.
        audioId     (optional, default "") echoed back on every pipeline event.
        srcLang     (optional, default "auto")
        tgtLang     (optional, default "te")
        optFillers / optStutters / optSilences / optBreaths / optMouth
                    (optional, default True) enhancement switches.

    Returns:
        400 JSON response when the body is not a JSON object or "audioUrl" is
        missing; otherwise a text/event-stream response whose events are
        "data: <json>" records: download progress (step 0), then every dict
        yielded by run_pipeline with "audioId" added.
    """
    # A malformed body must be a client error (400), not an unhandled 500.
    try:
        data = await request.json()
    except ValueError:
        return JSONResponse({"error": "Request body must be valid JSON"}, status_code=400)
    if not isinstance(data, dict):
        return JSONResponse({"error": "Request body must be a JSON object"}, status_code=400)

    audio_url    = data.get("audioUrl")
    audio_id     = data.get("audioId",     "")
    src_lang     = data.get("srcLang",     "auto")
    tgt_lang     = data.get("tgtLang",     "te")
    opt_fillers  = data.get("optFillers",  True)
    opt_stutters = data.get("optStutters", True)
    opt_silences = data.get("optSilences", True)
    opt_breaths  = data.get("optBreaths",  True)
    opt_mouth    = data.get("optMouth",    True)

    if not audio_url:
        return JSONResponse({"error": "audioUrl is required"}, status_code=400)

    # NOTE(review): audio_url is caller-supplied and fetched server-side with
    # no host/scheme restriction (SSRF exposure) — consider an allow-list.
    # NOTE(review): requests.get and run_pipeline are blocking calls inside an
    # async generator, so the event loop is stalled between yields.

    async def generate():
        import sys

        def sse(obj):
            # Serialise one event in SSE wire format; stdout is flushed on
            # every event (kept from the original behaviour).
            sys.stdout.flush()
            return "data: " + json.dumps(obj) + "\n\n"

        yield sse({"status": "processing", "step": 0, "message": "Downloading audio..."})

        tmp_path = None
        try:
            # ── Download audio from URL ───────────────────────────────────────
            try:
                # `with` guarantees the HTTP connection is released even when
                # the download fails part-way through.
                with requests.get(audio_url, timeout=60, stream=True) as resp:
                    resp.raise_for_status()

                    # Detect extension — support WhatsApp .opus and common
                    # formats; first match in this order wins, default .mp3.
                    lower_url = audio_url.lower().split("?")[0]
                    known_exts = (".opus", ".ogg", ".aac", ".m4a", ".wav")
                    suffix = next(
                        (ext for ext in known_exts if ext in lower_url), ".mp3"
                    )

                    total      = int(resp.headers.get("content-length", 0))
                    downloaded = 0
                    last_pct   = -1

                    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
                        # Record the path first so `finally` can delete the
                        # file if anything below raises.
                        tmp_path = tmp.name
                        for chunk in resp.iter_content(chunk_size=65536):
                            if not chunk:
                                continue
                            tmp.write(chunk)
                            downloaded += len(chunk)
                            if total:
                                # Clamp: decoded bytes can exceed the
                                # advertised content-length.
                                pct = min(100, downloaded * 100 // total)
                                # Emit only on change instead of once per
                                # 64 KiB chunk, to avoid flooding the stream.
                                if pct != last_pct:
                                    last_pct = pct
                                    yield sse({"status": "processing", "step": 0,
                                               "message": f"Downloading... {pct}%"})
            except Exception as e:
                yield sse({"status": "error", "message": f"Download failed: {e}"})
                return

            # ── Run pipeline ──────────────────────────────────────────────────
            for result in run_pipeline(tmp_path, src_lang, tgt_lang,
                                       opt_fillers, opt_stutters, opt_silences,
                                       opt_breaths, opt_mouth):
                result["audioId"] = audio_id
                yield sse(result)
        finally:
            # Always remove the downloaded file: on success, on download
            # failure, and when the client disconnects mid-stream.
            if tmp_path is not None:
                try:
                    os.unlink(tmp_path)
                except OSError:
                    pass

    return StreamingResponse(
        generate(),
        media_type="text/event-stream",
        headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"},
    )