Clearwave48 committed on
Commit
0a1c5fe
·
verified ·
1 Parent(s): dcc5fdf

Upload 3 files

Browse files
Files changed (3) hide show
  1. API_README.md +17 -0
  2. Dockerfile +34 -0
  3. main.py +188 -0
API_README.md ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: ClearWave AI API
3
+ emoji: 🎡
4
+ colorFrom: red
5
+ colorTo: purple
6
+ sdk: docker
7
+ app_port: 7860
8
+ pinned: false
9
+ license: mit
10
+ ---
11
+
12
+ # 🎡 ClearWave AI — API
13
+ FastAPI backend for ClearWave AI audio processing pipeline.
14
+
15
+ ## Endpoints
16
+ - `GET /api/health` — Health check
17
+ - `POST /api/process-url` — Process audio from URL (SSE stream)
Dockerfile ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.10-slim
2
+
3
+ RUN apt-get update && apt-get install -y \
4
+ ffmpeg git curl \
5
+ && rm -rf /var/lib/apt/lists/*
6
+
7
+ WORKDIR /app
8
+
9
+ # Install PyTorch CPU first
10
+ RUN pip install --no-cache-dir torch torchaudio \
11
+ --index-url https://download.pytorch.org/whl/cpu
12
+
13
+ # Install all other dependencies
14
+ RUN pip install --no-cache-dir \
15
+ fastapi uvicorn \
16
+ requests \
17
+ groq \
18
+ deep-translator transformers tokenizers \
19
+ huggingface_hub sentencepiece sacremoses \
20
+ soundfile noisereduce numpy pyloudnorm \
21
+ librosa ffmpeg-python faster-whisper
22
+
23
+ COPY . .
24
+
25
+ RUN useradd -m -u 1000 user
26
+ USER user
27
+
28
+ ENV HF_HOME=/app/.cache/huggingface
29
+ ENV TRANSFORMERS_CACHE=/app/.cache/huggingface
30
+ ENV HOME=/home/user
31
+
32
+ EXPOSE 7860
33
+
34
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
main.py ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ClearWave AI — API Space (FastAPI only)
3
+ Handles /api/health and /api/process-url
4
+ No Gradio, no routing conflicts.
5
+ """
6
+
7
+ import os
8
+ import json
9
+ import base64
10
+ import tempfile
11
+ import logging
12
+ import time
13
+ import requests
14
+ import numpy as np
15
+ from fastapi import FastAPI, Request
16
+ from fastapi.responses import StreamingResponse, JSONResponse
17
+ from fastapi.middleware.cors import CORSMiddleware
18
+
19
+ logging.basicConfig(level=logging.INFO)
20
+ logger = logging.getLogger(__name__)
21
+
22
+ from denoiser import Denoiser
23
+ from transcriber import Transcriber
24
+ from translator import Translator
25
+
26
+ denoiser = Denoiser()
27
+ transcriber = Transcriber()
28
+ translator = Translator()
29
+
30
+ app = FastAPI(title="ClearWave AI API")
31
+
32
+ app.add_middleware(
33
+ CORSMiddleware,
34
+ allow_origins=["*"],
35
+ allow_methods=["*"],
36
+ allow_headers=["*"],
37
+ )
38
+
39
+ # ══════════════════════════════════════════════════════════════════════
40
+ # PIPELINE
41
+ # ══════════════════════════════════════════════════════════════════════
42
+ def run_pipeline(audio_path, src_lang="auto", tgt_lang="te",
43
+ opt_fillers=True, opt_stutters=True, opt_silences=True,
44
+ opt_breaths=True, opt_mouth=True):
45
+ out_dir = tempfile.mkdtemp()
46
+ try:
47
+ yield {"status": "processing", "step": 1, "message": "Step 1/5 — Denoising..."}
48
+ denoise1 = denoiser.process(
49
+ audio_path, out_dir,
50
+ remove_fillers=False, remove_stutters=False,
51
+ remove_silences=opt_silences, remove_breaths=opt_breaths,
52
+ remove_mouth_sounds=opt_mouth, word_segments=None,
53
+ )
54
+ clean1 = denoise1["audio_path"]
55
+ stats = denoise1["stats"]
56
+
57
+ yield {"status": "processing", "step": 2, "message": "Step 2/5 — Transcribing..."}
58
+ transcript, detected_lang, t_method = transcriber.transcribe(clean1, src_lang)
59
+ word_segs = transcriber._last_segments
60
+
61
+ if (opt_fillers or opt_stutters) and word_segs:
62
+ yield {"status": "processing", "step": 3, "message": "Step 3/5 — Removing fillers & stutters..."}
63
+ import soundfile as sf
64
+ audio_data, sr = sf.read(clean1)
65
+ if audio_data.ndim == 2:
66
+ audio_data = audio_data.mean(axis=1)
67
+ audio_data = audio_data.astype(np.float32)
68
+ if opt_fillers:
69
+ audio_data, n_f = denoiser._remove_fillers(audio_data, sr, word_segs)
70
+ stats["fillers_removed"] = n_f
71
+ transcript = denoiser.clean_transcript_fillers(transcript)
72
+ if opt_stutters:
73
+ audio_data, n_s = denoiser._remove_stutters(audio_data, sr, word_segs)
74
+ stats["stutters_removed"] = n_s
75
+ sf.write(clean1, audio_data, sr, subtype="PCM_24")
76
+ else:
77
+ stats["fillers_removed"] = 0
78
+ stats["stutters_removed"] = 0
79
+
80
+ translation = transcript
81
+ tl_method = "same language"
82
+ if tgt_lang != "auto" and detected_lang != tgt_lang:
83
+ yield {"status": "processing", "step": 4, "message": "Step 4/5 — Translating..."}
84
+ translation, tl_method = translator.translate(transcript, detected_lang, tgt_lang)
85
+
86
+ yield {"status": "processing", "step": 5, "message": "Step 5/5 — Summarizing..."}
87
+ summary = translator.summarize(transcript)
88
+
89
+ with open(clean1, "rb") as f:
90
+ enhanced_b64 = base64.b64encode(f.read()).decode("utf-8")
91
+
92
+ yield {
93
+ "status": "done",
94
+ "step": 5,
95
+ "message": "Done!",
96
+ "transcript": transcript,
97
+ "translation": translation,
98
+ "summary": summary,
99
+ "enhancedAudio": enhanced_b64,
100
+ "stats": {
101
+ "language": detected_lang.upper(),
102
+ "noise_method": stats.get("noise_method", "noisereduce"),
103
+ "fillers_removed": stats.get("fillers_removed", 0),
104
+ "stutters_removed": stats.get("stutters_removed", 0),
105
+ "silences_removed_sec": stats.get("silences_removed_sec", 0),
106
+ "breaths_reduced": stats.get("breaths_reduced", False),
107
+ "mouth_sounds_removed": stats.get("mouth_sounds_removed", 0),
108
+ "transcription_method": t_method,
109
+ "translation_method": tl_method,
110
+ "processing_sec": stats.get("processing_sec", 0),
111
+ "word_segments": len(word_segs),
112
+ "transcript_words": len(transcript.split()),
113
+ },
114
+ }
115
+ except Exception as e:
116
+ logger.error(f"Pipeline failed: {e}", exc_info=True)
117
+ yield {"status": "error", "message": f"Error: {str(e)}"}
118
+
119
+
120
+ # ══════════════════════════════════════════════════════════════════════
121
+ # ROUTES
122
+ # ══════════════════════════════════════════════════════════════════════
123
+ @app.get("/api/health")
124
+ async def health():
125
+ return JSONResponse({"status": "ok", "service": "ClearWave AI API"})
126
+
127
+
128
+ @app.post("/api/process-url")
129
+ async def process_url(request: Request):
130
+ data = await request.json()
131
+ audio_url = data.get("audioUrl")
132
+ audio_id = data.get("audioId", "")
133
+ src_lang = data.get("srcLang", "auto")
134
+ tgt_lang = data.get("tgtLang", "te")
135
+ opt_fillers = data.get("optFillers", True)
136
+ opt_stutters = data.get("optStutters", True)
137
+ opt_silences = data.get("optSilences", True)
138
+ opt_breaths = data.get("optBreaths", True)
139
+ opt_mouth = data.get("optMouth", True)
140
+
141
+ if not audio_url:
142
+ return JSONResponse({"error": "audioUrl is required"}, status_code=400)
143
+
144
+ async def generate():
145
+ import sys
146
+
147
+ def sse(obj):
148
+ sys.stdout.flush()
149
+ return "data: " + json.dumps(obj) + "\n\n"
150
+
151
+ yield sse({"status": "processing", "step": 0, "message": "Downloading audio..."})
152
+
153
+ try:
154
+ resp = requests.get(audio_url, timeout=60, stream=True)
155
+ resp.raise_for_status()
156
+ suffix = ".wav" if "wav" in audio_url.lower() else ".mp3"
157
+ tmp = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
158
+ downloaded = 0
159
+ total = int(resp.headers.get("content-length", 0))
160
+ for chunk in resp.iter_content(chunk_size=65536):
161
+ if chunk:
162
+ tmp.write(chunk)
163
+ downloaded += len(chunk)
164
+ if total:
165
+ pct = int(downloaded * 100 / total)
166
+ yield sse({"status": "processing", "step": 0,
167
+ "message": "Downloading... " + str(pct) + "%"})
168
+ tmp.close()
169
+ except Exception as e:
170
+ yield sse({"status": "error", "message": "Download failed: " + str(e)})
171
+ return
172
+
173
+ for result in run_pipeline(tmp.name, src_lang, tgt_lang,
174
+ opt_fillers, opt_stutters, opt_silences,
175
+ opt_breaths, opt_mouth):
176
+ result["audioId"] = audio_id
177
+ yield sse(result)
178
+
179
+ try:
180
+ os.unlink(tmp.name)
181
+ except Exception:
182
+ pass
183
+
184
+ return StreamingResponse(
185
+ generate(),
186
+ media_type="text/event-stream",
187
+ headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"},
188
+ )