Spaces:

Clearwave48
/

clearwave-api

Running

App Files Files Community

clearwave-api / main.py

Clearwave48

Update main.py

a960e11 verified 7 days ago

raw

history blame contribute delete

9.06 kB

	"""
	ClearWave AI — API Space (FastAPI only)
	Handles /api/health and /api/process-url
	No Gradio, no routing conflicts.
	"""

	import json
	import logging
	import os
	import shutil
	import tempfile
	from urllib.parse import urlparse

	import cloudinary
	import cloudinary.uploader
	import numpy as np
	import requests
	from fastapi import FastAPI, Request
	from fastapi.middleware.cors import CORSMiddleware
	from fastapi.responses import StreamingResponse, JSONResponse

	# Cloudinary config — set these in your HF Space secrets.
	# Credentials come from the CLOUD_NAME / API_KEY / API_SECRET environment
	# variables. os.environ.get returns None for an unset variable, so a missing
	# secret is not detected here.
	cloudinary.config(
	cloud_name = os.environ.get("CLOUD_NAME"),
	api_key = os.environ.get("API_KEY"),
	api_secret = os.environ.get("API_SECRET"),
	)

	# Configure logging at INFO level and create the module-level logger
	# used by run_pipeline.
	logging.basicConfig(level=logging.INFO)
	logger = logging.getLogger(__name__)

	# Project-local processing stages, imported after logging is configured.
	# NOTE(review): presumably so that import-time log output from these modules
	# is captured — confirm before moving these imports to the top of the file.
	from denoiser import Denoiser
	from transcriber import Transcriber
	from translator import Translator

	# One shared instance of each stage, created at import time and referenced
	# by run_pipeline on every call.
	denoiser = Denoiser()
	transcriber = Transcriber()
	translator = Translator()

	app = FastAPI(title="ClearWave AI API")

	# CORS: every origin, method and header is allowed.
	# NOTE(review): this is wide open — consider restricting allow_origins if the
	# API is only meant to be called from a known front end.
	app.add_middleware(
	CORSMiddleware,
	allow_origins=["*"],
	allow_methods=["*"],
	allow_headers=["*"],
	)

	# ══════════════════════════════════════════════════════════════════════
	# PIPELINE
	# ══════════════════════════════════════════════════════════════════════
	def run_pipeline(audio_path, src_lang="auto", tgt_lang="te",
	                 opt_fillers=True, opt_stutters=True, opt_silences=True,
	                 opt_breaths=True, opt_mouth=True):
	    """Run the enhancement pipeline on one audio file, yielding progress events.

	    This is a generator. Each yielded dict carries a "status" key:

	    * "processing" — with "step" (1-5) and a human-readable "message";
	    * "done"       — final event with "transcript", "translation", "summary",
	                     "enhancedAudio" (Cloudinary URL, or None when the upload
	                     failed) and a "stats" dict;
	    * "error"      — with "message"; emitted instead of raising when any
	                     stage fails.

	    Args:
	        audio_path: Path of the input audio file handed to denoiser.process.
	        src_lang: Source-language hint passed to transcriber.transcribe.
	        tgt_lang: Target language code. Translation is skipped when this is
	            "auto" or equals the detected language.
	        opt_fillers: Remove filler words (needs word segments from step 2).
	        opt_stutters: Remove stutters (needs word segments from step 2).
	        opt_silences: Forwarded to the denoiser as remove_silences.
	        opt_breaths: Forwarded to the denoiser as remove_breaths.
	        opt_mouth: Forwarded to the denoiser as remove_mouth_sounds.

	    The scratch directory created for intermediate audio is removed when the
	    generator finishes or is closed early.
	    """
	    out_dir = tempfile.mkdtemp()
	    try:
	        yield {"status": "processing", "step": 1, "message": "Step 1/5 — Denoising..."}
	        # Filler/stutter removal is disabled in this first pass and no word
	        # segments are supplied; those are handled in step 3 once the
	        # transcriber has produced them.
	        denoise1 = denoiser.process(
	            audio_path, out_dir,
	            remove_fillers=False, remove_stutters=False,
	            remove_silences=opt_silences, remove_breaths=opt_breaths,
	            remove_mouth_sounds=opt_mouth, word_segments=None,
	        )
	        clean1 = denoise1["audio_path"]
	        stats = denoise1["stats"]

	        yield {"status": "processing", "step": 2, "message": "Step 2/5 — Transcribing..."}
	        transcript, detected_lang, t_method = transcriber.transcribe(clean1, src_lang)
	        word_segs = transcriber._last_segments
	        if word_segs is None:
	            # Normalise so the len() in the final event cannot raise TypeError.
	            word_segs = []

	        if (opt_fillers or opt_stutters) and word_segs:
	            yield {"status": "processing", "step": 3, "message": "Step 3/5 — Removing fillers & stutters..."}
	            import soundfile as sf
	            # Read the denoised audio — soundfile can read both WAV and MP3
	            audio_data, sr = sf.read(clean1)
	            if audio_data.ndim == 2:
	                # Two-dimensional data: average across axis 1 to get one channel.
	                audio_data = audio_data.mean(axis=1)
	            audio_data = audio_data.astype(np.float32)
	            if opt_fillers:
	                audio_data, n_f = denoiser._remove_fillers(audio_data, sr, word_segs)
	                stats["fillers_removed"] = n_f
	                transcript = denoiser.clean_transcript_fillers(transcript)
	            if opt_stutters:
	                # NOTE(review): word_segs was produced before filler removal; if
	                # _remove_fillers shortens the audio these offsets may be stale
	                # — confirm against the Denoiser implementation.
	                audio_data, n_s = denoiser._remove_stutters(audio_data, sr, word_segs)
	                stats["stutters_removed"] = n_s
	            # Write to a fresh .wav — PCM_24 is WAV-only, never write to .mp3 path
	            clean_wav = os.path.join(out_dir, "clean_step3.wav")
	            sf.write(clean_wav, audio_data, sr, format="WAV", subtype="PCM_24")
	            clean1 = clean_wav  # downstream steps (Cloudinary upload) use this
	        else:
	            stats["fillers_removed"] = 0
	            stats["stutters_removed"] = 0

	        translation = transcript
	        tl_method = "same language"
	        if tgt_lang != "auto" and detected_lang != tgt_lang:
	            yield {"status": "processing", "step": 4, "message": "Step 4/5 — Translating..."}
	            translation, tl_method = translator.translate(transcript, detected_lang, tgt_lang)

	        yield {"status": "processing", "step": 5, "message": "Step 5/5 — Summarizing..."}
	        summary = translator.summarize(transcript)

	        # Upload enhanced audio to Cloudinary — returns a URL instead of base64.
	        # This keeps the done SSE event tiny (~200 bytes) instead of ~700KB,
	        # which was causing the JSON to be split across 85+ TCP chunks.
	        # An upload failure is deliberately non-fatal: the text results are
	        # still returned, with enhancedAudio set to None.
	        try:
	            upload_result = cloudinary.uploader.upload(
	                clean1,
	                resource_type="video",  # Cloudinary uses "video" for audio
	                folder="clearwave_enhanced",
	            )
	            enhanced_url = upload_result["secure_url"]
	            logger.info("Enhanced audio uploaded: %s", enhanced_url)
	        except Exception as e:
	            logger.error("Cloudinary upload failed: %s", e)
	            enhanced_url = None

	        yield {
	            "status": "done",
	            "step": 5,
	            "message": "Done!",
	            "transcript": transcript,
	            "translation": translation,
	            "summary": summary,
	            "enhancedAudio": enhanced_url,
	            "stats": {
	                "language": detected_lang.upper(),
	                "noise_method": stats.get("noise_method", "noisereduce"),
	                "fillers_removed": stats.get("fillers_removed", 0),
	                "stutters_removed": stats.get("stutters_removed", 0),
	                "silences_removed_sec": stats.get("silences_removed_sec", 0),
	                "breaths_reduced": stats.get("breaths_reduced", False),
	                "mouth_sounds_removed": stats.get("mouth_sounds_removed", 0),
	                "transcription_method": t_method,
	                "translation_method": tl_method,
	                "processing_sec": stats.get("processing_sec", 0),
	                "word_segments": len(word_segs),
	                "transcript_words": len(transcript.split()),
	            },
	        }
	    except Exception as e:
	        # Top-level boundary: report the failure to the caller as an event
	        # rather than letting it escape from the generator.
	        logger.error("Pipeline failed: %s", e, exc_info=True)
	        yield {"status": "error", "message": f"Error: {e}"}
	    finally:
	        # The scratch directory holds intermediate audio only; nothing in it
	        # is referenced after the upload above, so remove it to avoid leaking
	        # one directory per request.
	        shutil.rmtree(out_dir, ignore_errors=True)


	# ══════════════════════════════════════════════════════════════════════
	# ROUTES
	# ══════════════════════════════════════════════════════════════════════
	@app.get("/api/health")
	async def health():
	    """Liveness probe: return a fixed JSON body identifying the service."""
	    payload = {"status": "ok", "service": "ClearWave AI API"}
	    return JSONResponse(payload)


	def _audio_suffix(url, content_type=""):
	    """Choose the temp-file suffix (".wav" or ".mp3") for a downloaded audio URL.

	    The decision is based on the extension of the URL *path*, not on a
	    substring search over the whole URL: a plain substring test treats any
	    URL containing "wav" (for example "clearwave") as a WAV file. When the
	    path has no extension at all, the response Content-Type is used as a
	    fallback. Anything that is not recognised as WAV gets ".mp3".
	    """
	    ext = os.path.splitext(urlparse(url).path)[1].lower()
	    if ext == ".wav":
	        return ".wav"
	    if not ext and "wav" in (content_type or "").lower():
	        return ".wav"
	    return ".mp3"


	@app.post("/api/process-url")
	async def process_url(request: Request):
	    """Download the audio at ``audioUrl`` and stream pipeline progress as SSE.

	    Expects a JSON object body with:
	        audioUrl (required), audioId, srcLang, tgtLang, optFillers,
	        optStutters, optSilences, optBreaths, optMouth.

	    Returns:
	        A 400 JSONResponse when the body is not a JSON object or ``audioUrl``
	        is missing; otherwise a ``text/event-stream`` StreamingResponse whose
	        events are the download progress followed by every event yielded by
	        run_pipeline, each tagged with the request's ``audioId``.
	    """
	    try:
	        data = await request.json()
	    except ValueError:
	        # JSONDecodeError and UnicodeDecodeError are both ValueError subclasses.
	        return JSONResponse({"error": "Request body must be valid JSON"}, status_code=400)
	    if not isinstance(data, dict):
	        return JSONResponse({"error": "Request body must be a JSON object"}, status_code=400)

	    audio_url = data.get("audioUrl")
	    audio_id = data.get("audioId", "")
	    src_lang = data.get("srcLang", "auto")
	    tgt_lang = data.get("tgtLang", "te")
	    opt_fillers = data.get("optFillers", True)
	    opt_stutters = data.get("optStutters", True)
	    opt_silences = data.get("optSilences", True)
	    opt_breaths = data.get("optBreaths", True)
	    opt_mouth = data.get("optMouth", True)

	    if not audio_url:
	        return JSONResponse({"error": "audioUrl is required"}, status_code=400)

	    # NOTE(security): audio_url is caller-supplied and fetched server-side with
	    # no host allow-list or size cap, so this endpoint can be used to reach
	    # internal addresses or to download arbitrarily large bodies. Restrict the
	    # allowed hosts and cap the download size if the API is publicly reachable.

	    # NOTE(review): requests.get and run_pipeline are blocking calls executed
	    # inside an async generator, so the event loop is blocked while they run.
	    # Moving them to a worker thread would fix that, but the pipeline reads
	    # shared state (transcriber._last_segments), so that change needs
	    # per-request isolation or locking first.
	    async def generate():
	        import sys

	        def sse(obj):
	            # Flushes process stdout; this has no effect on the HTTP stream.
	            sys.stdout.flush()
	            return "data: " + json.dumps(obj) + "\n\n"

	        yield sse({"status": "processing", "step": 0, "message": "Downloading audio..."})

	        tmp_path = None
	        try:
	            try:
	                # Context managers guarantee that the HTTP connection and the
	                # temp-file handle are closed even if the download fails midway.
	                with requests.get(audio_url, timeout=60, stream=True) as resp:
	                    resp.raise_for_status()
	                    suffix = _audio_suffix(audio_url, resp.headers.get("content-type", ""))
	                    total = int(resp.headers.get("content-length", 0))
	                    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
	                        tmp_path = tmp.name
	                        downloaded = 0
	                        last_pct = -1
	                        for chunk in resp.iter_content(chunk_size=65536):
	                            if not chunk:
	                                continue
	                            tmp.write(chunk)
	                            downloaded += len(chunk)
	                            if not total:
	                                continue
	                            # Clamp: the decoded body can exceed Content-Length
	                            # when the server applies a content encoding.
	                            pct = min(100, int(downloaded * 100 / total))
	                            if pct != last_pct:
	                                # Emit only when the percentage changes, not
	                                # once per 64 KiB chunk.
	                                last_pct = pct
	                                yield sse({"status": "processing", "step": 0,
	                                           "message": "Downloading... " + str(pct) + "%"})
	            except Exception as e:
	                yield sse({"status": "error", "message": "Download failed: " + str(e)})
	                return

	            for result in run_pipeline(tmp_path, src_lang, tgt_lang,
	                                       opt_fillers, opt_stutters, opt_silences,
	                                       opt_breaths, opt_mouth):
	                result["audioId"] = audio_id
	                yield sse(result)
	        finally:
	            # Runs on success, on download failure and when the client
	            # disconnects mid-stream, so the downloaded file is never leaked.
	            if tmp_path is not None:
	                try:
	                    os.unlink(tmp_path)
	                except OSError:
	                    pass  # best effort: the file may already be gone

	    return StreamingResponse(
	        generate(),
	        media_type="text/event-stream",
	        headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"},
	    )