Spaces:
Sleeping
Sleeping
| from __future__ import annotations | |
| import os | |
| import tempfile | |
| from fastapi import FastAPI, File, Form, HTTPException, UploadFile | |
| try: | |
| from .config import VoiceRuntimeConfig | |
| from .service import process_diarization_only, process_voice | |
| except ImportError: # HF flat-root execution fallback | |
| from config import VoiceRuntimeConfig | |
| from service import process_diarization_only, process_voice | |
# FastAPI application instance for the voice-intelligence service.
app = FastAPI(
    title="Voice Intelligence Module",
    version="1.0.0",
)

# Upload extensions accepted by _save_upload_temp; compared against the
# lower-cased suffix of the uploaded filename (leading dot included).
_ALLOWED = {
    ".wav",
    ".mp3",
    ".m4a",
    ".ogg",
    ".flac",
    ".aac",
    ".mp4",
    ".mov",
    ".mkv",
    ".webm",
}
def _save_upload_temp(upload: UploadFile) -> str:
    """Copy an uploaded file to a temporary file on disk and return its path.

    The lower-cased extension of ``upload.filename`` must be a member of
    ``_ALLOWED``; the temporary file is created with that extension as its
    suffix. The caller owns the returned path and must delete it when done.

    Args:
        upload: Incoming upload. ``upload.file`` is read in 1 MiB chunks.
            A missing filename is treated as ``"input_audio"`` (which has no
            extension and is therefore rejected).

    Returns:
        Path of the newly created temporary file.

    Raises:
        HTTPException: Status 400 when the extension is missing or not in
            ``_ALLOWED``.
    """
    filename = upload.filename or "input_audio"
    ext = os.path.splitext(filename)[1].lower()
    if ext not in _ALLOWED:
        allowed = ", ".join(sorted(_ALLOWED))
        raise HTTPException(
            status_code=400,
            detail=f"Unsupported input extension: {ext or 'unknown'}. Allowed: {allowed}",
        )

    fd, tmp_path = tempfile.mkstemp(prefix="voice-intel-upload-", suffix=ext)
    try:
        # Write through the descriptor mkstemp already opened instead of
        # closing it and reopening the path.
        with os.fdopen(fd, "wb") as out:
            while True:
                chunk = upload.file.read(1024 * 1024)
                if not chunk:
                    break
                out.write(chunk)
    except BaseException:
        # The caller never receives tmp_path when the copy fails, so nobody
        # else can clean up the partial file; remove it here (best effort).
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise
    return tmp_path
def health() -> dict:
    """Return a static liveness payload identifying this module."""
    payload = dict(ok=True, module="voice-intelligence")
    return payload
def align_trimmed(
    file: UploadFile = File(...),
    language_hint: str = Form("auto"),
    silence_threshold_db: float = Form(-40.0),
    min_silence_sec: float = Form(0.30),
    keep_padding_sec: float = Form(0.05),
    analysis_window_ms: int = Form(10),
    diarization_enabled: str = Form("true"),
    diarization_min_speakers: int = Form(0),
    diarization_max_speakers: int = Form(0),
) -> dict:
    """Run ``process_voice`` with silence trimming and the audio payload enabled.

    The upload is saved to a temporary file, an env-derived
    ``VoiceRuntimeConfig`` is overridden with the form values, and the
    result of ``process_voice`` is returned with ``endpoint`` and
    ``description`` keys added. The temporary file is always removed.

    Args:
        file: Uploaded media file; its extension is validated by
            ``_save_upload_temp``.
        language_hint: Forwarded unchanged to ``process_voice``.
        silence_threshold_db: Overrides ``config.silence_threshold_db``.
        min_silence_sec: Overrides ``config.min_silence_sec``.
        keep_padding_sec: Overrides ``config.keep_padding_sec``.
        analysis_window_ms: Overrides ``config.analysis_window_ms``.
        diarization_enabled: Text flag; true when, after strip/lower, it is
            one of ``"1"``, ``"true"``, ``"yes"``, ``"on"``.
        diarization_min_speakers: Overrides ``config.diarization_min_speakers``.
        diarization_max_speakers: Overrides ``config.diarization_max_speakers``.

    Returns:
        The ``process_voice`` response dict with ``endpoint`` and
        ``description`` set.

    Raises:
        HTTPException: Re-raised as-is when raised downstream (e.g. 400 for
            a bad extension); any other exception becomes a 500.
    """
    tmp_path = _save_upload_temp(file)
    try:
        # Config construction lives inside the try so that a failure here
        # still reaches the finally block and the uploaded temp file is removed.
        config = VoiceRuntimeConfig.from_env()
        config.silence_threshold_db = silence_threshold_db
        config.min_silence_sec = min_silence_sec
        config.keep_padding_sec = keep_padding_sec
        config.analysis_window_ms = analysis_window_ms
        config.diarization_enabled = diarization_enabled.strip().lower() in ("1", "true", "yes", "on")
        config.diarization_min_speakers = diarization_min_speakers
        config.diarization_max_speakers = diarization_max_speakers

        response = process_voice(
            input_audio_path=tmp_path,
            config=config,
            language_hint=language_hint,
            trim_silence_enabled=True,
            include_audio_payload=True,
            minimal_output=False,
        )
        response["endpoint"] = "/v1/voice/align/trimmed"
        response["description"] = "Returns word-level data and silence-removed WAV payload."
        return response
    except HTTPException:
        raise
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"trimmed endpoint failed: {exc}") from exc
    finally:
        # Best-effort cleanup: the file may already be gone.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
def align_raw(
    file: UploadFile = File(...),
    language_hint: str = Form("auto"),
    diarization_enabled: str = Form("true"),
    diarization_min_speakers: int = Form(0),
    diarization_max_speakers: int = Form(0),
) -> dict:
    """Run ``process_voice`` without silence trimming, in minimal-output mode.

    The upload is saved to a temporary file, an env-derived
    ``VoiceRuntimeConfig`` receives the diarization overrides, and the
    result of ``process_voice`` is returned with ``endpoint`` and
    ``description`` keys added. The temporary file is always removed.

    Args:
        file: Uploaded media file; its extension is validated by
            ``_save_upload_temp``.
        language_hint: Forwarded unchanged to ``process_voice``.
        diarization_enabled: Text flag; true when, after strip/lower, it is
            one of ``"1"``, ``"true"``, ``"yes"``, ``"on"``.
        diarization_min_speakers: Overrides ``config.diarization_min_speakers``.
        diarization_max_speakers: Overrides ``config.diarization_max_speakers``.

    Returns:
        The ``process_voice`` response dict with ``endpoint`` and
        ``description`` set.

    Raises:
        HTTPException: Re-raised as-is when raised downstream; any other
            exception becomes a 500.
    """
    tmp_path = _save_upload_temp(file)
    try:
        # Config construction lives inside the try so that a failure here
        # still reaches the finally block and the uploaded temp file is removed.
        config = VoiceRuntimeConfig.from_env()
        config.diarization_enabled = diarization_enabled.strip().lower() in ("1", "true", "yes", "on")
        config.diarization_min_speakers = diarization_min_speakers
        config.diarization_max_speakers = diarization_max_speakers

        response = process_voice(
            input_audio_path=tmp_path,
            config=config,
            language_hint=language_hint,
            trim_silence_enabled=False,
            include_audio_payload=False,
            minimal_output=True,
        )
        response["endpoint"] = "/v1/voice/align/raw"
        response["description"] = "Returns only required alignment data; input audio remains unchanged."
        return response
    except HTTPException:
        raise
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"raw endpoint failed: {exc}") from exc
    finally:
        # Best-effort cleanup: the file may already be gone.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
def diarization_only(
    file: UploadFile = File(...),
    diarization_enabled: str = Form("true"),
    diarization_min_speakers: int = Form(0),
    diarization_max_speakers: int = Form(0),
) -> dict:
    """Run ``process_diarization_only`` on an uploaded file.

    The upload is saved to a temporary file, an env-derived
    ``VoiceRuntimeConfig`` receives the diarization overrides, and the
    result of ``process_diarization_only`` is returned with ``endpoint``
    and ``description`` keys added. The temporary file is always removed.

    Args:
        file: Uploaded media file; its extension is validated by
            ``_save_upload_temp``.
        diarization_enabled: Text flag; true when, after strip/lower, it is
            one of ``"1"``, ``"true"``, ``"yes"``, ``"on"``.
        diarization_min_speakers: Overrides ``config.diarization_min_speakers``.
        diarization_max_speakers: Overrides ``config.diarization_max_speakers``.

    Returns:
        The ``process_diarization_only`` response dict with ``endpoint``
        and ``description`` set.

    Raises:
        HTTPException: Re-raised as-is when raised downstream; any other
            exception becomes a 500.
    """
    tmp_path = _save_upload_temp(file)
    try:
        # Config construction lives inside the try so that a failure here
        # still reaches the finally block and the uploaded temp file is removed.
        config = VoiceRuntimeConfig.from_env()
        config.diarization_enabled = diarization_enabled.strip().lower() in ("1", "true", "yes", "on")
        config.diarization_min_speakers = diarization_min_speakers
        config.diarization_max_speakers = diarization_max_speakers

        response = process_diarization_only(
            input_audio_path=tmp_path,
            config=config,
        )
        response["endpoint"] = "/v1/voice/diarization"
        response["description"] = "Returns diarization only (speaker segments + summary), no transcription payload."
        return response
    except HTTPException:
        raise
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"diarization endpoint failed: {exc}") from exc
    finally:
        # Best-effort cleanup: the file may already be gone.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
if __name__ == "__main__":
    import uvicorn

    # Listening port comes from the PORT environment variable (default 8091).
    port = int(os.environ.get("PORT", "8091"))

    # Hand uvicorn the app object rather than a dotted import string: the
    # string form forces a second import of this module and cannot resolve
    # when the file is run from a flat root (the layout the import fallback
    # at the top of the file exists for). reload=False makes this valid.
    uvicorn.run(app, host="0.0.0.0", port=port, reload=False)