# Hugging Face upload metadata: uploader "unknownfriend00007"; commit message
# "Upload 11 files"; commit 0d0b668 (verified).
from __future__ import annotations
import os
import tempfile
from fastapi import FastAPI, File, Form, HTTPException, UploadFile
try:
from .config import VoiceRuntimeConfig
from .service import process_diarization_only, process_voice
except ImportError: # HF flat-root execution fallback
from config import VoiceRuntimeConfig
from service import process_diarization_only, process_voice
# ASGI application object; the route decorators in this module register on it.
app = FastAPI(title="Voice Intelligence Module", version="1.0.0")
# Lower-case file extensions (audio and video containers) accepted for uploads;
# _save_upload_temp rejects anything else with HTTP 400.
_ALLOWED = {".wav", ".mp3", ".m4a", ".ogg", ".flac", ".aac", ".mp4", ".mov", ".mkv", ".webm"}
def _save_upload_temp(upload: UploadFile) -> str:
    """Persist an uploaded file to a temporary path and return that path.

    The extension of the client-supplied filename is lower-cased, checked
    against ``_ALLOWED`` and reused as the temp-file suffix. The payload is
    copied in 1 MiB chunks so the whole upload is never held in memory.

    Args:
        upload: Incoming upload; ``upload.filename`` and the file-like
            ``upload.file`` are read.

    Returns:
        Path of the newly created temporary file. The caller owns the file
        and is responsible for deleting it.

    Raises:
        HTTPException: Status 400 when the extension is missing or not in
            ``_ALLOWED``.
    """
    filename = upload.filename or "input_audio"
    ext = os.path.splitext(filename)[1].lower()
    if ext not in _ALLOWED:
        allowed = ", ".join(sorted(_ALLOWED))
        raise HTTPException(status_code=400, detail=f"Unsupported input extension: {ext or 'unknown'}. Allowed: {allowed}")

    fd, tmp_path = tempfile.mkstemp(prefix="voice-intel-upload-", suffix=ext)
    try:
        # Wrap the descriptor mkstemp already opened instead of closing it and
        # re-opening the path by name.
        with os.fdopen(fd, "wb") as f:
            while True:
                chunk = upload.file.read(1024 * 1024)
                if not chunk:
                    break
                f.write(chunk)
    except BaseException:
        # The copy failed part-way (read error, disk full, interrupt): the
        # caller never receives the path, so remove the partial file here.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise
    return tmp_path
@app.get("/health")
def health() -> dict:
    """Report that the service is up, together with a fixed module identifier."""
    status = dict(ok=True, module="voice-intelligence")
    return status
@app.post("/v1/voice/align/trimmed")
def align_trimmed(
    file: UploadFile = File(...),
    language_hint: str = Form("auto"),
    silence_threshold_db: float = Form(-40.0),
    min_silence_sec: float = Form(0.30),
    keep_padding_sec: float = Form(0.05),
    analysis_window_ms: int = Form(10),
    diarization_enabled: str = Form("true"),
    diarization_min_speakers: int = Form(0),
    diarization_max_speakers: int = Form(0),
) -> dict:
    """Run ``process_voice`` with silence trimming and the audio payload enabled.

    The upload is written to a temporary file, a ``VoiceRuntimeConfig`` is
    built from the environment and overridden with the form values, and the
    result of ``process_voice`` is returned after being tagged with the
    endpoint path and a description. The temporary file is removed on every
    exit path.

    Args:
        file: Uploaded media file; its extension must be in ``_ALLOWED``.
        language_hint: Passed through to ``process_voice`` unchanged.
        silence_threshold_db: Copied to ``config.silence_threshold_db``.
        min_silence_sec: Copied to ``config.min_silence_sec``.
        keep_padding_sec: Copied to ``config.keep_padding_sec``.
        analysis_window_ms: Copied to ``config.analysis_window_ms``.
        diarization_enabled: Text flag; "1", "true", "yes" and "on"
            (case-insensitive, surrounding whitespace ignored) enable
            diarization, anything else disables it.
        diarization_min_speakers: Copied to ``config.diarization_min_speakers``.
        diarization_max_speakers: Copied to ``config.diarization_max_speakers``.

    Returns:
        The mapping produced by ``process_voice`` with ``"endpoint"`` and
        ``"description"`` keys added.

    Raises:
        HTTPException: 400 for an unsupported extension; 500 wrapping any
            other failure during configuration or processing.
    """
    tmp_path = _save_upload_temp(file)
    try:
        # Configuration is built inside the try so that a failure here still
        # reaches the finally clause and the uploaded temp file is removed.
        config = VoiceRuntimeConfig.from_env()
        config.silence_threshold_db = silence_threshold_db
        config.min_silence_sec = min_silence_sec
        config.keep_padding_sec = keep_padding_sec
        config.analysis_window_ms = analysis_window_ms
        config.diarization_enabled = diarization_enabled.strip().lower() in ("1", "true", "yes", "on")
        config.diarization_min_speakers = diarization_min_speakers
        config.diarization_max_speakers = diarization_max_speakers

        response = process_voice(
            input_audio_path=tmp_path,
            config=config,
            language_hint=language_hint,
            trim_silence_enabled=True,
            include_audio_payload=True,
            minimal_output=False,
        )
        response["endpoint"] = "/v1/voice/align/trimmed"
        response["description"] = "Returns word-level data and silence-removed WAV payload."
        return response
    except HTTPException:
        # Already a well-formed HTTP error; let it through untouched.
        raise
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"trimmed endpoint failed: {exc}") from exc
    finally:
        # Best-effort cleanup: a missing or locked temp file must not mask
        # the real response or error.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
@app.post("/v1/voice/align/raw")
def align_raw(
    file: UploadFile = File(...),
    language_hint: str = Form("auto"),
    diarization_enabled: str = Form("true"),
    diarization_min_speakers: int = Form(0),
    diarization_max_speakers: int = Form(0),
) -> dict:
    """Run ``process_voice`` without silence trimming and with minimal output.

    The upload is written to a temporary file, a ``VoiceRuntimeConfig`` is
    built from the environment and overridden with the diarization form
    values, and the result of ``process_voice`` is returned after being
    tagged with the endpoint path and a description. The temporary file is
    removed on every exit path.

    Args:
        file: Uploaded media file; its extension must be in ``_ALLOWED``.
        language_hint: Passed through to ``process_voice`` unchanged.
        diarization_enabled: Text flag; "1", "true", "yes" and "on"
            (case-insensitive, surrounding whitespace ignored) enable
            diarization, anything else disables it.
        diarization_min_speakers: Copied to ``config.diarization_min_speakers``.
        diarization_max_speakers: Copied to ``config.diarization_max_speakers``.

    Returns:
        The mapping produced by ``process_voice`` with ``"endpoint"`` and
        ``"description"`` keys added.

    Raises:
        HTTPException: 400 for an unsupported extension; 500 wrapping any
            other failure during configuration or processing.
    """
    tmp_path = _save_upload_temp(file)
    try:
        # Configuration is built inside the try so that a failure here still
        # reaches the finally clause and the uploaded temp file is removed.
        config = VoiceRuntimeConfig.from_env()
        config.diarization_enabled = diarization_enabled.strip().lower() in ("1", "true", "yes", "on")
        config.diarization_min_speakers = diarization_min_speakers
        config.diarization_max_speakers = diarization_max_speakers

        response = process_voice(
            input_audio_path=tmp_path,
            config=config,
            language_hint=language_hint,
            trim_silence_enabled=False,
            include_audio_payload=False,
            minimal_output=True,
        )
        response["endpoint"] = "/v1/voice/align/raw"
        response["description"] = "Returns only required alignment data; input audio remains unchanged."
        return response
    except HTTPException:
        # Already a well-formed HTTP error; let it through untouched.
        raise
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"raw endpoint failed: {exc}") from exc
    finally:
        # Best-effort cleanup: a missing or locked temp file must not mask
        # the real response or error.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
@app.post("/v1/voice/diarization")
def diarization_only(
    file: UploadFile = File(...),
    diarization_enabled: str = Form("true"),
    diarization_min_speakers: int = Form(0),
    diarization_max_speakers: int = Form(0),
) -> dict:
    """Run ``process_diarization_only`` on an uploaded file.

    The upload is written to a temporary file, a ``VoiceRuntimeConfig`` is
    built from the environment and overridden with the diarization form
    values, and the result of ``process_diarization_only`` is returned after
    being tagged with the endpoint path and a description. The temporary
    file is removed on every exit path.

    Args:
        file: Uploaded media file; its extension must be in ``_ALLOWED``.
        diarization_enabled: Text flag; "1", "true", "yes" and "on"
            (case-insensitive, surrounding whitespace ignored) enable
            diarization, anything else disables it.
        diarization_min_speakers: Copied to ``config.diarization_min_speakers``.
        diarization_max_speakers: Copied to ``config.diarization_max_speakers``.

    Returns:
        The mapping produced by ``process_diarization_only`` with
        ``"endpoint"`` and ``"description"`` keys added.

    Raises:
        HTTPException: 400 for an unsupported extension; 500 wrapping any
            other failure during configuration or processing.
    """
    tmp_path = _save_upload_temp(file)
    try:
        # Configuration is built inside the try so that a failure here still
        # reaches the finally clause and the uploaded temp file is removed.
        config = VoiceRuntimeConfig.from_env()
        config.diarization_enabled = diarization_enabled.strip().lower() in ("1", "true", "yes", "on")
        config.diarization_min_speakers = diarization_min_speakers
        config.diarization_max_speakers = diarization_max_speakers

        response = process_diarization_only(
            input_audio_path=tmp_path,
            config=config,
        )
        response["endpoint"] = "/v1/voice/diarization"
        response["description"] = "Returns diarization only (speaker segments + summary), no transcription payload."
        return response
    except HTTPException:
        # Already a well-formed HTTP error; let it through untouched.
        raise
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"diarization endpoint failed: {exc}") from exc
    finally:
        # Best-effort cleanup: a missing or locked temp file must not mask
        # the real response or error.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
if __name__ == "__main__":
    # Script entry point: serve the app with uvicorn on all interfaces.
    import uvicorn

    # Listening port comes from the PORT environment variable (default 8091).
    port = int(os.environ.get("PORT", "8091"))
    # Pass the app object rather than the "core.voice_intelligence.api:app"
    # import string: that dotted path only resolves when the package layout
    # is present, whereas this module also supports flat-root execution (see
    # the import fallback at the top of the file). An import string is only
    # required for reload/workers, and reload is disabled here.
    uvicorn.run(app, host="0.0.0.0", port=port, reload=False)