# c478197 — feat: training scripts, edge-tts, Gradio TTS choice, NLP warmup
# (stray VCS metadata: branch/commit text was pasted above the module
# docstring as bare code, which is a syntax error — preserved as a comment)
"""
FastAPI backend for Flutter / mobile clients.
Run (from project root, after installing requirements):
uvicorn api:app --host 0.0.0.0 --port 8000
Environment:
TTS_ENGINE=edge (default; neural TTS via edge-tts, headless-safe with HTTPS)
TTS_ENGINE=gtts
TTS_ENGINE=pyttsx3 (local desktop only)
"""
from __future__ import annotations
import base64
import logging
import tempfile
from pathlib import Path
from fastapi import FastAPI, File, HTTPException, Query, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field
from voice_agent.nlp_module import get_capability_statement
from voice_agent.pipeline import run_text_turn, run_voice_turn_file, warmup_pipeline
logger = logging.getLogger(__name__)

# Shared interaction log appended to by the pipeline on every turn.
LOG_FILE = Path(__file__).resolve().parent / "voice_agent" / "interaction_log.txt"

app = FastAPI(
    title="Voice Admissions Agent API",
    # Keep in sync with the module docstring: edge-tts is the default engine,
    # with gTTS and pyttsx3 selectable via TTS_ENGINE.
    description="STT (Whisper) → semantic Q&A → TTS (edge-tts / gTTS / pyttsx3 via TTS_ENGINE).",
    version="0.2.0",
)

# NOTE(review): allow_origins=["*"] combined with allow_credentials=True is
# ineffective for credentialed browser requests — the CORS spec forbids the
# wildcard origin with credentials. If cookies/auth headers are needed
# cross-origin, list explicit origins (or use allow_origin_regex) instead.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
class TextChatRequest(BaseModel):
    """Request body for POST /v1/chat/text: a typed user message."""

    message: str = Field(..., min_length=1, description="User message (typed)")
    include_audio: bool = Field(False, description="If true, include base64-encoded reply audio")
class TextChatResponse(BaseModel):
    """Response body shared by the text and voice chat endpoints."""

    ok: bool  # False when the pipeline reported an error
    transcript: str  # user text (as typed, or the STT transcript for voice)
    reply: str  # agent answer text (empty on error)
    confidence: str  # pipeline confidence label (empty on error)
    similarity: float | None = None  # semantic match score, when available
    audio_base64: str | None = None  # base64 reply audio, when requested and produced
    audio_mime: str | None = None  # "audio/mpeg" for .mp3 replies, else "audio/wav"
    error: str | None = None  # error detail when ok is False
    capability_statement: str | None = None  # sent on errors to guide the client
@app.on_event("startup")
def _startup() -> None:
    """Eagerly warm the pipeline so the first request does not pay load cost."""
    # NOTE(review): @app.on_event is deprecated in recent FastAPI releases in
    # favor of the lifespan context manager — consider migrating on upgrade.
    warmup_pipeline()
    logger.info("API startup: models warmed (corpus + NLP + optional Whisper).")
@app.get("/health")
def health() -> dict:
    """Liveness probe: always reports the service as up."""
    return dict(status="ok")
@app.get("/v1/meta")
def meta() -> dict:
    """Expose the agent's capability statement to clients."""
    statement = get_capability_statement()
    return {"capability_statement": statement}
@app.post("/v1/chat/text", response_model=TextChatResponse)
def chat_text(body: TextChatRequest) -> TextChatResponse:
    """Run one typed chat turn through the Q&A pipeline.

    Returns the agent reply and, when ``include_audio`` is set and the
    pipeline produced a TTS file, the reply audio encoded as base64.
    """
    message = body.message.strip()
    # Field(min_length=1) does not reject whitespace-only input, and we strip
    # before dispatch — guard here so the pipeline never sees an empty string.
    if not message:
        return TextChatResponse(
            ok=False,
            transcript="",
            reply="",
            confidence="",
            error="Empty message",
            capability_statement=get_capability_statement(),
        )
    r = run_text_turn(message, log_path=LOG_FILE)
    if not r.ok:
        return TextChatResponse(
            ok=False,
            transcript="",
            reply="",
            confidence="",
            error=r.error or "Unknown error",
            capability_statement=get_capability_statement(),
        )
    audio_b64 = None
    mime = None
    if body.include_audio and r.audio_path and Path(r.audio_path).exists():
        data = Path(r.audio_path).read_bytes()
        audio_b64 = base64.standard_b64encode(data).decode("ascii")
        # Pipeline output is .mp3 or a non-mp3 file we presume is wav —
        # TODO confirm pyttsx3 path always yields .wav.
        mime = "audio/mpeg" if r.audio_path.lower().endswith(".mp3") else "audio/wav"
    return TextChatResponse(
        ok=True,
        transcript=r.transcript,
        reply=r.agent_text,
        confidence=r.confidence,
        similarity=r.similarity,
        audio_base64=audio_b64,
        audio_mime=mime,
    )
@app.post("/v1/chat/voice", response_model=TextChatResponse)
async def chat_voice(
    audio: UploadFile = File(..., description="User recording (wav, webm, mp3, …)"),
    include_audio: bool = Query(True, description="Include base64 reply audio"),
) -> TextChatResponse:
    """Run one spoken chat turn: upload → STT → Q&A → optional reply audio.

    Unexpected errors are converted into an ok=False response body (the
    mobile client handles them uniformly); deliberate HTTPExceptions (e.g.
    400 on an empty upload) propagate as real HTTP errors.
    """
    # Preserve a recognized extension so downstream decoding picks the right
    # container; anything unrecognized is treated as a browser webm recording.
    suffix = Path(audio.filename or "upload").suffix or ".webm"
    if suffix.lower() not in {".wav", ".webm", ".mp3", ".m4a", ".ogg", ".flac", ".mp4"}:
        suffix = ".webm"
    try:
        try:
            raw = await audio.read()
        finally:
            # Release the spooled upload explicitly — FastAPI/Starlette does
            # not close it in all versions, and a double close is harmless.
            await audio.close()
        if not raw:
            raise HTTPException(status_code=400, detail="Empty audio upload")
        # run_voice_turn_file takes a filesystem path, so persist the upload
        # to a temp file for the duration of the turn.
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
            tmp.write(raw)
            tmp_path = tmp.name
        try:
            r = run_voice_turn_file(tmp_path, log_path=LOG_FILE)
        finally:
            try:
                Path(tmp_path).unlink(missing_ok=True)
            except OSError:
                pass  # best-effort cleanup; a leaked temp file is not fatal
    except HTTPException:
        raise  # propagate deliberate 4xx responses unchanged
    except Exception as e:
        logger.exception("chat_voice failed")
        return TextChatResponse(
            ok=False,
            transcript="",
            reply="",
            confidence="",
            error=str(e),
            capability_statement=get_capability_statement(),
        )
    if not r.ok:
        return TextChatResponse(
            ok=False,
            transcript="",
            reply="",
            confidence="",
            error=r.error or "Unknown error",
            capability_statement=get_capability_statement(),
        )
    audio_b64 = None
    mime = None
    if include_audio and r.audio_path and Path(r.audio_path).exists():
        data = Path(r.audio_path).read_bytes()
        audio_b64 = base64.standard_b64encode(data).decode("ascii")
        # Mirrors the mime logic in chat_text: .mp3 → audio/mpeg, else wav.
        mime = "audio/mpeg" if r.audio_path.lower().endswith(".mp3") else "audio/wav"
    return TextChatResponse(
        ok=True,
        transcript=r.transcript,
        reply=r.agent_text,
        confidence=r.confidence,
        similarity=r.similarity,
        audio_base64=audio_b64,
        audio_mime=mime,
    )