"""FastAPI service for Verifiable Dictation Suite.

Full pipeline: audio → STT → claim extraction → QKL lattice → XGML graph
→ Pi Sheriff Node attestation → XAL envelope.

Aevion LLC | SDVOSB | CAGE 15NV7
"""

import hashlib
import re
import sys
import uuid
from pathlib import Path

from fastapi import FastAPI, UploadFile
from fastapi.responses import JSONResponse

# Support both package and standalone imports for HF Spaces / Vercel
try:
    from avds.attestation import generate_receipt, verify_receipt
    from avds.translators.qkl import build_qkl_lattice
    from avds.translators.xal import wrap_as_xal
    from avds.translators.xgml import build_xgml_graph
except ImportError:
    sys.path.insert(0, str(Path(__file__).parent))
    from attestation import generate_receipt, verify_receipt
    from translators.qkl import build_qkl_lattice
    from translators.xal import wrap_as_xal
    from translators.xgml import build_xgml_graph

app = FastAPI(title="AVDS — Verifiable Dictation Suite")

# Session storage (in-memory for single-instance; use Redis for production)
sessions: dict[str, dict] = {}

# Upper bound on retained sessions. Without a bound every upload stays in
# memory for the lifetime of the process.
MAX_SESSIONS = 1024

# Default number of claims kept per transcript.
DEFAULT_MAX_CLAIMS = 10

# Sentence terminator: a run of '.', '!' or '?' followed by whitespace or the
# end of the text. The lookahead keeps decimals such as "3.14" in one piece.
_SENTENCE_END = re.compile(r"[.!?]+(?=\s|$)")


def _store_session(session_id: str, record: dict) -> None:
    """Save *record* under *session_id*, evicting the oldest entries if full.

    Dicts preserve insertion order, so the first key is the oldest session.
    """
    sessions[session_id] = record
    while len(sessions) > MAX_SESSIONS:
        sessions.pop(next(iter(sessions)))


@app.post("/transcribe")
async def transcribe(file: UploadFile) -> JSONResponse:
    """Accept audio, run full AVDS pipeline, return XAL envelope.

    The uploaded bytes are hashed with SHA-256, a transcript is produced
    (currently a mock), and the receipt, claims, QKL lattice and XGML graph
    are built from it. The result of ``wrap_as_xal`` is extended with the
    ``"qkl"`` and ``"xgml"`` entries and returned; the intermediate
    artefacts are also stored in ``sessions`` under a fresh session id.
    """
    content = await file.read()
    audio_hash = hashlib.sha256(content).hexdigest()
    session_id = f"avds-{uuid.uuid4().hex[:12]}"

    # Step 1: Transcribe (currently mock; swap for Voxtral/Mistral STT)
    # TODO: replace the mock with a real STT call that takes the audio bytes
    # and file.content_type.
    transcript = f"[MOCK TRANSCRIPT for {audio_hash[:8]}]"

    # Step 2: Generate hardware-attested receipt
    stt_model = "voxtral-mini-2507"
    receipt = await generate_receipt(audio_hash, transcript, stt_model)

    # Step 3: Extract claims from transcript
    claims = _extract_claims(transcript)

    # Step 4: Build QKL lattice
    qkl = build_qkl_lattice(claims, session_id)

    # Step 5: Build XGML audit graph
    xgml = build_xgml_graph(
        session_id=session_id,
        audio_hash=audio_hash,
        transcript=transcript,
        stt_model=stt_model,
        receipt=receipt,
        claims_count=len(claims),
    )

    # Step 6: Wrap in XAL envelope
    xal = wrap_as_xal(transcript, receipt)
    xal["qkl"] = qkl
    xal["xgml"] = xgml

    # Store session (bounded; oldest sessions are dropped first)
    _store_session(
        session_id,
        {
            "audio_hash": audio_hash,
            "transcript": transcript,
            "receipt": receipt,
            "claims": claims,
            "qkl": qkl,
            "xgml": xgml,
        },
    )

    return JSONResponse(content=xal)


@app.post("/verify")
async def verify(receipt: dict) -> dict:
    """Verify a receipt's signature and integrity.

    Delegates to ``verify_receipt`` and returns its result under ``"valid"``
    together with the receipt's ``"receipt_id"`` (``None`` when absent).
    """
    valid = verify_receipt(receipt)
    return {"valid": valid, "receipt_id": receipt.get("receipt_id")}


@app.get("/health")
async def health() -> dict:
    """Health check endpoint.

    Reports a static status, the number of sessions currently held in
    memory, and the names of the pipeline stages.
    """
    return {
        "status": "ok",
        "sessions": len(sessions),
        "pipeline": ["stt", "receipt", "claims", "qkl", "xgml", "xal"],
    }


@app.get("/session/{session_id}")
async def get_session(session_id: str) -> JSONResponse:
    """Retrieve a stored session by ID.

    Returns the stored session record, or a 404 response with an ``"error"``
    message when the id is unknown (never created, or already evicted).
    """
    record = sessions.get(session_id)
    if record is None:
        return JSONResponse(
            content={"error": "session not found"}, status_code=404
        )
    return JSONResponse(content=record)


def _extract_claims(
    transcript: str, max_claims: int = DEFAULT_MAX_CLAIMS
) -> list[dict]:
    """Extract structured claims from a transcript.

    Currently returns mock claims. In production, this calls a cleanup LLM
    (Qwen2.5 or similar) to extract factual claims with confidence scores.

    Args:
        transcript: Text to split into claims.
        max_claims: Maximum number of claims to return (default 10).

    Returns:
        A list of claim dicts with ``"text"``, ``"confidence"`` and
        ``"source"`` keys. An empty or mock transcript yields one
        placeholder claim.
    """
    if not transcript or transcript.startswith("[MOCK"):
        return [{"text": "mock claim", "confidence": 0.5, "source": "stt"}]

    # Split on sentence boundaries for claim extraction. Terminators are
    # dropped from the claim text; punctuation inside a token is kept.
    pieces = (part.strip() for part in _SENTENCE_END.split(transcript))
    sentences = [piece for piece in pieces if piece]

    limit = max(max_claims, 0)  # a negative cap would slice from the end
    return [
        {"text": sentence, "confidence": 0.7, "source": "stt"}
        for sentence in sentences[:limit]
    ]