| """FastAPI service for Verifiable Dictation Suite. |
| |
| Full pipeline: audio -> STT -> claim extraction -> QKL lattice -> XGML graph |
| -> Pi Sheriff Node attestation -> XAL envelope. |
| |
| Aevion LLC | SDVOSB | CAGE 15NV7 |
| """ |
|
|
| import hashlib |
| import uuid |
| from pathlib import Path |
| import sys |
|
|
| from fastapi import FastAPI, UploadFile |
| from fastapi.responses import JSONResponse |
|
|
| |
| try: |
| from avds.attestation import generate_receipt, verify_receipt |
| from avds.translators.qkl import build_qkl_lattice |
| from avds.translators.xal import wrap_as_xal |
| from avds.translators.xgml import build_xgml_graph |
| except ImportError: |
| sys.path.insert(0, str(Path(__file__).parent)) |
| from attestation import generate_receipt, verify_receipt |
| from translators.qkl import build_qkl_lattice |
| from translators.xal import wrap_as_xal |
| from translators.xgml import build_xgml_graph |
|
|
# FastAPI application instance; the route decorators in this module
# register their handlers on it.
# NOTE(review): the "β" in the title looks like an encoding artifact
# (possibly a dash) — confirm the intended character before changing it.
app = FastAPI(title="AVDS β Verifiable Dictation Suite")


# In-process session store keyed by session id. Entries are added by
# /transcribe and read by /session/{session_id} and /health. Being a plain
# module-level dict, its contents do not survive a process restart.
sessions: dict[str, dict] = {}
|
|
|
|
@app.post("/transcribe")
async def transcribe(file: UploadFile) -> JSONResponse:
    """Run an uploaded audio file through the AVDS pipeline.

    The upload is read fully into memory and hashed with SHA-256. A receipt
    is generated for the (hash, transcript, model) triple, claims are
    extracted, and the QKL and XGML artifacts are built. The intermediate
    results are kept in the in-memory ``sessions`` store under a freshly
    generated session id.

    Args:
        file: The uploaded audio file.

    Returns:
        A JSON response holding the XAL envelope, with the QKL lattice and
        XGML graph attached under the ``"qkl"`` and ``"xgml"`` keys.
    """
    audio_bytes = await file.read()
    audio_hash = hashlib.sha256(audio_bytes).hexdigest()
    sid = f"avds-{uuid.uuid4().hex[:12]}"

    # Speech-to-text is not wired in here: the transcript is a placeholder
    # that embeds the first 8 hex characters of the audio hash.
    text = f"[MOCK TRANSCRIPT for {audio_hash[:8]}]"

    # The receipt ties together the audio hash, transcript and model name.
    model_name = "voxtral-mini-2507"
    signed_receipt = await generate_receipt(audio_hash, text, model_name)

    # Claims come first; the lattice is built from them.
    extracted = _extract_claims(text)
    lattice = build_qkl_lattice(extracted, sid)

    graph = build_xgml_graph(
        session_id=sid,
        audio_hash=audio_hash,
        transcript=text,
        stt_model=model_name,
        receipt=signed_receipt,
        claims_count=len(extracted),
    )

    # Wrap transcript + receipt, then attach the derived artifacts.
    envelope = wrap_as_xal(text, signed_receipt)
    envelope["qkl"] = lattice
    envelope["xgml"] = graph

    # Remember the run so it can be fetched later via /session/{session_id}.
    sessions[sid] = dict(
        audio_hash=audio_hash,
        transcript=text,
        receipt=signed_receipt,
        claims=extracted,
        qkl=lattice,
        xgml=graph,
    )

    return JSONResponse(content=envelope)
|
|
|
|
@app.post("/verify")
async def verify(receipt: dict) -> dict:
    """Check a receipt with ``verify_receipt`` and echo back its id.

    Args:
        receipt: The receipt to check, as a dict.

    Returns:
        A dict with ``"valid"`` (the result of ``verify_receipt``) and
        ``"receipt_id"`` (``None`` when the receipt has no such key).
    """
    # Verification runs before the id lookup, as in the original flow.
    outcome = {"valid": verify_receipt(receipt)}
    outcome["receipt_id"] = receipt.get("receipt_id")
    return outcome
|
|
|
|
@app.get("/health")
async def health() -> dict:
    """Report service status, the stored-session count and stage names.

    Returns:
        A dict with a fixed ``"status"`` of ``"ok"``, the number of entries
        currently in ``sessions``, and the list of pipeline stage names.
    """
    stage_names = ["stt", "receipt", "claims", "qkl", "xgml", "xal"]
    return dict(status="ok", sessions=len(sessions), pipeline=stage_names)
|
|
|
|
@app.get("/session/{session_id}")
async def get_session(session_id: str) -> JSONResponse:
    """Return the stored record for ``session_id``.

    Args:
        session_id: Key of the session in the in-memory ``sessions`` store.

    Returns:
        The stored session as JSON, or a 404 JSON error body when the id
        is not present in the store.
    """
    try:
        record = sessions[session_id]
    except KeyError:
        # Unknown id: answer with an error payload rather than raising.
        return JSONResponse(
            content={"error": "session not found"},
            status_code=404,
        )
    return JSONResponse(content=record)
|
|
|
|
| def _extract_claims(transcript: str) -> list[dict]: |
| """Extract structured claims from a transcript. |
| |
| Currently returns mock claims. In production, this calls a |
| cleanup LLM (Qwen2.5 or similar) to extract factual claims |
| with confidence scores. |
| """ |
| if not transcript or transcript.startswith("[MOCK"): |
| return [{"text": "mock claim", "confidence": 0.5, "source": "stt"}] |
| |
| sentences = [s.strip() for s in transcript.split(".") if s.strip()] |
| return [ |
| {"text": s, "confidence": 0.7, "source": "stt"} |
| for s in sentences[:10] |
| ] |