avds / main.py
aevionai's picture
Initial AVDS deployment
58acc34
"""FastAPI service for Verifiable Dictation Suite.
Full pipeline: audio → STT → claim extraction → QKL lattice → XGML graph
→ Pi Sheriff Node attestation → XAL envelope.
Aevion LLC | SDVOSB | CAGE 15NV7
"""
import hashlib
import re
import sys
import uuid
from pathlib import Path

from fastapi import FastAPI, UploadFile
from fastapi.responses import JSONResponse
# Support both package and standalone imports for HF Spaces / Vercel
try:
from avds.attestation import generate_receipt, verify_receipt
from avds.translators.qkl import build_qkl_lattice
from avds.translators.xal import wrap_as_xal
from avds.translators.xgml import build_xgml_graph
except ImportError:
sys.path.insert(0, str(Path(__file__).parent))
from attestation import generate_receipt, verify_receipt
from translators.qkl import build_qkl_lattice
from translators.xal import wrap_as_xal
from translators.xgml import build_xgml_graph
# FastAPI application object; the title is shown in the generated API docs.
# The separator is a real em dash (the previous literal was mis-encoded).
app = FastAPI(title="AVDS — Verifiable Dictation Suite")

# Session storage keyed by session ID (in-memory for single-instance; use
# Redis for production). Being a plain module-level dict, it is emptied on
# every process restart.
sessions: dict[str, dict] = {}
@app.post("/transcribe")
async def transcribe(file: UploadFile) -> JSONResponse:
    """Run an uploaded audio file through the AVDS pipeline.

    The upload is hashed, given a (currently mocked) transcript, attested
    with a receipt, mined for claims, and rendered as a QKL lattice and an
    XGML graph. The intermediate results are cached in ``sessions`` under a
    freshly generated session ID, and the XAL envelope, extended with the
    ``qkl`` and ``xgml`` entries, is returned as JSON.
    """
    raw_audio = await file.read()
    digest = hashlib.sha256(raw_audio).hexdigest()
    sid = "avds-" + uuid.uuid4().hex[:12]

    # Stage 1: speech-to-text. Still a placeholder derived from the audio
    # hash; to be replaced by a real Voxtral/Mistral STT call.
    text = f"[MOCK TRANSCRIPT for {digest[:8]}]"

    # Stage 2: attestation receipt binding the audio hash, transcript and model.
    model_name = "voxtral-mini-2507"
    attestation = await generate_receipt(digest, text, model_name)

    # Stage 3: claims found in the transcript.
    found_claims = _extract_claims(text)

    # Stage 4: QKL lattice over those claims.
    lattice = build_qkl_lattice(found_claims, sid)

    # Stage 5: XGML audit graph describing this run.
    graph = build_xgml_graph(
        session_id=sid,
        audio_hash=digest,
        transcript=text,
        stt_model=model_name,
        receipt=attestation,
        claims_count=len(found_claims),
    )

    # Stage 6: XAL envelope, with lattice and graph attached.
    envelope = wrap_as_xal(text, attestation)
    envelope["qkl"] = lattice
    envelope["xgml"] = graph

    # Remember everything so the session can be fetched later by ID.
    record = {
        "audio_hash": digest,
        "transcript": text,
        "receipt": attestation,
        "claims": found_claims,
        "qkl": lattice,
        "xgml": graph,
    }
    sessions[sid] = record

    return JSONResponse(content=envelope)
@app.post("/verify")
async def verify(receipt: dict) -> dict:
    """Check a receipt with ``verify_receipt`` and report the outcome.

    Returns a dict with ``valid`` (the result of ``verify_receipt``) and
    ``receipt_id`` (taken from the receipt, ``None`` when absent).
    """
    outcome = {"valid": verify_receipt(receipt)}
    outcome["receipt_id"] = receipt.get("receipt_id")
    return outcome
@app.get("/health")
async def health() -> dict:
    """Report service status, stored-session count, and pipeline stage names."""
    stage_names = ["stt", "receipt", "claims", "qkl", "xgml", "xal"]
    stored = len(sessions)
    return dict(status="ok", sessions=stored, pipeline=stage_names)
@app.get("/session/{session_id}")
async def get_session(session_id: str) -> JSONResponse:
    """Return the stored session for ``session_id``, or a 404 error payload."""
    try:
        record = sessions[session_id]
    except KeyError:
        # Unknown ID: answer with an error body instead of raising.
        return JSONResponse(content={"error": "session not found"}, status_code=404)
    return JSONResponse(content=record)
# Sentence terminator: one or more of . ! ? followed by whitespace or the end
# of the text. Requiring the trailing whitespace keeps decimals ("2.5") and
# dotted tokens ("example.com") inside a single claim.
_SENTENCE_END = re.compile(r"[.!?]+(?:\s+|$)")

# Upper bound on the number of claims returned for one transcript.
_MAX_CLAIMS = 10


def _extract_claims(transcript: str) -> list[dict]:
    """Extract structured claims from a transcript.

    Currently a placeholder: in production this is meant to call a cleanup
    LLM (Qwen2.5 or similar) to extract factual claims with confidence
    scores.

    Args:
        transcript: Transcribed text. Empty text and mock transcripts
            (those starting with ``"[MOCK"``) yield a single mock claim.

    Returns:
        A list of at most ``_MAX_CLAIMS`` dicts with the keys ``text``,
        ``confidence`` and ``source``. Each sentence of the transcript
        becomes one claim with its terminating punctuation removed. The
        list is empty when the transcript contains no sentence text.

    Note:
        Sentences end at ``.``, ``!`` or ``?`` followed by whitespace or the
        end of the text. Abbreviations such as "Dr. Smith" are still split;
        that is left to the future LLM-based extractor.
    """
    if not transcript or transcript.startswith("[MOCK"):
        return [{"text": "mock claim", "confidence": 0.5, "source": "stt"}]

    # Split on sentence boundaries and drop blank fragments (e.g. the empty
    # tail produced by a terminator at the very end of the text).
    pieces = (piece.strip() for piece in _SENTENCE_END.split(transcript))
    sentences = [piece for piece in pieces if piece]

    return [
        {"text": sentence, "confidence": 0.7, "source": "stt"}
        for sentence in sentences[:_MAX_CLAIMS]
    ]