# SPDX-License-Identifier: AGPL-3.0-or-later
# Copyright (c) 2024-2025 Felipe Maya Muniz
"""
Production Hugging Face Space for AletheionGuard.

This endpoint loads the trained neural models and provides accurate
epistemic uncertainty estimation using the full AletheionGuard architecture.
"""

import logging
import math
from pathlib import Path
from typing import Optional

import torch
import torch.nn as nn
from fastapi import FastAPI, Header, HTTPException
from pydantic import BaseModel
from sentence_transformers import SentenceTransformer

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI(
    title="AletheionGuard HF Space",
    description="Production epistemic uncertainty estimation",
    version="2.0.0"
)


# ============================================================================
# Model Definitions (copied from q1q2_gates.py)
# ============================================================================

class UncertaintyNetwork(nn.Module):
    """Base neural network for uncertainty estimation.

    An MLP mapping an embedding vector to a single uncertainty value,
    squashed into [0, 1] by a final sigmoid.
    """

    def __init__(
        self,
        input_dim: int = 384,
        hidden_dim: int = 256,
        num_layers: int = 3,
        dropout: float = 0.1
    ):
        """Build the MLP.

        Args:
            input_dim: Dimensionality of the input embedding.
            hidden_dim: Width of each hidden layer.
            num_layers: Total number of Linear->ReLU->Dropout blocks.
            dropout: Dropout probability applied after each hidden layer.
        """
        super().__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers

        # Input block, then (num_layers - 1) hidden blocks, then a
        # single-output head with sigmoid to clamp to [0, 1].
        layers: list[nn.Module] = [
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
        ]
        for _ in range(num_layers - 1):
            layers += [
                nn.Linear(hidden_dim, hidden_dim),
                nn.ReLU(),
                nn.Dropout(dropout),
            ]
        layers += [nn.Linear(hidden_dim, 1), nn.Sigmoid()]
        self.network = nn.Sequential(*layers)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the MLP on a single embedding (1-D) or a batch (2-D).

        A 1-D input is temporarily promoted to a batch of one and the
        output squeezed back, so callers get a shape matching their input.
        """
        single_sample = x.dim() == 1
        if single_sample:
            x = x.unsqueeze(0)
        output = self.network(x)
        if single_sample:
            output = output.squeeze(0)
        return output


class Q1Gate(nn.Module):
    """Aleatoric uncertainty gate (Q1)."""

    def __init__(self, input_dim: int = 384, hidden_dim: int = 256):
        super().__init__()
        self.network = UncertaintyNetwork(
            input_dim=input_dim,
            hidden_dim=hidden_dim,
            num_layers=3,
            dropout=0.1
        )

    def forward(self, embeddings: torch.Tensor) -> torch.Tensor:
        """Return the Q1 (aleatoric) uncertainty for *embeddings*."""
        return self.network(embeddings)


class Q2Gate(nn.Module):
    """Epistemic uncertainty gate (Q2) - conditioned on Q1."""

    def __init__(self, input_dim: int = 384, hidden_dim: int = 256):
        super().__init__()
        # Q2 is conditioned on Q1, so input is embeddings + Q1 value
        self.network = UncertaintyNetwork(
            input_dim=input_dim + 1,  # +1 for Q1 conditioning
            hidden_dim=hidden_dim,
            num_layers=3,
            dropout=0.1
        )

    def forward(
        self, embeddings: torch.Tensor, q1: "torch.Tensor | float"
    ) -> torch.Tensor:
        """Return Q2 for *embeddings*, conditioned on the Q1 estimate.

        Args:
            embeddings: Single embedding (1-D) or batch (2-D).
            q1: Q1 estimate as a float, scalar tensor, (batch,) or
                (batch, 1) tensor — normalized to (batch, 1) internally.
        """
        single_sample = embeddings.dim() == 1
        if single_sample:
            embeddings = embeddings.unsqueeze(0)

        # Normalize q1 to a (batch, 1) tensor so it can be concatenated.
        if isinstance(q1, float):
            q1 = torch.tensor(
                [[q1]], dtype=embeddings.dtype, device=embeddings.device
            )
        elif q1.dim() == 0:
            q1 = q1.unsqueeze(0).unsqueeze(0)
        elif q1.dim() == 1:
            q1 = q1.unsqueeze(1)

        # Concatenate embeddings with Q1 for conditioning.
        combined = torch.cat([embeddings, q1], dim=1)
        output = self.network(combined)
        if single_sample:
            output = output.squeeze(0)
        return output


# ============================================================================
# Global Model State
# ============================================================================

class ModelState:
    """Global state for loaded models."""

    def __init__(self):
        self.encoder = None  # SentenceTransformer embedding model
        self.q1_gate = None  # Q1Gate (aleatoric uncertainty)
        self.q2_gate = None  # Q2Gate (epistemic uncertainty)
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.loaded = False

    def _load_gate(self, gate: nn.Module, weights_path: str, label: str) -> nn.Module:
        """Load *gate* weights from *weights_path* if present, then move to
        the target device and switch to eval mode.

        Falls back to the gate's random initialization (with a warning)
        when the checkpoint file is missing.
        """
        if Path(weights_path).exists():
            # weights_only=True: load a pure state dict and refuse arbitrary
            # pickled objects embedded in the checkpoint (safer torch.load).
            gate.load_state_dict(
                torch.load(
                    weights_path, map_location=self.device, weights_only=True
                )
            )
            logger.info(f" ✓ {label} loaded from {weights_path}")
        else:
            logger.warning(f" ⚠️ {weights_path} not found, using random weights")
        gate.to(self.device)
        gate.eval()
        return gate

    def load_models(self):
        """Load all models at startup (idempotent; no-op once loaded)."""
        if self.loaded:
            return

        try:
            logger.info("🔧 Loading models...")

            # 1. Load sentence transformer for embeddings
            logger.info(" Loading sentence transformer...")
            self.encoder = SentenceTransformer('all-MiniLM-L6-v2')
            self.encoder.eval()
            logger.info(" ✓ Sentence transformer loaded")

            # 2. Load Q1 gate
            logger.info(" Loading Q1 gate...")
            self.q1_gate = self._load_gate(
                Q1Gate(input_dim=384, hidden_dim=256), 'q1_gate.pth', 'Q1 gate'
            )

            # 3. Load Q2 gate
            logger.info(" Loading Q2 gate...")
            self.q2_gate = self._load_gate(
                Q2Gate(input_dim=384, hidden_dim=256), 'q2_gate.pth', 'Q2 gate'
            )

            self.loaded = True
            logger.info(f"✅ All models loaded successfully (device: {self.device})")

        except Exception as e:
            logger.error(f"❌ Failed to load models: {e}")
            raise


# Global model state
models = ModelState()


# ============================================================================
# API Models
# ============================================================================

class PredictRequest(BaseModel):
    """Request model for /predict endpoint."""
    text: str
    context: Optional[str] = None


class PredictResponse(BaseModel):
    """Response model for /predict endpoint."""
    q1: float
    q2: float
    height: float
    message: str
    verdict: Optional[str] = None


def get_verdict(q1: float, q2: float, height: float) -> str:
    """
    Calculate verdict using official epistemic rule.

    Official epistemic rule:
    - u = 1.0 - height (total uncertainty)
    - If q2 >= 0.35 OR u >= 0.60 → REFUSED
    - If q1 >= 0.35 OR (0.30 <= u < 0.60) → MAYBE
    - Otherwise → ACCEPT
    """
    u = 1.0 - height  # Total uncertainty

    if q2 >= 0.35 or u >= 0.60:
        return "REFUSED"
    if q1 >= 0.35 or (0.30 <= u < 0.60):
        return "MAYBE"
    return "ACCEPT"


# ============================================================================
# API Endpoints
# ============================================================================

# NOTE(review): on_event is deprecated in recent FastAPI releases in favor of
# a lifespan handler — migrate once the deployed FastAPI version is confirmed.
@app.on_event("startup")
async def startup_event():
    """Load models on startup."""
    models.load_models()


@app.get("/")
def root():
    """Root endpoint."""
    return {
        "name": "AletheionGuard HF Space",
        "version": "2.0.0",
        "status": "operational",
        "models_loaded": models.loaded
    }


@app.post("/predict", response_model=PredictResponse)
def predict(
    request: PredictRequest,
    authorization: str = Header(...)
):
    """
    Predict endpoint using trained neural models.

    Returns epistemic uncertainty metrics (q1, q2, height) computed by
    the trained AletheionGuard neural networks.

    Args:
        request: Text and optional context
        authorization: Bearer token (verified by HF automatically)

    Returns:
        Neural-computed metrics with verdict

    Example:
        >>> POST /predict
        >>> Headers: Authorization: Bearer hf_...
        >>> Body: {"text": "Paris is the capital of France", "context": "geography"}
        >>> Response: {"q1": 0.08, "q2": 0.12, "height": 0.86, "verdict": "ACCEPT"}
    """
    try:
        if not models.loaded:
            raise HTTPException(status_code=503, detail="Models not loaded")

        logger.info(f"Received prediction request - text_length={len(request.text)}")

        # Combine text and context for embedding
        full_text = request.text
        if request.context:
            full_text = f"{request.context}: {request.text}"

        # Run the full inference pipeline under no_grad: the gate forward
        # passes must not build an autograd graph per request.
        with torch.no_grad():
            # 1. Get embeddings from sentence transformer
            embeddings = models.encoder.encode(
                full_text,
                convert_to_tensor=True,
                device=models.device
            )

            # 2. Compute Q1 (aleatoric uncertainty)
            q1_tensor = models.q1_gate(embeddings)
            q1 = float(q1_tensor.item())

            # 3. Compute Q2 (epistemic uncertainty) - conditioned on Q1
            q2_tensor = models.q2_gate(embeddings, q1_tensor)
            q2 = float(q2_tensor.item())

        # 4. Compute height from pyramidal formula
        # height = 1 - sqrt(q1^2 + q2^2), clamped to [0, 1]
        height = max(0.0, min(1.0, 1.0 - math.sqrt(q1**2 + q2**2)))

        # 5. Calculate verdict
        verdict = get_verdict(q1, q2, height)

        logger.info(
            f"Prediction: q1={q1:.3f}, q2={q2:.3f}, height={height:.3f}, verdict={verdict}"
        )

        return PredictResponse(
            q1=round(q1, 3),
            q2=round(q2, 3),
            height=round(height, 3),
            message="Neural metrics computed successfully.",
            verdict=verdict
        )

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Prediction failed: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))


@app.get("/health")
def health():
    """Health check endpoint."""
    return {
        "status": "healthy",
        "models_loaded": models.loaded,
        "device": str(models.device)
    }


if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)