| # SPDX-License-Identifier: AGPL-3.0-or-later | |
| # Copyright (c) 2024-2025 Felipe Maya Muniz | |
| """ | |
| Production Hugging Face Space for AletheionGuard. | |
| This endpoint loads the trained neural models and provides accurate | |
| epistemic uncertainty estimation using the full AletheionGuard architecture. | |
| """ | |
| from fastapi import FastAPI, HTTPException, Header | |
| from pydantic import BaseModel | |
| from typing import Optional | |
| import logging | |
| import math | |
| import torch | |
| import torch.nn as nn | |
| from sentence_transformers import SentenceTransformer | |
| from pathlib import Path | |
| logging.basicConfig(level=logging.INFO) | |
| logger = logging.getLogger(__name__) | |
| app = FastAPI( | |
| title="AletheionGuard HF Space", | |
| description="Production epistemic uncertainty estimation", | |
| version="2.0.0" | |
| ) | |
| # ============================================================================ | |
| # Model Definitions (copied from q1q2_gates.py) | |
| # ============================================================================ | |
class UncertaintyNetwork(nn.Module):
    """MLP mapping an embedding vector to one uncertainty score in [0, 1].

    Structure: (Linear -> ReLU -> Dropout) x num_layers, followed by a
    Linear(hidden_dim, 1) -> Sigmoid head producing a scalar per row.
    """

    def __init__(
        self,
        input_dim: int = 384,
        hidden_dim: int = 256,
        num_layers: int = 3,
        dropout: float = 0.1
    ):
        super().__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers

        # First block projects into the hidden space; remaining blocks keep
        # the hidden width. The head squashes to a single value in [0, 1].
        blocks = [nn.Linear(input_dim, hidden_dim), nn.ReLU(), nn.Dropout(dropout)]
        for _ in range(num_layers - 1):
            blocks += [nn.Linear(hidden_dim, hidden_dim), nn.ReLU(), nn.Dropout(dropout)]
        blocks += [nn.Linear(hidden_dim, 1), nn.Sigmoid()]  # clamp to [0, 1]
        self.network = nn.Sequential(*blocks)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Score *x*; accepts a single vector or a (batch, input_dim) tensor."""
        was_vector = x.dim() == 1
        if was_vector:
            x = x.unsqueeze(0)  # promote to a batch of one
        scores = self.network(x)
        return scores.squeeze(0) if was_vector else scores
class Q1Gate(nn.Module):
    """Gate estimating aleatoric (data-inherent) uncertainty, Q1."""

    def __init__(self, input_dim: int = 384, hidden_dim: int = 256):
        super().__init__()
        # Thin wrapper around the shared MLP with fixed depth and dropout.
        self.network = UncertaintyNetwork(
            input_dim=input_dim,
            hidden_dim=hidden_dim,
            num_layers=3,
            dropout=0.1,
        )

    def forward(self, embeddings: torch.Tensor) -> torch.Tensor:
        """Delegate scoring to the underlying uncertainty network."""
        return self.network(embeddings)
class Q2Gate(nn.Module):
    """Gate estimating epistemic uncertainty (Q2), conditioned on Q1."""

    def __init__(self, input_dim: int = 384, hidden_dim: int = 256):
        super().__init__()
        # Q1 is appended to each embedding before scoring, hence the +1.
        self.network = UncertaintyNetwork(
            input_dim=input_dim + 1,
            hidden_dim=hidden_dim,
            num_layers=3,
            dropout=0.1,
        )

    def forward(self, embeddings: torch.Tensor, q1: torch.Tensor) -> torch.Tensor:
        """Return Q2 for *embeddings*, conditioning on the given Q1 value(s)."""
        was_vector = embeddings.dim() == 1
        if was_vector:
            embeddings = embeddings.unsqueeze(0)

        # Normalize q1 to shape (batch, 1) so it can be concatenated.
        if isinstance(q1, float):
            q1 = torch.tensor([[q1]], dtype=embeddings.dtype, device=embeddings.device)
        elif q1.dim() == 0:
            q1 = q1.reshape(1, 1)
        elif q1.dim() == 1:
            q1 = q1.unsqueeze(1)

        scores = self.network(torch.cat([embeddings, q1], dim=1))
        return scores.squeeze(0) if was_vector else scores
| # ============================================================================ | |
| # Global Model State | |
| # ============================================================================ | |
class ModelState:
    """Global container for the encoder and the two uncertainty gates.

    Models start unloaded; ``load_models()`` populates them once (idempotent)
    and is invoked from the application's startup hook.
    """

    def __init__(self):
        self.encoder = None   # SentenceTransformer embedding model
        self.q1_gate = None   # aleatoric-uncertainty gate
        self.q2_gate = None   # epistemic-uncertainty gate
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.loaded = False

    def _load_gate_weights(self, gate: nn.Module, path: str, label: str) -> None:
        """Load *path* into *gate* if it exists, else keep the random init.

        Moves the gate to the active device and switches it to eval mode
        either way.
        """
        if Path(path).exists():
            # weights_only=True restricts unpickling to tensors/containers,
            # preventing arbitrary code execution from a tampered checkpoint.
            gate.load_state_dict(
                torch.load(path, map_location=self.device, weights_only=True)
            )
            logger.info(" ✓ %s loaded from %s", label, path)
        else:
            logger.warning(" ⚠️ %s not found, using random weights", path)
        gate.to(self.device)
        gate.eval()

    def load_models(self):
        """Load all models at startup; a second call is a no-op.

        Raises:
            Exception: re-raises whatever the underlying loaders raise,
            after logging, so startup fails loudly instead of half-loaded.
        """
        if self.loaded:
            return
        try:
            logger.info("🔧 Loading models...")

            # 1. Sentence transformer for embeddings
            logger.info(" Loading sentence transformer...")
            self.encoder = SentenceTransformer('all-MiniLM-L6-v2')
            self.encoder.eval()
            logger.info(" ✓ Sentence transformer loaded")

            # 2. Q1 gate (aleatoric)
            logger.info(" Loading Q1 gate...")
            self.q1_gate = Q1Gate(input_dim=384, hidden_dim=256)
            self._load_gate_weights(self.q1_gate, 'q1_gate.pth', 'Q1 gate')

            # 3. Q2 gate (epistemic, conditioned on Q1)
            logger.info(" Loading Q2 gate...")
            self.q2_gate = Q2Gate(input_dim=384, hidden_dim=256)
            self._load_gate_weights(self.q2_gate, 'q2_gate.pth', 'Q2 gate')

            self.loaded = True
            logger.info(f"✅ All models loaded successfully (device: {self.device})")
        except Exception as e:
            logger.error(f"❌ Failed to load models: {e}")
            raise
| # Global model state | |
| models = ModelState() | |
| # ============================================================================ | |
| # API Models | |
| # ============================================================================ | |
class PredictRequest(BaseModel):
    """Request model for /predict endpoint."""
    # Text to evaluate for epistemic uncertainty.
    text: str
    # Optional context; prepended as "context: text" before embedding.
    context: Optional[str] = None
class PredictResponse(BaseModel):
    """Response model for /predict endpoint."""
    # Aleatoric uncertainty in [0, 1] (sigmoid output of Q1 gate).
    q1: float
    # Epistemic uncertainty in [0, 1] (sigmoid output of Q2 gate).
    q2: float
    # Pyramid height = 1 - sqrt(q1^2 + q2^2), clamped to [0, 1].
    height: float
    # Human-readable status message.
    message: str
    # "ACCEPT" / "MAYBE" / "REFUSED" per the epistemic rule in get_verdict.
    verdict: Optional[str] = None
def get_verdict(q1: float, q2: float, height: float) -> str:
    """Map uncertainty scores to a verdict via the official epistemic rule.

    With u = 1 - height (total uncertainty):
      * REFUSED when q2 >= 0.35 or u >= 0.60
      * MAYBE   when q1 >= 0.35 or 0.30 <= u < 0.60
      * ACCEPT  otherwise
    """
    total_uncertainty = 1.0 - height

    if q2 >= 0.35 or total_uncertainty >= 0.60:
        return "REFUSED"
    # The guard above already ensures total_uncertainty < 0.60 here,
    # so the original "0.30 <= u < 0.60" band reduces to a lower bound.
    if q1 >= 0.35 or total_uncertainty >= 0.30:
        return "MAYBE"
    return "ACCEPT"
| # ============================================================================ | |
| # API Endpoints | |
| # ============================================================================ | |
@app.on_event("startup")
async def startup_event():
    """Load all models once when the server starts.

    The @app.on_event("startup") registration is required — without it this
    coroutine never runs and every /predict call fails with 503.
    NOTE: newer FastAPI versions prefer the lifespan API; on_event still
    works and keeps the change minimal.
    """
    models.load_models()
@app.get("/")
def root():
    """Root endpoint: report service identity and model-load status."""
    return {
        "name": "AletheionGuard HF Space",
        "version": "2.0.0",
        "status": "operational",
        "models_loaded": models.loaded
    }
@app.post("/predict", response_model=PredictResponse)
def predict(
    request: PredictRequest,
    authorization: str = Header(...)
):
    """
    Predict endpoint using trained neural models.
    Returns epistemic uncertainty metrics (q1, q2, height) computed by
    the trained AletheionGuard neural networks.
    Args:
        request: Text and optional context
        authorization: Bearer token header (required; presumably validated
            by the hosting platform — confirm, no check is done here)
    Returns:
        Neural-computed metrics with verdict
    Raises:
        HTTPException: 503 if models are not loaded, 500 on any other failure.
    Example:
        >>> POST /predict
        >>> Headers: Authorization: Bearer hf_...
        >>> Body: {"text": "Paris is the capital of France", "context": "geography"}
        >>> Response: {"q1": 0.08, "q2": 0.12, "height": 0.86, "verdict": "ACCEPT"}
    """
    try:
        if not models.loaded:
            raise HTTPException(status_code=503, detail="Models not loaded")
        logger.info(f"Received prediction request - text_length={len(request.text)}")

        # Combine text and context for embedding
        full_text = request.text
        if request.context:
            full_text = f"{request.context}: {request.text}"

        # Run the entire inference pipeline without autograd. The original
        # no_grad block covered only the encoder, so each request built a
        # useless autograd graph through both gate forward passes.
        with torch.no_grad():
            # 1. Embeddings from the sentence transformer
            embeddings = models.encoder.encode(
                full_text,
                convert_to_tensor=True,
                device=models.device
            )
            # 2. Q1 (aleatoric uncertainty)
            q1_tensor = models.q1_gate(embeddings)
            q1 = float(q1_tensor.item())
            # 3. Q2 (epistemic uncertainty) — conditioned on Q1
            q2_tensor = models.q2_gate(embeddings, q1_tensor)
            q2 = float(q2_tensor.item())

        # 4. Height from the pyramidal formula: 1 - sqrt(q1^2 + q2^2),
        #    clamped into [0, 1].
        height = max(0.0, min(1.0, 1.0 - math.sqrt(q1**2 + q2**2)))

        # 5. Verdict from the official epistemic rule
        verdict = get_verdict(q1, q2, height)

        logger.info(f"Prediction: q1={q1:.3f}, q2={q2:.3f}, height={height:.3f}, verdict={verdict}")
        return PredictResponse(
            q1=round(q1, 3),
            q2=round(q2, 3),
            height=round(height, 3),
            message="Neural metrics computed successfully.",
            verdict=verdict
        )
    except HTTPException:
        # Re-raise intentional HTTP errors untouched (e.g. the 503 above).
        raise
    except Exception as e:
        logger.error(f"Prediction failed: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))
@app.get("/health")
def health():
    """Health check endpoint: liveness plus model-load and device status."""
    return {
        "status": "healthy",
        "models_loaded": models.loaded,
        "device": str(models.device)
    }
| if __name__ == "__main__": | |
| import uvicorn | |
| uvicorn.run(app, host="0.0.0.0", port=7860) | |