# SPDX-License-Identifier: AGPL-3.0-or-later
# Copyright (c) 2024-2025 Felipe Maya Muniz
"""
Production Hugging Face Space for AletheionGuard.
This endpoint loads the trained neural models and provides accurate
epistemic uncertainty estimation using the full AletheionGuard architecture.
"""
from fastapi import FastAPI, HTTPException, Header
from pydantic import BaseModel
from typing import Optional, Union
import logging
import math
import torch
import torch.nn as nn
from sentence_transformers import SentenceTransformer
from pathlib import Path
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
app = FastAPI(
title="AletheionGuard HF Space",
description="Production epistemic uncertainty estimation",
version="2.0.0"
)
# ============================================================================
# Model Definitions (copied from q1q2_gates.py)
# ============================================================================
class UncertaintyNetwork(nn.Module):
"""Base neural network for uncertainty estimation."""
def __init__(
self,
input_dim: int = 384,
hidden_dim: int = 256,
num_layers: int = 3,
dropout: float = 0.1
):
super().__init__()
self.input_dim = input_dim
self.hidden_dim = hidden_dim
self.num_layers = num_layers
# Build MLP layers
layers = []
# Input layer
layers.append(nn.Linear(input_dim, hidden_dim))
layers.append(nn.ReLU())
layers.append(nn.Dropout(dropout))
# Hidden layers
for _ in range(num_layers - 1):
layers.append(nn.Linear(hidden_dim, hidden_dim))
layers.append(nn.ReLU())
layers.append(nn.Dropout(dropout))
# Output layer (single uncertainty value)
layers.append(nn.Linear(hidden_dim, 1))
        layers.append(nn.Sigmoid())  # Squash output into (0, 1)
self.network = nn.Sequential(*layers)
def forward(self, x: torch.Tensor) -> torch.Tensor:
if x.dim() == 1:
x = x.unsqueeze(0)
single_sample = True
else:
single_sample = False
output = self.network(x)
if single_sample:
output = output.squeeze(0)
return output
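
# Shape contract for UncertaintyNetwork, per the forward pass above:
#   (input_dim,)       -> (1,)        single sample
#   (batch, input_dim) -> (batch, 1)  batched
# The final Sigmoid keeps every output inside (0, 1).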
class Q1Gate(nn.Module):
"""Aleatoric uncertainty gate (Q1)."""
def __init__(self, input_dim: int = 384, hidden_dim: int = 256):
super().__init__()
self.network = UncertaintyNetwork(
input_dim=input_dim,
hidden_dim=hidden_dim,
num_layers=3,
dropout=0.1
)
def forward(self, embeddings: torch.Tensor) -> torch.Tensor:
return self.network(embeddings)
class Q2Gate(nn.Module):
"""Epistemic uncertainty gate (Q2) - conditioned on Q1."""
def __init__(self, input_dim: int = 384, hidden_dim: int = 256):
super().__init__()
# Q2 is conditioned on Q1, so input is embeddings + Q1 value
self.network = UncertaintyNetwork(
input_dim=input_dim + 1, # +1 for Q1 conditioning
hidden_dim=hidden_dim,
num_layers=3,
dropout=0.1
)
    def forward(self, embeddings: torch.Tensor, q1: Union[torch.Tensor, float]) -> torch.Tensor:
# Handle single sample
if embeddings.dim() == 1:
embeddings = embeddings.unsqueeze(0)
single_sample = True
else:
single_sample = False
# Convert Q1 to tensor if needed
if isinstance(q1, float):
q1 = torch.tensor([[q1]], dtype=embeddings.dtype, device=embeddings.device)
elif q1.dim() == 0:
q1 = q1.unsqueeze(0).unsqueeze(0)
elif q1.dim() == 1:
q1 = q1.unsqueeze(1)
# Concatenate embeddings with Q1 for conditioning
combined = torch.cat([embeddings, q1], dim=1)
output = self.network(combined)
if single_sample:
output = output.squeeze(0)
return output
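
# Example of how the gates chain (a quick sanity check, not part of the API;
# random weights here stand in for the trained checkpoints):
#
#   emb = torch.randn(2, 384)   # stand-in batch of sentence embeddings
#   q1 = Q1Gate()(emb)          # -> (2, 1), aleatoric uncertainty
#   q2 = Q2Gate()(emb, q1)      # -> (2, 1), epistemic, conditioned on Q1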
# ============================================================================
# Global Model State
# ============================================================================
class ModelState:
"""Global state for loaded models."""
def __init__(self):
self.encoder = None
self.q1_gate = None
self.q2_gate = None
self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
self.loaded = False
def load_models(self):
"""Load all models at startup."""
if self.loaded:
return
try:
logger.info("🔧 Loading models...")
# 1. Load sentence transformer for embeddings
logger.info(" Loading sentence transformer...")
self.encoder = SentenceTransformer('all-MiniLM-L6-v2')
self.encoder.eval()
logger.info(" ✓ Sentence transformer loaded")
# 2. Load Q1 gate
logger.info(" Loading Q1 gate...")
self.q1_gate = Q1Gate(input_dim=384, hidden_dim=256)
if Path('q1_gate.pth').exists():
                self.q1_gate.load_state_dict(torch.load('q1_gate.pth', map_location=self.device, weights_only=True))
logger.info(" ✓ Q1 gate loaded from q1_gate.pth")
else:
logger.warning(" ⚠️ q1_gate.pth not found, using random weights")
self.q1_gate.to(self.device)
self.q1_gate.eval()
# 3. Load Q2 gate
logger.info(" Loading Q2 gate...")
self.q2_gate = Q2Gate(input_dim=384, hidden_dim=256)
if Path('q2_gate.pth').exists():
                self.q2_gate.load_state_dict(torch.load('q2_gate.pth', map_location=self.device, weights_only=True))
logger.info(" ✓ Q2 gate loaded from q2_gate.pth")
else:
logger.warning(" ⚠️ q2_gate.pth not found, using random weights")
self.q2_gate.to(self.device)
self.q2_gate.eval()
self.loaded = True
logger.info(f"✅ All models loaded successfully (device: {self.device})")
except Exception as e:
logger.error(f"❌ Failed to load models: {e}")
raise
# Global model state
models = ModelState()
# ============================================================================
# API Models
# ============================================================================
class PredictRequest(BaseModel):
"""Request model for /predict endpoint."""
text: str
context: Optional[str] = None
class PredictResponse(BaseModel):
"""Response model for /predict endpoint."""
q1: float
q2: float
height: float
message: str
verdict: Optional[str] = None
def get_verdict(q1: float, q2: float, height: float) -> str:
    """
    Calculate the verdict using the official epistemic rule.

    With u = 1.0 - height (total uncertainty):
    - If q2 >= 0.35 or u >= 0.60 → REFUSED
    - If q1 >= 0.35 or 0.30 <= u < 0.60 → MAYBE
    - Otherwise → ACCEPT
    """
u = 1.0 - height # Total uncertainty
if q2 >= 0.35 or u >= 0.60:
return "REFUSED"
if q1 >= 0.35 or (0.30 <= u < 0.60):
return "MAYBE"
return "ACCEPT"
# ============================================================================
# API Endpoints
# ============================================================================
@app.on_event("startup")
async def startup_event():
"""Load models on startup."""
models.load_models()
@app.get("/")
def root():
"""Root endpoint."""
return {
"name": "AletheionGuard HF Space",
"version": "2.0.0",
"status": "operational",
"models_loaded": models.loaded
}
@app.post("/predict", response_model=PredictResponse)
def predict(
request: PredictRequest,
authorization: str = Header(...)
):
"""
Predict endpoint using trained neural models.
Returns epistemic uncertainty metrics (q1, q2, height) computed by
the trained AletheionGuard neural networks.
Args:
request: Text and optional context
        authorization: Bearer token (required here; verification is expected upstream from Hugging Face)
Returns:
Neural-computed metrics with verdict
Example:
>>> POST /predict
>>> Headers: Authorization: Bearer hf_...
>>> Body: {"text": "Paris is the capital of France", "context": "geography"}
>>> Response: {"q1": 0.08, "q2": 0.12, "height": 0.86, "verdict": "ACCEPT"}
"""
try:
if not models.loaded:
raise HTTPException(status_code=503, detail="Models not loaded")
logger.info(f"Received prediction request - text_length={len(request.text)}")
# Combine text and context for embedding
full_text = request.text
if request.context:
full_text = f"{request.context}: {request.text}"
        # 1. Embed the text with the sentence transformer and run both gates
        #    under no_grad (inference only, no autograd graph needed)
        with torch.no_grad():
            embeddings = models.encoder.encode(
                full_text,
                convert_to_tensor=True,
                device=models.device
            )
            # 2. Compute Q1 (aleatoric uncertainty)
            q1_tensor = models.q1_gate(embeddings)
            q1 = q1_tensor.item()
            # 3. Compute Q2 (epistemic uncertainty), conditioned on Q1
            q2_tensor = models.q2_gate(embeddings, q1_tensor)
            q2 = q2_tensor.item()
        # 4. Compute height from the pyramidal formula:
        #    height = 1 - sqrt(q1^2 + q2^2), clamped to [0, 1]
        height = max(0.0, min(1.0, 1.0 - math.sqrt(q1**2 + q2**2)))
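        # e.g. q1=0.08, q2=0.12 → sqrt(0.0064 + 0.0144) ≈ 0.144, so height ≈ 0.856,
        # which rounds to the 0.86 shown in the docstring example above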
# 5. Calculate verdict
verdict = get_verdict(q1, q2, height)
logger.info(f"Prediction: q1={q1:.3f}, q2={q2:.3f}, height={height:.3f}, verdict={verdict}")
return PredictResponse(
q1=round(q1, 3),
q2=round(q2, 3),
height=round(height, 3),
message="Neural metrics computed successfully.",
verdict=verdict
)
except HTTPException:
raise
except Exception as e:
logger.error(f"Prediction failed: {str(e)}")
raise HTTPException(status_code=500, detail=str(e))
@app.get("/health")
def health():
"""Health check endpoint."""
return {
"status": "healthy",
"models_loaded": models.loaded,
"device": str(models.device)
}
if __name__ == "__main__":
import uvicorn
uvicorn.run(app, host="0.0.0.0", port=7860)
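
# Example request against a local run (7860 is the standard HF Spaces port);
# the token value is a placeholder:
#
#   import requests
#   r = requests.post(
#       "http://localhost:7860/predict",
#       headers={"Authorization": "Bearer <hf_token>"},
#       json={"text": "Paris is the capital of France", "context": "geography"},
#   )
#   print(r.json())  # {"q1": ..., "q2": ..., "height": ..., "verdict": ...}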