# SPDX-License-Identifier: AGPL-3.0-or-later
# Copyright (c) 2024-2025 Felipe Maya Muniz
"""
Production Hugging Face Space for AletheionGuard.
This endpoint loads the trained neural models and provides accurate
epistemic uncertainty estimation using the full AletheionGuard architecture.
"""
from fastapi import FastAPI, HTTPException, Header
from pydantic import BaseModel
from typing import Optional, Union
import logging
import math
import torch
import torch.nn as nn
from sentence_transformers import SentenceTransformer
from pathlib import Path
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
app = FastAPI(
title="AletheionGuard HF Space",
description="Production epistemic uncertainty estimation",
version="2.0.0"
)
# ============================================================================
# Model Definitions (copied from q1q2_gates.py)
# ============================================================================
class UncertaintyNetwork(nn.Module):
"""Base neural network for uncertainty estimation."""
def __init__(
self,
input_dim: int = 384,
hidden_dim: int = 256,
num_layers: int = 3,
dropout: float = 0.1
):
super().__init__()
self.input_dim = input_dim
self.hidden_dim = hidden_dim
self.num_layers = num_layers
# Build MLP layers
layers = []
# Input layer
layers.append(nn.Linear(input_dim, hidden_dim))
layers.append(nn.ReLU())
layers.append(nn.Dropout(dropout))
# Hidden layers
for _ in range(num_layers - 1):
layers.append(nn.Linear(hidden_dim, hidden_dim))
layers.append(nn.ReLU())
layers.append(nn.Dropout(dropout))
# Output layer (single uncertainty value)
layers.append(nn.Linear(hidden_dim, 1))
        layers.append(nn.Sigmoid())  # Squash output into (0, 1)
self.network = nn.Sequential(*layers)
def forward(self, x: torch.Tensor) -> torch.Tensor:
if x.dim() == 1:
x = x.unsqueeze(0)
single_sample = True
else:
single_sample = False
output = self.network(x)
if single_sample:
output = output.squeeze(0)
return output
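
# Shape contract for UncertaintyNetwork, per the forward pass above:
#   (input_dim,)       -> (1,)        single sample
#   (batch, input_dim) -> (batch, 1)  batched
# The final Sigmoid keeps every output inside (0, 1).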
class Q1Gate(nn.Module):
"""Aleatoric uncertainty gate (Q1)."""
def __init__(self, input_dim: int = 384, hidden_dim: int = 256):
super().__init__()
self.network = UncertaintyNetwork(
input_dim=input_dim,
hidden_dim=hidden_dim,
num_layers=3,
dropout=0.1
)
def forward(self, embeddings: torch.Tensor) -> torch.Tensor:
return self.network(embeddings)
class Q2Gate(nn.Module):
"""Epistemic uncertainty gate (Q2) - conditioned on Q1."""
def __init__(self, input_dim: int = 384, hidden_dim: int = 256):
super().__init__()
# Q2 is conditioned on Q1, so input is embeddings + Q1 value
self.network = UncertaintyNetwork(
input_dim=input_dim + 1, # +1 for Q1 conditioning
hidden_dim=hidden_dim,
num_layers=3,
dropout=0.1
)
    def forward(self, embeddings: torch.Tensor, q1: Union[torch.Tensor, float]) -> torch.Tensor:
# Handle single sample
if embeddings.dim() == 1:
embeddings = embeddings.unsqueeze(0)
single_sample = True
else:
single_sample = False
# Convert Q1 to tensor if needed
if isinstance(q1, float):
q1 = torch.tensor([[q1]], dtype=embeddings.dtype, device=embeddings.device)
elif q1.dim() == 0:
q1 = q1.unsqueeze(0).unsqueeze(0)
elif q1.dim() == 1:
q1 = q1.unsqueeze(1)
# Concatenate embeddings with Q1 for conditioning
combined = torch.cat([embeddings, q1], dim=1)
output = self.network(combined)
if single_sample:
output = output.squeeze(0)
return output
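
# Example of how the gates chain (a quick sanity check, not part of the API;
# random weights here stand in for the trained checkpoints):
#
#   emb = torch.randn(2, 384)   # stand-in batch of sentence embeddings
#   q1 = Q1Gate()(emb)          # -> (2, 1), aleatoric uncertainty
#   q2 = Q2Gate()(emb, q1)      # -> (2, 1), epistemic, conditioned on Q1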
# ============================================================================
# Global Model State
# ============================================================================
class ModelState:
"""Global state for loaded models."""
def __init__(self):
self.encoder = None
self.q1_gate = None
self.q2_gate = None
self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
self.loaded = False
def load_models(self):
"""Load all models at startup."""
if self.loaded:
return
try:
logger.info("🔧 Loading models...")
# 1. Load sentence transformer for embeddings
logger.info(" Loading sentence transformer...")
self.encoder = SentenceTransformer('all-MiniLM-L6-v2')
self.encoder.eval()
logger.info(" ✓ Sentence transformer loaded")
# 2. Load Q1 gate
logger.info(" Loading Q1 gate...")
self.q1_gate = Q1Gate(input_dim=384, hidden_dim=256)
if Path('q1_gate.pth').exists():
                self.q1_gate.load_state_dict(torch.load('q1_gate.pth', map_location=self.device, weights_only=True))
logger.info(" ✓ Q1 gate loaded from q1_gate.pth")
else:
logger.warning(" ⚠️ q1_gate.pth not found, using random weights")
self.q1_gate.to(self.device)
self.q1_gate.eval()
# 3. Load Q2 gate
logger.info(" Loading Q2 gate...")
self.q2_gate = Q2Gate(input_dim=384, hidden_dim=256)
if Path('q2_gate.pth').exists():
                self.q2_gate.load_state_dict(torch.load('q2_gate.pth', map_location=self.device, weights_only=True))
logger.info(" ✓ Q2 gate loaded from q2_gate.pth")
else:
logger.warning(" ⚠️ q2_gate.pth not found, using random weights")
self.q2_gate.to(self.device)
self.q2_gate.eval()
self.loaded = True
logger.info(f"✅ All models loaded successfully (device: {self.device})")
except Exception as e:
logger.error(f"❌ Failed to load models: {e}")
raise
# Global model state
models = ModelState()
# ============================================================================
# API Models
# ============================================================================
class PredictRequest(BaseModel):
"""Request model for /predict endpoint."""
text: str
context: Optional[str] = None
class PredictResponse(BaseModel):
"""Response model for /predict endpoint."""
q1: float
q2: float
height: float
message: str
verdict: Optional[str] = None
def get_verdict(q1: float, q2: float, height: float) -> str:
    """
    Calculate the verdict using the official epistemic rule.

    With u = 1.0 - height (total uncertainty):
    - If q2 >= 0.35 or u >= 0.60 → REFUSED
    - If q1 >= 0.35 or 0.30 <= u < 0.60 → MAYBE
    - Otherwise → ACCEPT
    """
u = 1.0 - height # Total uncertainty
if q2 >= 0.35 or u >= 0.60:
return "REFUSED"
if q1 >= 0.35 or (0.30 <= u < 0.60):
return "MAYBE"
return "ACCEPT"
# ============================================================================
# API Endpoints
# ============================================================================
@app.on_event("startup")
async def startup_event():
"""Load models on startup."""
models.load_models()
@app.get("/")
def root():
"""Root endpoint."""
return {
"name": "AletheionGuard HF Space",
"version": "2.0.0",
"status": "operational",
"models_loaded": models.loaded
}
@app.post("/predict", response_model=PredictResponse)
def predict(
request: PredictRequest,
authorization: str = Header(...)
):
"""
Predict endpoint using trained neural models.
Returns epistemic uncertainty metrics (q1, q2, height) computed by
the trained AletheionGuard neural networks.
Args:
request: Text and optional context
        authorization: Bearer token (required here; verification is expected upstream from Hugging Face)
Returns:
Neural-computed metrics with verdict
Example:
>>> POST /predict
>>> Headers: Authorization: Bearer hf_...
>>> Body: {"text": "Paris is the capital of France", "context": "geography"}
>>> Response: {"q1": 0.08, "q2": 0.12, "height": 0.86, "verdict": "ACCEPT"}
"""
try:
if not models.loaded:
raise HTTPException(status_code=503, detail="Models not loaded")
logger.info(f"Received prediction request - text_length={len(request.text)}")
# Combine text and context for embedding
full_text = request.text
if request.context:
full_text = f"{request.context}: {request.text}"
        # 1. Embed the text with the sentence transformer and run both gates
        #    under no_grad (inference only, no autograd graph needed)
        with torch.no_grad():
            embeddings = models.encoder.encode(
                full_text,
                convert_to_tensor=True,
                device=models.device
            )
            # 2. Compute Q1 (aleatoric uncertainty)
            q1_tensor = models.q1_gate(embeddings)
            q1 = q1_tensor.item()
            # 3. Compute Q2 (epistemic uncertainty), conditioned on Q1
            q2_tensor = models.q2_gate(embeddings, q1_tensor)
            q2 = q2_tensor.item()
        # 4. Compute height from the pyramidal formula:
        #    height = 1 - sqrt(q1^2 + q2^2), clamped to [0, 1]
        height = max(0.0, min(1.0, 1.0 - math.sqrt(q1**2 + q2**2)))
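        # e.g. q1=0.08, q2=0.12 → sqrt(0.0064 + 0.0144) ≈ 0.144, so height ≈ 0.856,
        # which rounds to the 0.86 shown in the docstring example above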
# 5. Calculate verdict
verdict = get_verdict(q1, q2, height)
logger.info(f"Prediction: q1={q1:.3f}, q2={q2:.3f}, height={height:.3f}, verdict={verdict}")
return PredictResponse(
q1=round(q1, 3),
q2=round(q2, 3),
height=round(height, 3),
message="Neural metrics computed successfully.",
verdict=verdict
)
except HTTPException:
raise
except Exception as e:
logger.error(f"Prediction failed: {str(e)}")
raise HTTPException(status_code=500, detail=str(e))
@app.get("/health")
def health():
"""Health check endpoint."""
return {
"status": "healthy",
"models_loaded": models.loaded,
"device": str(models.device)
}
if __name__ == "__main__":
import uvicorn
uvicorn.run(app, host="0.0.0.0", port=7860)
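
# Example request against a local run (7860 is the standard HF Spaces port);
# the token value is a placeholder:
#
#   import requests
#   r = requests.post(
#       "http://localhost:7860/predict",
#       headers={"Authorization": "Bearer <hf_token>"},
#       json={"text": "Paris is the capital of France", "context": "geography"},
#   )
#   print(r.json())  # {"q1": ..., "q2": ..., "height": ..., "verdict": ...}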