# SPDX-License-Identifier: AGPL-3.0-or-later
# Copyright (c) 2024-2025 Felipe Maya Muniz
"""
Production Hugging Face Space for AletheionGuard.

This endpoint loads the trained neural models and provides accurate
epistemic uncertainty estimation using the full AletheionGuard architecture.
"""

import logging
import math
from pathlib import Path
from typing import Optional

import torch
import torch.nn as nn
from fastapi import FastAPI, Header, HTTPException
from pydantic import BaseModel
from sentence_transformers import SentenceTransformer

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI(
    title="AletheionGuard HF Space",
    description="Production epistemic uncertainty estimation",
    version="2.0.0"
)


# ============================================================================
# Model Definitions (copied from q1q2_gates.py)
# ============================================================================

class UncertaintyNetwork(nn.Module):
    """Base neural network for uncertainty estimation.

    An MLP mapping an embedding vector to a single uncertainty value,
    squashed into [0, 1] by a final sigmoid.
    """

    def __init__(
        self,
        input_dim: int = 384,
        hidden_dim: int = 256,
        num_layers: int = 3,
        dropout: float = 0.1
    ):
        """Build the MLP.

        Args:
            input_dim: Dimensionality of the input embedding.
            hidden_dim: Width of each hidden layer.
            num_layers: Total number of Linear->ReLU->Dropout blocks.
            dropout: Dropout probability applied after each hidden layer.
        """
        super().__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers

        # Input block, then (num_layers - 1) hidden blocks, then a
        # single-output head with sigmoid to clamp to [0, 1].
        layers: list[nn.Module] = [
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
        ]
        for _ in range(num_layers - 1):
            layers += [
                nn.Linear(hidden_dim, hidden_dim),
                nn.ReLU(),
                nn.Dropout(dropout),
            ]
        layers += [nn.Linear(hidden_dim, 1), nn.Sigmoid()]
        self.network = nn.Sequential(*layers)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the MLP on a single embedding (1-D) or a batch (2-D).

        A 1-D input is temporarily promoted to a batch of one and the
        output squeezed back, so callers get a shape matching their input.
        """
        single_sample = x.dim() == 1
        if single_sample:
            x = x.unsqueeze(0)
        output = self.network(x)
        if single_sample:
            output = output.squeeze(0)
        return output


class Q1Gate(nn.Module):
    """Aleatoric uncertainty gate (Q1)."""

    def __init__(self, input_dim: int = 384, hidden_dim: int = 256):
        super().__init__()
        self.network = UncertaintyNetwork(
            input_dim=input_dim,
            hidden_dim=hidden_dim,
            num_layers=3,
            dropout=0.1
        )

    def forward(self, embeddings: torch.Tensor) -> torch.Tensor:
        """Return the Q1 (aleatoric) uncertainty for *embeddings*."""
        return self.network(embeddings)


class Q2Gate(nn.Module):
    """Epistemic uncertainty gate (Q2) - conditioned on Q1."""

    def __init__(self, input_dim: int = 384, hidden_dim: int = 256):
        super().__init__()
        # Q2 is conditioned on Q1, so input is embeddings + Q1 value
        self.network = UncertaintyNetwork(
            input_dim=input_dim + 1,  # +1 for Q1 conditioning
            hidden_dim=hidden_dim,
            num_layers=3,
            dropout=0.1
        )

    def forward(
        self, embeddings: torch.Tensor, q1: "torch.Tensor | float"
    ) -> torch.Tensor:
        """Return Q2 for *embeddings*, conditioned on the Q1 estimate.

        Args:
            embeddings: Single embedding (1-D) or batch (2-D).
            q1: Q1 estimate as a float, scalar tensor, (batch,) or
                (batch, 1) tensor — normalized to (batch, 1) internally.
        """
        single_sample = embeddings.dim() == 1
        if single_sample:
            embeddings = embeddings.unsqueeze(0)

        # Normalize q1 to a (batch, 1) tensor so it can be concatenated.
        if isinstance(q1, float):
            q1 = torch.tensor(
                [[q1]], dtype=embeddings.dtype, device=embeddings.device
            )
        elif q1.dim() == 0:
            q1 = q1.unsqueeze(0).unsqueeze(0)
        elif q1.dim() == 1:
            q1 = q1.unsqueeze(1)

        # Concatenate embeddings with Q1 for conditioning.
        combined = torch.cat([embeddings, q1], dim=1)
        output = self.network(combined)
        if single_sample:
            output = output.squeeze(0)
        return output


# ============================================================================
# Global Model State
# ============================================================================

class ModelState:
    """Global state for loaded models."""

    def __init__(self):
        self.encoder = None  # SentenceTransformer embedding model
        self.q1_gate = None  # Q1Gate (aleatoric uncertainty)
        self.q2_gate = None  # Q2Gate (epistemic uncertainty)
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.loaded = False

    def _load_gate(self, gate: nn.Module, weights_path: str, label: str) -> nn.Module:
        """Load *gate* weights from *weights_path* if present, then move to
        the target device and switch to eval mode.

        Falls back to the gate's random initialization (with a warning)
        when the checkpoint file is missing.
        """
        if Path(weights_path).exists():
            # weights_only=True: load a pure state dict and refuse arbitrary
            # pickled objects embedded in the checkpoint (safer torch.load).
            gate.load_state_dict(
                torch.load(
                    weights_path, map_location=self.device, weights_only=True
                )
            )
            logger.info(f" ✓ {label} loaded from {weights_path}")
        else:
            logger.warning(f" ⚠️ {weights_path} not found, using random weights")
        gate.to(self.device)
        gate.eval()
        return gate

    def load_models(self):
        """Load all models at startup (idempotent; no-op once loaded)."""
        if self.loaded:
            return

        try:
            logger.info("🔧 Loading models...")

            # 1. Load sentence transformer for embeddings
            logger.info(" Loading sentence transformer...")
            self.encoder = SentenceTransformer('all-MiniLM-L6-v2')
            self.encoder.eval()
            logger.info(" ✓ Sentence transformer loaded")

            # 2. Load Q1 gate
            logger.info(" Loading Q1 gate...")
            self.q1_gate = self._load_gate(
                Q1Gate(input_dim=384, hidden_dim=256), 'q1_gate.pth', 'Q1 gate'
            )

            # 3. Load Q2 gate
            logger.info(" Loading Q2 gate...")
            self.q2_gate = self._load_gate(
                Q2Gate(input_dim=384, hidden_dim=256), 'q2_gate.pth', 'Q2 gate'
            )

            self.loaded = True
            logger.info(f"✅ All models loaded successfully (device: {self.device})")

        except Exception as e:
            logger.error(f"❌ Failed to load models: {e}")
            raise


# Global model state
models = ModelState()


# ============================================================================
# API Models
# ============================================================================

class PredictRequest(BaseModel):
    """Request model for /predict endpoint."""
    text: str
    context: Optional[str] = None


class PredictResponse(BaseModel):
    """Response model for /predict endpoint."""
    q1: float
    q2: float
    height: float
    message: str
    verdict: Optional[str] = None


def get_verdict(q1: float, q2: float, height: float) -> str:
    """
    Calculate verdict using official epistemic rule.

    Official epistemic rule:
    - u = 1.0 - height (total uncertainty)
    - If q2 >= 0.35 OR u >= 0.60 → REFUSED
    - If q1 >= 0.35 OR (0.30 <= u < 0.60) → MAYBE
    - Otherwise → ACCEPT
    """
    u = 1.0 - height  # Total uncertainty

    if q2 >= 0.35 or u >= 0.60:
        return "REFUSED"
    if q1 >= 0.35 or (0.30 <= u < 0.60):
        return "MAYBE"
    return "ACCEPT"


# ============================================================================
# API Endpoints
# ============================================================================

# NOTE(review): on_event is deprecated in recent FastAPI releases in favor of
# a lifespan handler — migrate once the deployed FastAPI version is confirmed.
@app.on_event("startup")
async def startup_event():
    """Load models on startup."""
    models.load_models()


@app.get("/")
def root():
    """Root endpoint."""
    return {
        "name": "AletheionGuard HF Space",
        "version": "2.0.0",
        "status": "operational",
        "models_loaded": models.loaded
    }


@app.post("/predict", response_model=PredictResponse)
def predict(
    request: PredictRequest,
    authorization: str = Header(...)
):
    """
    Predict endpoint using trained neural models.

    Returns epistemic uncertainty metrics (q1, q2, height) computed by
    the trained AletheionGuard neural networks.

    Args:
        request: Text and optional context
        authorization: Bearer token (verified by HF automatically)

    Returns:
        Neural-computed metrics with verdict

    Example:
        >>> POST /predict
        >>> Headers: Authorization: Bearer hf_...
        >>> Body: {"text": "Paris is the capital of France", "context": "geography"}
        >>> Response: {"q1": 0.08, "q2": 0.12, "height": 0.86, "verdict": "ACCEPT"}
    """
    try:
        if not models.loaded:
            raise HTTPException(status_code=503, detail="Models not loaded")

        logger.info(f"Received prediction request - text_length={len(request.text)}")

        # Combine text and context for embedding
        full_text = request.text
        if request.context:
            full_text = f"{request.context}: {request.text}"

        # Run the full inference pipeline under no_grad: the gate forward
        # passes must not build an autograd graph per request.
        with torch.no_grad():
            # 1. Get embeddings from sentence transformer
            embeddings = models.encoder.encode(
                full_text,
                convert_to_tensor=True,
                device=models.device
            )

            # 2. Compute Q1 (aleatoric uncertainty)
            q1_tensor = models.q1_gate(embeddings)
            q1 = float(q1_tensor.item())

            # 3. Compute Q2 (epistemic uncertainty) - conditioned on Q1
            q2_tensor = models.q2_gate(embeddings, q1_tensor)
            q2 = float(q2_tensor.item())

        # 4. Compute height from pyramidal formula
        # height = 1 - sqrt(q1^2 + q2^2), clamped to [0, 1]
        height = max(0.0, min(1.0, 1.0 - math.sqrt(q1**2 + q2**2)))

        # 5. Calculate verdict
        verdict = get_verdict(q1, q2, height)

        logger.info(
            f"Prediction: q1={q1:.3f}, q2={q2:.3f}, height={height:.3f}, verdict={verdict}"
        )

        return PredictResponse(
            q1=round(q1, 3),
            q2=round(q2, 3),
            height=round(height, 3),
            message="Neural metrics computed successfully.",
            verdict=verdict
        )

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Prediction failed: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))


@app.get("/health")
def health():
    """Health check endpoint."""
    return {
        "status": "healthy",
        "models_loaded": models.loaded,
        "device": str(models.device)
    }


if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)