"""
Church Fathers Commentary Semantic Search API
Hugging Face Spaces deployment with FastAPI
Keeps model in memory for fast responses (~50-100ms after initial load)
"""

from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field
from typing import List, Optional
import torch
import numpy as np
from transformers import AutoTokenizer, AutoModel
import json
import os
from pathlib import Path
import logging

# Configure logging: INFO and above on the root logger, plus a module logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Initialize FastAPI app (title/description/version are shown in the OpenAPI docs)
app = FastAPI(
    title="Church Fathers Commentary Search API",
    description="Semantic search over Church Fathers commentaries using BGE embeddings",
    version="1.0.0"
)

# Enable CORS for all origins.
# Credentials are deliberately disabled: a wildcard origin combined with
# allow_credentials=True is not a supported configuration (browsers reject
# "Access-Control-Allow-Origin: *" on credentialed requests, and the middleware
# would otherwise have to reflect arbitrary origins). None of the endpoints
# visible in this module read cookies or authorization headers.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Request/Response models
class SearchRequest(BaseModel):
    # Request body accepted by POST /search.
    # Documented with comments rather than a docstring so the generated
    # schema description stays exactly as it was.

    # Free-text query; must be between 1 and 500 characters.
    query: str = Field(
        ...,
        min_length=1,
        max_length=500,
        description="Search query text",
    )

    # Maximum number of hits to return; 10 unless the caller says otherwise.
    limit: int = Field(
        10,
        ge=1,
        le=100,
        description="Number of results to return",
    )

    # Optional book filter; when omitted every loaded book is searched.
    books: Optional[List[str]] = Field(
        None,
        description="Filter by specific NT books (e.g., ['matthew', 'john'])",
    )

    # Optional author filter, compared against each entry's father_name.
    fathers: Optional[List[str]] = Field(
        None,
        description="Filter by specific Church Fathers",
    )

class SearchResult(BaseModel):
    # One commentary passage returned by POST /search.
    book: str  # key of the book bucket the passage was loaded under
    father_name: str  # Church Father the passage is attributed to
    source_title: str  # title of the source work (may be an empty string)
    content: str  # the commentary text itself
    similarity: float  # cosine similarity between the query and this passage
    location_start: Optional[str] = None  # start of the passage's location, as text
    location_end: Optional[str] = None  # end of the passage's location, as text

class SearchResponse(BaseModel):
    # Envelope returned by POST /search.
    query: str  # the query text, echoed back from the request
    results: List[SearchResult]  # hits ordered by descending similarity
    total_searched: int  # number of stored embeddings the query was compared against
    execution_time_ms: float  # wall time spent in the handler, in milliseconds (2 decimals)

# --- Module-level state -----------------------------------------------------

# Identifier of the embedding model passed to `from_pretrained`.
MODEL_NAME = "BAAI/bge-large-en-v1.5"

# Both stay None until the startup hook has loaded them.
tokenizer = None
model = None

# Per-book stores, filled by the startup hook. Both are keyed by book name;
# position i in a book's embeddings corresponds to position i in its metadata.
commentary_embeddings = dict()
commentary_metadata = dict()

# Reference lists (not mappings) of book identifiers and author names.
NEW_TESTAMENT_BOOKS = (
    "matthew mark luke john acts romans 1corinthians 2corinthians "
    "galatians ephesians philippians colossians 1thessalonians 2thessalonians "
    "1timothy 2timothy titus philemon hebrews james 1peter "
    "2peter 1john 2john 3john jude revelation"
).split()

CHURCH_FATHERS = [
    "Augustine of Hippo", "Athanasius of Alexandria", "Basil of Caesarea",
    "Gregory of Nazianzus", "Gregory of Nyssa", "Cyril of Alexandria",
    "Irenaeus", "Cyprian", "Origen of Alexandria",
]


def _read_commentary_entry(json_file):
    """Parse one commentary JSON file into ``(book, embedding, record)``.

    Every field is extracted and validated before anything is returned, so a
    malformed file raises here and can never leave an embedding stored without
    its metadata (or the other way round).

    Raises:
        Exception: whatever reading, JSON decoding, key lookup or array
            conversion raises for a malformed file; ValueError if the
            embedding is not a non-empty 1-D vector.
    """
    # JSON is UTF-8 by specification; do not depend on the platform locale.
    with open(json_file, 'r', encoding='utf-8') as f:
        entry = json.load(f)

    meta = entry['metadata']
    book = meta.get('book', 'unknown')

    embedding = np.asarray(entry['embedding'], dtype=np.float32)
    if embedding.ndim != 1 or embedding.size == 0:
        raise ValueError(f"embedding must be a non-empty 1-D vector, got shape {embedding.shape}")

    record = {
        'content': entry['content'],
        'father_name': meta.get('father_name', 'Unknown'),
        'source_title': meta.get('source_title', ''),
        'location_start': meta.get('location_start', ''),
        'location_end': meta.get('location_end', ''),
    }
    return book, embedding, record


@app.on_event("startup")
async def load_model_and_data():
    """Load the model and the commentary embeddings into memory at startup.

    The tokenizer and model named by MODEL_NAME are loaded first (on CUDA when
    available, otherwise CPU). Then every ``*.json`` file under ``data/``
    (searched recursively, relative to the working directory) is read and
    grouped by book into ``commentary_embeddings`` (one float32 matrix per
    book) and ``commentary_metadata`` (a parallel list of dicts per book).

    Files that cannot be parsed, or whose embedding length differs from the
    other entries of the same book, are skipped with a warning. A missing
    ``data`` directory is logged and leaves the stores untouched.

    Raises:
        Exception: re-raised after logging if the model cannot be loaded or
            the data directory cannot be processed as a whole.
    """
    global tokenizer, model, commentary_embeddings, commentary_metadata

    logger.info("Loading model and tokenizer...")
    try:
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        model = AutoModel.from_pretrained(MODEL_NAME)
        model.eval()  # Set to evaluation mode

        # Move to GPU if available
        device = "cuda" if torch.cuda.is_available() else "cpu"
        model = model.to(device)
        logger.info(f"Model loaded successfully on {device}")

    except Exception as e:
        logger.error(f"Error loading model: {e}")
        raise

    logger.info("Loading commentary embeddings...")
    try:
        # Load embeddings from data directory
        data_dir = Path("data")
        if not data_dir.exists():
            logger.warning("Data directory not found. Embeddings will be empty.")
            return

        # Build into local containers and publish them only once complete, so
        # the module-level stores never hold half-converted data.
        embeddings_by_book = {}
        metadata_by_book = {}
        total_entries = 0

        # Sorted so that entry order does not depend on directory listing order.
        for json_file in sorted(data_dir.rglob("*.json")):
            try:
                book, embedding, record = _read_commentary_entry(json_file)
                vectors = embeddings_by_book.get(book)
                if vectors and embedding.shape != vectors[0].shape:
                    raise ValueError(
                        f"embedding length {embedding.shape[0]} does not match "
                        f"{vectors[0].shape[0]} used by other entries of {book}"
                    )
            except Exception as e:
                # Best effort: one bad file must not prevent the rest from loading.
                logger.warning(f"Error loading {json_file}: {e}")
                continue

            # Both appends happen together, keeping the two stores index-aligned.
            embeddings_by_book.setdefault(book, []).append(embedding)
            metadata_by_book.setdefault(book, []).append(record)
            total_entries += 1

        # Stack each book's vectors into one matrix for faster computation
        loaded_count = 0
        for book, vectors in embeddings_by_book.items():
            embeddings_by_book[book] = np.stack(vectors)
            loaded_count += 1
            logger.info(f"Loaded {len(vectors)} entries for {book}")

        # Replace the contents in place so the module-level dict objects keep
        # their identity.
        commentary_embeddings.clear()
        commentary_embeddings.update(embeddings_by_book)
        commentary_metadata.clear()
        commentary_metadata.update(metadata_by_book)

        logger.info(f"Successfully loaded {total_entries} total entries across {loaded_count} books")

    except Exception as e:
        logger.error(f"Error loading embeddings: {e}")
        raise


def generate_embedding(text: str) -> np.ndarray:
    """Embed *text* with the loaded model and return one unit-length vector.

    The text is prefixed with a fixed instruction, tokenized (truncated to at
    most 512 tokens), run through the model without gradient tracking, and the
    last hidden state is averaged over the token axis and L2-normalized.

    Relies on the module-level ``tokenizer`` and ``model`` being loaded.
    """
    # NOTE(review): the upstream BGE model card appears to describe [CLS]
    # pooling and a different query instruction. Whatever is used here has to
    # match how the stored document embeddings were produced - confirm before
    # changing either.
    instruction = "Represent the Religious Bible verse commentary text for semantic search:"
    prompt = instruction + " " + text

    # Inputs must live on whichever device holds the model weights.
    target_device = next(model.parameters()).device
    encoded = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=512)
    encoded = {name: tensor.to(target_device) for name, tensor in encoded.items()}

    with torch.no_grad():
        hidden_states = model(**encoded).last_hidden_state
        pooled = hidden_states.mean(dim=1)  # mean over the token axis
        unit_vectors = torch.nn.functional.normalize(pooled, p=2, dim=1)

    # A single text was encoded, so the batch holds exactly one row.
    return unit_vectors.cpu().numpy()[0]


def cosine_similarity(query_embedding: np.ndarray, doc_embeddings: np.ndarray) -> np.ndarray:
    """Compute cosine similarity between a query vector and document vectors.

    Args:
        query_embedding: 1-D query vector of length ``d``.
        doc_embeddings: ``(n, d)`` matrix with one document vector per row.
            A single 1-D vector of length ``d`` is treated as one row, and an
            empty array is accepted.

    Returns:
        1-D array of ``n`` similarities, one per document row. A zero-length
        query or document vector has no direction, so its similarity is
        reported as 0.0 rather than NaN.
    """
    query = np.asarray(query_embedding)
    docs = np.asarray(doc_embeddings)

    # Nothing to compare against (also covers a shape-(0,) array, for which
    # a norm along axis 1 would raise).
    if docs.size == 0:
        row_count = docs.shape[0] if docs.ndim >= 2 else 0
        return np.zeros(row_count, dtype=np.float32)
    docs = np.atleast_2d(docs)

    query_norm = np.linalg.norm(query)
    if query_norm == 0:
        return np.zeros(docs.shape[0], dtype=np.float32)

    doc_norms = np.linalg.norm(docs, axis=1, keepdims=True)
    # Dividing an all-zero row by 1 leaves it all-zero, which yields a
    # similarity of exactly 0 instead of a 0/0 NaN and a RuntimeWarning.
    doc_norms[doc_norms == 0] = 1

    # Dot product of unit vectors is the cosine of the angle between them.
    return np.dot(docs / doc_norms, query / query_norm)


@app.get("/")
async def root():
    """Health check and API info"""
    # Count stored entries across every loaded book.
    entry_count = 0
    for book_vectors in commentary_embeddings.values():
        entry_count += len(book_vectors)

    # Reports whether CUDA is available to torch in this process.
    device_label = "cpu"
    if torch.cuda.is_available():
        device_label = "cuda"

    summary = {"status": "online", "model": MODEL_NAME}
    summary["books_loaded"] = len(commentary_embeddings)
    summary["total_entries"] = entry_count
    summary["device"] = device_label
    summary["available_books"] = list(commentary_embeddings)
    return summary


@app.get("/health")
async def health_check():
    """Detailed health check"""
    # Each flag reflects the module-level state set by the startup hook.
    has_model = model is not None
    has_tokenizer = tokenizer is not None
    has_embeddings = bool(commentary_embeddings)
    loaded_books = list(commentary_embeddings)

    return dict(
        model_loaded=has_model,
        tokenizer_loaded=has_tokenizer,
        embeddings_loaded=has_embeddings,
        books_available=loaded_books,
        fathers_available=CHURCH_FATHERS,
    )


@app.post("/search", response_model=SearchResponse)
async def search(request: SearchRequest):
    """
    Perform semantic search over Church Fathers commentaries

    - **query**: The search query text
    - **limit**: Number of results to return (1-100)
    - **books**: Optional list of NT books to filter by
    - **fathers**: Optional list of Church Fathers to filter by
    """
    # Implementation notes (kept out of the docstring, which is published as
    # the endpoint description):
    #   * 503 if the model or the embeddings are not loaded,
    #   * 400 if a book filter is given but matches no loaded book,
    #   * 500 for any unexpected failure while searching.
    import time
    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # wall-clock adjustments.
    start_time = time.perf_counter()

    # Validate model is loaded
    if model is None or tokenizer is None:
        raise HTTPException(status_code=503, detail="Model not loaded yet. Please try again in a moment.")

    # Validate we have embeddings
    if not commentary_embeddings:
        raise HTTPException(status_code=503, detail="Commentary embeddings not loaded. Please check data directory.")

    try:
        # Generate query embedding
        logger.info(f"Generating embedding for query: {request.query[:50]}...")
        query_embedding = generate_embedding(request.query)

        # Determine which books to search. dict.fromkeys drops repeated names
        # (keeping first-seen order) so a book is never searched twice.
        if request.books:
            books_to_search = [b for b in dict.fromkeys(request.books) if b in commentary_embeddings]
            if not books_to_search:
                raise HTTPException(status_code=400, detail="None of the specified books are available")
        else:
            books_to_search = list(commentary_embeddings)

        # Set membership is O(1) per entry; None means "no father filter".
        allowed_fathers = set(request.fathers) if request.fathers else None

        all_results = []
        total_searched = 0

        for book in books_to_search:
            book_embeddings = commentary_embeddings[book]
            book_metadata = commentary_metadata[book]
            if len(book_embeddings) == 0:
                continue

            # Compute similarities
            similarities = cosine_similarity(query_embedding, book_embeddings)
            total_searched += len(similarities)

            # Walk this book's entries from most to least similar and stop once
            # it has contributed `limit` hits: no book can place more than
            # `limit` entries in the final top-`limit`, so the rest need not be
            # materialised. The stable sort keeps equal scores in index order.
            book_hits = 0
            for i in np.argsort(-similarities, kind="stable"):
                similarity = similarities[i]
                if not np.isfinite(similarity):
                    continue

                metadata = book_metadata[i]

                # Apply father filter if specified
                if allowed_fathers is not None and metadata['father_name'] not in allowed_fathers:
                    continue

                # A missing location stays None instead of becoming the
                # literal string "None".
                location_start = metadata.get('location_start', '')
                location_end = metadata.get('location_end', '')

                all_results.append({
                    "book": book,
                    "father_name": metadata['father_name'],
                    "source_title": metadata['source_title'],
                    "content": metadata['content'],
                    "similarity": float(similarity),
                    "location_start": None if location_start is None else str(location_start),
                    "location_end": None if location_end is None else str(location_end),
                })

                book_hits += 1
                if book_hits >= request.limit:
                    break

        # Sort by similarity and limit
        all_results.sort(key=lambda x: x['similarity'], reverse=True)
        top_results = all_results[:request.limit]

        execution_time = (time.perf_counter() - start_time) * 1000  # Convert to ms
        logger.info(f"Search completed in {execution_time:.2f}ms, returning {len(top_results)} results")

        return SearchResponse(
            query=request.query,
            results=top_results,
            total_searched=total_searched,
            execution_time_ms=round(execution_time, 2)
        )

    except HTTPException:
        # Deliberate HTTP errors (such as the 400 above) must reach the client
        # unchanged rather than being rewrapped as a 500.
        raise
    except Exception as e:
        logger.exception(f"Error during search: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e


if __name__ == "__main__":
    # Direct execution: serve the app with uvicorn on every interface.
    import uvicorn

    bind_host, bind_port = "0.0.0.0", 7860
    uvicorn.run(app, host=bind_host, port=bind_port)