""" Church Fathers Commentary Semantic Search API Hugging Face Spaces deployment with FastAPI Keeps model in memory for fast responses (~50-100ms after initial load) """ from fastapi import FastAPI, HTTPException from fastapi.middleware.cors import CORSMiddleware from pydantic import BaseModel, Field from typing import List, Optional import torch import numpy as np from transformers import AutoTokenizer, AutoModel import json import os from pathlib import Path import logging # Configure logging logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) # Initialize FastAPI app app = FastAPI( title="Church Fathers Commentary Search API", description="Semantic search over Church Fathers commentaries using BGE embeddings", version="1.0.0" ) # Enable CORS for all origins app.add_middleware( CORSMiddleware, allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"], ) # Request/Response models class SearchRequest(BaseModel): query: str = Field(..., description="Search query text", min_length=1, max_length=500) limit: int = Field(10, description="Number of results to return", ge=1, le=100) books: Optional[List[str]] = Field(None, description="Filter by specific NT books (e.g., ['matthew', 'john'])") fathers: Optional[List[str]] = Field(None, description="Filter by specific Church Fathers") class SearchResult(BaseModel): book: str father_name: str source_title: str content: str similarity: float location_start: Optional[str] = None location_end: Optional[str] = None class SearchResponse(BaseModel): query: str results: List[SearchResult] total_searched: int execution_time_ms: float # Global variables for model and data MODEL_NAME = "BAAI/bge-large-en-v1.5" tokenizer = None model = None commentary_embeddings = {} commentary_metadata = {} # Book and Father mappings NEW_TESTAMENT_BOOKS = [ "matthew", "mark", "luke", "john", "acts", "romans", "1corinthians", "2corinthians", "galatians", "ephesians", "philippians", "colossians", "1thessalonians", "2thessalonians", "1timothy", "2timothy", "titus", "philemon", "hebrews", "james", "1peter", "2peter", "1john", "2john", "3john", "jude", "revelation" ] CHURCH_FATHERS = [ "Augustine of Hippo", "Athanasius of Alexandria", "Basil of Caesarea", "Gregory of Nazianzus", "Gregory of Nyssa", "Cyril of Alexandria", "Irenaeus", "Cyprian", "Origen of Alexandria" ] @app.on_event("startup") async def load_model_and_data(): """Load model and commentary embeddings into memory at startup""" global tokenizer, model, commentary_embeddings, commentary_metadata logger.info("Loading model and tokenizer...") try: tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME) model = AutoModel.from_pretrained(MODEL_NAME) model.eval() # Set to evaluation mode # Move to GPU if available device = "cuda" if torch.cuda.is_available() else "cpu" model = model.to(device) logger.info(f"Model loaded successfully on {device}") except Exception as e: logger.error(f"Error loading model: {e}") raise logger.info("Loading commentary embeddings...") try: # Load embeddings from data directory data_dir = Path("data") if not data_dir.exists(): logger.warning("Data directory not found. Embeddings will be empty.") return loaded_count = 0 total_entries = 0 # Load all JSON files from data directory for json_file in data_dir.rglob("*.json"): try: with open(json_file, 'r') as f: entry = json.load(f) # Extract metadata book = entry['metadata'].get('book', 'unknown') # Initialize book storage if needed if book not in commentary_embeddings: commentary_embeddings[book] = [] commentary_metadata[book] = [] # Store embedding and metadata commentary_embeddings[book].append(entry['embedding']) commentary_metadata[book].append({ 'content': entry['content'], 'father_name': entry['metadata'].get('father_name', 'Unknown'), 'source_title': entry['metadata'].get('source_title', ''), 'location_start': entry['metadata'].get('location_start', ''), 'location_end': entry['metadata'].get('location_end', ''), }) total_entries += 1 except Exception as e: logger.warning(f"Error loading {json_file}: {e}") continue # Convert lists to numpy arrays for faster computation for book in commentary_embeddings: commentary_embeddings[book] = np.array(commentary_embeddings[book], dtype=np.float32) loaded_count += 1 logger.info(f"Loaded {len(commentary_embeddings[book])} entries for {book}") logger.info(f"Successfully loaded {total_entries} total entries across {loaded_count} books") except Exception as e: logger.error(f"Error loading embeddings: {e}") raise def generate_embedding(text: str) -> np.ndarray: """Generate embedding for input text using loaded model""" # Add query instruction for BGE model query_instruction = "Represent the Religious Bible verse commentary text for semantic search:" text_with_instruction = f"{query_instruction} {text}" # Tokenize inputs = tokenizer(text_with_instruction, return_tensors="pt", padding=True, truncation=True, max_length=512) # Move to same device as model device = next(model.parameters()).device inputs = {k: v.to(device) for k, v in inputs.items()} # Generate embeddings with torch.no_grad(): outputs = model(**inputs) # Mean pooling embeddings = outputs.last_hidden_state.mean(dim=1) # Normalize embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=1) return embeddings.cpu().numpy()[0] def cosine_similarity(query_embedding: np.ndarray, doc_embeddings: np.ndarray) -> np.ndarray: """Compute cosine similarity between query and document embeddings""" # Normalize query embedding query_norm = query_embedding / np.linalg.norm(query_embedding) # Normalize document embeddings doc_norms = np.linalg.norm(doc_embeddings, axis=1, keepdims=True) doc_embeddings_norm = doc_embeddings / doc_norms # Compute dot product (cosine similarity for normalized vectors) similarities = np.dot(doc_embeddings_norm, query_norm) return similarities @app.get("/") async def root(): """Health check and API info""" return { "status": "online", "model": MODEL_NAME, "books_loaded": len(commentary_embeddings), "total_entries": sum(len(emb) for emb in commentary_embeddings.values()), "device": "cuda" if torch.cuda.is_available() else "cpu", "available_books": list(commentary_embeddings.keys()) } @app.get("/health") async def health_check(): """Detailed health check""" return { "model_loaded": model is not None, "tokenizer_loaded": tokenizer is not None, "embeddings_loaded": len(commentary_embeddings) > 0, "books_available": list(commentary_embeddings.keys()), "fathers_available": CHURCH_FATHERS } @app.post("/search", response_model=SearchResponse) async def search(request: SearchRequest): """ Perform semantic search over Church Fathers commentaries - **query**: The search query text - **limit**: Number of results to return (1-100) - **books**: Optional list of NT books to filter by - **fathers**: Optional list of Church Fathers to filter by """ import time start_time = time.time() # Validate model is loaded if model is None or tokenizer is None: raise HTTPException(status_code=503, detail="Model not loaded yet. Please try again in a moment.") # Validate we have embeddings if len(commentary_embeddings) == 0: raise HTTPException(status_code=503, detail="Commentary embeddings not loaded. Please check data directory.") try: # Generate query embedding logger.info(f"Generating embedding for query: {request.query[:50]}...") query_embedding = generate_embedding(request.query) # Determine which books to search if request.books: books_to_search = [b for b in request.books if b in commentary_embeddings] if not books_to_search: raise HTTPException(status_code=400, detail="None of the specified books are available") else: books_to_search = list(commentary_embeddings.keys()) # Collect all results all_results = [] total_searched = 0 for book in books_to_search: book_embeddings = commentary_embeddings[book] book_metadata = commentary_metadata[book] # Compute similarities similarities = cosine_similarity(query_embedding, book_embeddings) # Create results for i, similarity in enumerate(similarities): if not np.isnan(similarity) and np.isfinite(similarity): metadata = book_metadata[i] # Apply father filter if specified if request.fathers and metadata['father_name'] not in request.fathers: continue all_results.append({ "book": book, "father_name": metadata['father_name'], "source_title": metadata['source_title'], "content": metadata['content'], "similarity": float(similarity), "location_start": str(metadata.get('location_start', '')), "location_end": str(metadata.get('location_end', '')) }) total_searched += len(similarities) # Sort by similarity and limit all_results.sort(key=lambda x: x['similarity'], reverse=True) top_results = all_results[:request.limit] execution_time = (time.time() - start_time) * 1000 # Convert to ms logger.info(f"Search completed in {execution_time:.2f}ms, returning {len(top_results)} results") return SearchResponse( query=request.query, results=top_results, total_searched=total_searched, execution_time_ms=round(execution_time, 2) ) except Exception as e: logger.error(f"Error during search: {e}") raise HTTPException(status_code=500, detail=str(e)) if __name__ == "__main__": import uvicorn uvicorn.run(app, host="0.0.0.0", port=7860)