# (Hugging Face Spaces status-banner text removed during extraction -- not part of the source.)
| """ | |
| Church Fathers Commentary Semantic Search API | |
| Hugging Face Spaces deployment with FastAPI | |
| Keeps model in memory for fast responses (~50-100ms after initial load) | |
| """ | |
import json
import logging
import os
import time
from pathlib import Path
from typing import List, Optional

import numpy as np
import torch
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field
from transformers import AutoModel, AutoTokenizer
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Initialize FastAPI app
app = FastAPI(
    title="Church Fathers Commentary Search API",
    description="Semantic search over Church Fathers commentaries using BGE embeddings",
    version="1.0.0"
)

# Enable CORS for all origins.
# NOTE(review): browsers reject `Access-Control-Allow-Origin: *` combined with
# credentials per the CORS spec -- confirm whether allow_credentials is needed.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
| # Request/Response models | |
| class SearchRequest(BaseModel): | |
| query: str = Field(..., description="Search query text", min_length=1, max_length=500) | |
| limit: int = Field(10, description="Number of results to return", ge=1, le=100) | |
| books: Optional[List[str]] = Field(None, description="Filter by specific NT books (e.g., ['matthew', 'john'])") | |
| fathers: Optional[List[str]] = Field(None, description="Filter by specific Church Fathers") | |
class SearchResult(BaseModel):
    """A single ranked commentary match."""

    # Book key the entry belongs to (e.g. "matthew").
    book: str
    # Church Father attributed in the entry metadata ("Unknown" if missing).
    father_name: str
    # Title of the source work the passage comes from.
    source_title: str
    # The commentary passage text itself.
    content: str
    # Cosine similarity between query and passage embeddings.
    similarity: float
    # Optional verse/location references; stored as strings.
    location_start: Optional[str] = None
    location_end: Optional[str] = None
class SearchResponse(BaseModel):
    """Response envelope for the semantic-search endpoint."""

    # Echo of the original query text.
    query: str
    # Ranked matches, best first, at most `limit` entries.
    results: List[SearchResult]
    # Total number of embeddings scored across all searched books.
    total_searched: int
    # Wall-clock time for the search, in milliseconds.
    execution_time_ms: float
# Global variables for model and data.
# Hugging Face model identifier for the BGE embedding model.
MODEL_NAME = "BAAI/bge-large-en-v1.5"
# Populated once at startup by the loader; None until then.
tokenizer = None
model = None
# book key -> np.ndarray of shape (n_entries, dim) after loading.
commentary_embeddings = {}
# book key -> list of metadata dicts, index-aligned with the embedding rows.
commentary_metadata = {}
# Book and Father mappings.
# Canonical lowercase keys for the 27 New Testament books.
# NOTE(review): not referenced anywhere in the visible code -- likely intended
# for validating SearchRequest.books; confirm before removing.
NEW_TESTAMENT_BOOKS = [
    "matthew", "mark", "luke", "john", "acts", "romans", "1corinthians", "2corinthians",
    "galatians", "ephesians", "philippians", "colossians", "1thessalonians", "2thessalonians",
    "1timothy", "2timothy", "titus", "philemon", "hebrews", "james", "1peter",
    "2peter", "1john", "2john", "3john", "jude", "revelation"
]
# Father names exposed by the health endpoint; must match metadata spellings
# for the `fathers` request filter to hit.
CHURCH_FATHERS = [
    "Augustine of Hippo",
    "Athanasius of Alexandria",
    "Basil of Caesarea",
    "Gregory of Nazianzus",
    "Gregory of Nyssa",
    "Cyril of Alexandria",
    "Irenaeus",
    "Cyprian",
    "Origen of Alexandria"
]
@app.on_event("startup")  # was never registered, so the model/data never loaded
async def load_model_and_data():
    """Load the embedding model and commentary embeddings into memory.

    Registered as a FastAPI startup hook so the expensive model load happens
    once per process rather than per request. Populates the module-level
    ``tokenizer``, ``model``, ``commentary_embeddings`` and
    ``commentary_metadata`` globals.

    Raises:
        Exception: re-raised when the model cannot be loaded or the data
            directory cannot be read; individual malformed JSON files are
            skipped with a warning instead of aborting startup.
    """
    global tokenizer, model, commentary_embeddings, commentary_metadata

    logger.info("Loading model and tokenizer...")
    try:
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        model = AutoModel.from_pretrained(MODEL_NAME)
        model.eval()  # inference only -- disables dropout etc.
        # Move to GPU if available.
        device = "cuda" if torch.cuda.is_available() else "cpu"
        model = model.to(device)
        logger.info(f"Model loaded successfully on {device}")
    except Exception as e:
        logger.error(f"Error loading model: {e}")
        raise

    logger.info("Loading commentary embeddings...")
    try:
        data_dir = Path("data")
        if not data_dir.exists():
            logger.warning("Data directory not found. Embeddings will be empty.")
            return

        loaded_count = 0
        total_entries = 0
        # One JSON file per entry: {"embedding": [...], "content": ..., "metadata": {...}}.
        for json_file in data_dir.rglob("*.json"):
            try:
                with open(json_file, "r", encoding="utf-8") as f:
                    entry = json.load(f)
                book = entry['metadata'].get('book', 'unknown')
                # Initialize per-book storage on first sight of a book.
                if book not in commentary_embeddings:
                    commentary_embeddings[book] = []
                    commentary_metadata[book] = []
                # Embedding row and its metadata stay index-aligned.
                commentary_embeddings[book].append(entry['embedding'])
                commentary_metadata[book].append({
                    'content': entry['content'],
                    'father_name': entry['metadata'].get('father_name', 'Unknown'),
                    'source_title': entry['metadata'].get('source_title', ''),
                    'location_start': entry['metadata'].get('location_start', ''),
                    'location_end': entry['metadata'].get('location_end', ''),
                })
                total_entries += 1
            except Exception as e:
                # Best-effort load: skip malformed files, keep the rest.
                logger.warning(f"Error loading {json_file}: {e}")
                continue

        # Convert per-book lists to float32 matrices for vectorized similarity.
        for book in commentary_embeddings:
            commentary_embeddings[book] = np.array(commentary_embeddings[book], dtype=np.float32)
            loaded_count += 1
            logger.info(f"Loaded {len(commentary_embeddings[book])} entries for {book}")

        logger.info(f"Successfully loaded {total_entries} total entries across {loaded_count} books")
    except Exception as e:
        logger.error(f"Error loading embeddings: {e}")
        raise
def generate_embedding(text: str) -> np.ndarray:
    """Embed *text* with the in-memory BGE model; return a unit-length vector.

    The BGE retrieval instruction prefix is prepended before tokenization.
    NOTE(review): this must match the prefix used when the corpus was indexed,
    and the mean-pooling below must match the indexing-time pooling -- confirm.
    """
    instruction = "Represent the Religious Bible verse commentary text for semantic search:"
    prefixed = instruction + " " + text

    # Tokenize, then place the tensors on whichever device holds the model.
    encoded = tokenizer(prefixed, return_tensors="pt", padding=True, truncation=True, max_length=512)
    target_device = next(model.parameters()).device
    encoded = {name: tensor.to(target_device) for name, tensor in encoded.items()}

    with torch.no_grad():
        token_states = model(**encoded).last_hidden_state
        # Mean-pool over tokens, then L2-normalize so dot products are cosines.
        pooled = token_states.mean(dim=1)
        pooled = torch.nn.functional.normalize(pooled, p=2, dim=1)

    # Single query in the batch -> return its row as a numpy vector.
    return pooled.cpu().numpy()[0]
def cosine_similarity(query_embedding: np.ndarray, doc_embeddings: np.ndarray) -> np.ndarray:
    """Compute cosine similarity between one query vector and a matrix of docs.

    Args:
        query_embedding: 1-D vector of shape (dim,).
        doc_embeddings: 2-D matrix of shape (n_docs, dim).

    Returns:
        1-D array of shape (n_docs,) with values in [-1, 1]. Degenerate
        all-zero rows (or an all-zero query) score 0.0 instead of producing
        NaN via division by zero, as the original implementation did.
    """
    query_norm = np.linalg.norm(query_embedding)
    if query_norm == 0.0:
        # A zero query has no direction; nothing is similar to it.
        return np.zeros(doc_embeddings.shape[0], dtype=doc_embeddings.dtype)
    unit_query = query_embedding / query_norm

    doc_norms = np.linalg.norm(doc_embeddings, axis=1, keepdims=True)
    # Guard: a zero row divided by 1.0 stays zero, yielding similarity 0.0.
    doc_norms[doc_norms == 0.0] = 1.0
    unit_docs = doc_embeddings / doc_norms

    # Dot product of unit vectors == cosine similarity.
    return np.dot(unit_docs, unit_query)
@app.get("/")  # route decorator was missing, leaving the endpoint unreachable
async def root():
    """Health check and API info.

    Returns the model name, the device in use, and which books currently
    have embeddings loaded in memory.
    """
    return {
        "status": "online",
        "model": MODEL_NAME,
        "books_loaded": len(commentary_embeddings),
        "total_entries": sum(len(emb) for emb in commentary_embeddings.values()),
        "device": "cuda" if torch.cuda.is_available() else "cpu",
        "available_books": list(commentary_embeddings.keys())
    }
@app.get("/health")  # route decorator was missing, leaving the endpoint unreachable
async def health_check():
    """Detailed health check.

    Reports whether the model, tokenizer and embeddings have finished
    loading, plus the available book keys and known Church Fathers.
    """
    return {
        "model_loaded": model is not None,
        "tokenizer_loaded": tokenizer is not None,
        "embeddings_loaded": len(commentary_embeddings) > 0,
        "books_available": list(commentary_embeddings.keys()),
        "fathers_available": CHURCH_FATHERS
    }
@app.post("/search", response_model=SearchResponse)  # decorator was missing
async def search(request: SearchRequest):
    """
    Perform semantic search over Church Fathers commentaries.

    - **query**: The search query text
    - **limit**: Number of results to return (1-100)
    - **books**: Optional list of NT books to filter by
    - **fathers**: Optional list of Church Fathers to filter by

    Raises 503 until startup loading completes, 400 when none of the
    requested books are available, 500 on unexpected errors.
    """
    import time
    start_time = time.time()

    # Fail fast until the startup hook has populated the globals.
    if model is None or tokenizer is None:
        raise HTTPException(status_code=503, detail="Model not loaded yet. Please try again in a moment.")
    if len(commentary_embeddings) == 0:
        raise HTTPException(status_code=503, detail="Commentary embeddings not loaded. Please check data directory.")

    try:
        # Generate query embedding.
        logger.info(f"Generating embedding for query: {request.query[:50]}...")
        query_embedding = generate_embedding(request.query)

        # Restrict the search to the requested books, when any are loaded.
        if request.books:
            books_to_search = [b for b in request.books if b in commentary_embeddings]
            if not books_to_search:
                raise HTTPException(status_code=400, detail="None of the specified books are available")
        else:
            books_to_search = list(commentary_embeddings.keys())

        all_results = []
        total_searched = 0
        for book in books_to_search:
            book_embeddings = commentary_embeddings[book]
            book_metadata = commentary_metadata[book]
            similarities = cosine_similarity(query_embedding, book_embeddings)

            for i, similarity in enumerate(similarities):
                # Skip NaN/inf scores from degenerate embeddings.
                if not np.isnan(similarity) and np.isfinite(similarity):
                    metadata = book_metadata[i]
                    # Apply the father filter, if one was specified.
                    if request.fathers and metadata['father_name'] not in request.fathers:
                        continue
                    all_results.append({
                        "book": book,
                        "father_name": metadata['father_name'],
                        "source_title": metadata['source_title'],
                        "content": metadata['content'],
                        "similarity": float(similarity),
                        "location_start": str(metadata.get('location_start', '')),
                        "location_end": str(metadata.get('location_end', ''))
                    })
            total_searched += len(similarities)

        # Rank globally across books, best first, and truncate.
        all_results.sort(key=lambda x: x['similarity'], reverse=True)
        top_results = all_results[:request.limit]

        execution_time = (time.time() - start_time) * 1000  # Convert to ms
        logger.info(f"Search completed in {execution_time:.2f}ms, returning {len(top_results)} results")

        return SearchResponse(
            query=request.query,
            results=top_results,
            total_searched=total_searched,
            execution_time_ms=round(execution_time, 2)
        )
    except HTTPException:
        # Bug fix: the 400 raised above was being swallowed by the generic
        # handler below and re-raised as a 500; let it propagate unchanged.
        raise
    except Exception as e:
        logger.error(f"Error during search: {e}")
        raise HTTPException(status_code=500, detail=str(e))
if __name__ == "__main__":
    # Local/dev entry point; port 7860 is the Hugging Face Spaces convention.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)