import asyncio
import os
from typing import Dict, List, Optional
from urllib.parse import quote

import httpx
import lancedb

from app.services.llm import llm_service


class BibleService:
    """Look up Bible passages and run semantic verse search.

    Passages are fetched over HTTP from ``BASE_URL``; semantic search runs
    against a LanceDB table stored on disk at ``DB_PATH``.
    """

    BASE_URL = "https://bible-api.com"
    DB_PATH = "data/lancedb_storage"
    TABLE_NAME = "bible_verses"

    def __init__(self):
        # We don't keep an in-memory index anymore.
        # Connection is established per-query or cached if needed.
        pass

    async def initialize_index(self):
        """Report whether the LanceDB storage directory exists.

        Nothing is built or loaded here: the method only checks that
        ``DB_PATH`` is present on disk and prints the outcome.
        """
        if not os.path.exists(self.DB_PATH):
            print("BibleService: LanceDB storage not found at", self.DB_PATH)
            print("Please run scripts/ingest_bible.py")
        else:
            print("BibleService: LanceDB connected.")

    async def get_passage(self, reference: str, translation: str = "web") -> Optional[str]:
        """Retrieve full passage text from bible-api.com (external fallback).

        Args:
            reference: Human-readable reference such as ``"John 3:16"``. It is
                expected raw (not already percent-encoded).
            translation: Translation identifier sent as the ``translation``
                query parameter.

        Returns:
            The stripped passage text, or ``None`` when the reference is
            empty, the response status is not 200, the payload is not usable,
            or the request fails.
        """
        clean_ref = (reference or "").strip()
        if not clean_ref:
            return None

        # Percent-encode the reference so characters such as "?", "#" or "/"
        # cannot alter the query string, fragment, or path of the request.
        # The characters commonly used in references are kept literal so that
        # ordinary lookups produce the same URL as before.
        encoded_ref = quote(clean_ref, safe=":,;+&")

        async with httpx.AsyncClient() as client:
            try:
                response = await client.get(
                    f"{self.BASE_URL}/{encoded_ref}",
                    params={"translation": translation},
                )
                if response.status_code != 200:
                    return None
                payload = response.json()
            except Exception as e:
                # Best-effort fallback: report the failure instead of hiding it,
                # but never let it propagate to the caller.
                print(f"BibleService Passage Error: {e}")
                return None

        if not isinstance(payload, dict):
            return None
        text = payload.get("text", "")
        return text.strip() if isinstance(text, str) else None

    async def search(self, query: str, limit: int = 3) -> List[dict]:
        """Semantic search using persistent LanceDB.

        Args:
            query: Free-text query; it is embedded with
                ``llm_service.get_embedding``.
            limit: Maximum number of hits to return.

        Returns:
            A list of dicts with ``"score"``, ``"text"`` and ``"reference"``
            keys. The list is empty when the storage or table is missing, the
            query is blank, ``limit`` is not positive, no embedding is
            produced, or the lookup fails.
        """
        if not os.path.exists(self.DB_PATH):
            print("BibleService Error: DB not initialized.")
            return []

        # Nothing to look up: skip the embedding call entirely.
        if not query or not query.strip() or limit <= 0:
            return []

        # 1. Generate Query Vector
        query_embedding = await llm_service.get_embedding(query)
        if not query_embedding:
            return []

        try:
            # 2. Search LanceDB. The client calls are blocking, so run them in
            # the default executor instead of stalling the event loop.
            loop = asyncio.get_running_loop()
            rows = await loop.run_in_executor(
                None, self._query_table, query_embedding, limit
            )

            # 3. Format Results
            return [self._format_hit(row) for row in rows]
        except Exception as e:
            print(f"BibleService Search Error: {e}")
            return []

    def _query_table(self, query_embedding, limit: int) -> List[dict]:
        """Run the blocking LanceDB nearest-neighbour query and return raw rows."""
        db = lancedb.connect(self.DB_PATH)

        # Check if table exists
        if self.TABLE_NAME not in db.table_names():
            print(f"BibleService: Table {self.TABLE_NAME} not found.")
            return []

        tbl = db.open_table(self.TABLE_NAME)

        # Explicitly select columns to ensure they are returned
        return (
            tbl.search(query_embedding)
            .limit(limit)
            .select(["reference", "text"])
            .to_list()
        )

    @staticmethod
    def _format_hit(item: dict) -> dict:
        """Convert one raw LanceDB row into the public result shape."""
        # distance is typically L2. For binary-ish vectors, it's
        # sqrt(sum of differences squared).
        dist = item.get("_distance", 1.0)
        # Simple inverse normalization for display: a distance of 0 maps to a
        # score of 1.0, and larger distances give smaller scores.
        relevance = 1.0 / (1.0 + dist)
        return {
            "score": relevance,
            "text": item.get("text", ""),
            "reference": item.get("reference", "Unknown"),
        }


bible_service = BibleService()