Spaces:
Sleeping
Sleeping
import asyncio
import os
from typing import List, Optional, Dict
from urllib.parse import quote

import httpx
import lancedb

from app.services.llm import llm_service
class BibleService:
    """Scripture lookup: remote passage retrieval plus local semantic search.

    Full passages are fetched over HTTP from ``BASE_URL``. Semantic search
    runs against a persistent LanceDB table stored under ``DB_PATH``. No
    index is held in memory; a LanceDB connection is opened per query.
    """

    BASE_URL = "https://bible-api.com"
    DB_PATH = "data/lancedb_storage"
    TABLE_NAME = "bible_verses"

    # Characters kept literal when a reference is placed in the URL path.
    # These are the separators used in ordinary references ("john 3:16",
    # "matt 25:31-33,46", "john+3:16"); everything else that could alter the
    # request target ("?", "#", "/", ...) is percent-encoded.
    _REFERENCE_SAFE_CHARS = ":,;+"

    def __init__(self):
        # We don't keep an in-memory index anymore.
        # Connection is established per-query or cached if needed.
        pass

    async def initialize_index(self):
        """Report whether the LanceDB storage directory exists.

        This is a no-op as far as indexing goes, because the LanceDB data is
        persistent. It only prints a status message; when the directory is
        missing the message points to ``scripts/ingest_bible.py``.
        """
        if not os.path.exists(self.DB_PATH):
            print("BibleService: LanceDB storage not found at", self.DB_PATH)
            print("Please run scripts/ingest_bible.py")
        else:
            print("BibleService: LanceDB connected.")

    def _build_passage_url(self, reference: str) -> str:
        """Return the request URL for *reference* with the path percent-encoded."""
        encoded = quote(reference, safe=self._REFERENCE_SAFE_CHARS)
        return f"{self.BASE_URL}/{encoded}"

    async def get_passage(self, reference: str, translation: str = "web") -> Optional[str]:
        """Retrieve full passage text from bible-api.com (external fallback).

        Args:
            reference: Passage reference, e.g. ``"John 3:16"``. Surrounding
                whitespace is ignored.
            translation: Translation identifier sent as the ``translation``
                query parameter.

        Returns:
            The stripped passage text, or ``None`` when the reference is
            empty, the response status is not 200, the request or JSON
            decoding fails, or the payload carries no non-blank ``text``.
        """
        clean_ref = (reference or "").strip()
        if not clean_ref:
            return None

        url = self._build_passage_url(clean_ref)
        async with httpx.AsyncClient() as client:
            try:
                response = await client.get(url, params={"translation": translation})
                if response.status_code != 200:
                    return None
                payload = response.json()
            except Exception as e:
                # Best-effort lookup: report the failure instead of hiding it,
                # but never propagate it to the caller.
                print(f"BibleService Passage Error: {e}")
                return None

        text = payload.get("text") if isinstance(payload, dict) else None
        if not isinstance(text, str):
            return None
        # A blank passage is treated the same as a missing one.
        return text.strip() or None

    def _query_table(self, query_embedding, limit: int) -> List[dict]:
        """Run the blocking LanceDB vector query and return the raw rows."""
        db = lancedb.connect(self.DB_PATH)
        if self.TABLE_NAME not in db.table_names():
            print(f"BibleService: Table {self.TABLE_NAME} not found.")
            return []
        tbl = db.open_table(self.TABLE_NAME)
        # Explicitly select columns to ensure they are returned.
        return (
            tbl.search(query_embedding)
            .limit(limit)
            .select(["reference", "text"])
            .to_list()
        )

    @staticmethod
    def _format_result(item: dict) -> dict:
        """Convert one raw LanceDB row into the public result dict."""
        # The row's distance is read from "_distance" (1.0 when absent).
        # NOTE(review): the original author describes it as typically L2;
        # confirm against the table's configured metric.
        dist = item.get("_distance", 1.0)
        return {
            # Simple inverse normalization for display: smaller distance
            # gives a score closer to 1.
            "score": 1.0 / (1.0 + dist),
            "text": item.get("text", ""),
            "reference": item.get("reference", "Unknown"),
        }

    async def search(self, query: str, limit: int = 3) -> List[dict]:
        """Semantic search using the persistent LanceDB table.

        Args:
            query: Free-text query; it is embedded via
                ``llm_service.get_embedding``.
            limit: Maximum number of rows to return.

        Returns:
            A list of dicts with keys ``score``, ``text`` and ``reference``.
            The list is empty when the query is blank, ``limit`` is not
            positive, the storage directory or table is missing, no embedding
            is produced, or the database query fails.

        Note:
            Exceptions raised by ``llm_service.get_embedding`` are not caught
            here; only database/query errors are handled.
        """
        if not query or not query.strip() or limit <= 0:
            return []

        if not os.path.exists(self.DB_PATH):
            print("BibleService Error: DB not initialized.")
            return []

        # 1. Generate query vector.
        query_embedding = await llm_service.get_embedding(query)
        if not query_embedding:
            return []

        try:
            # 2. Search LanceDB. The client calls are synchronous, so run
            # them in the default executor to keep the event loop responsive.
            loop = asyncio.get_running_loop()
            rows = await loop.run_in_executor(
                None, self._query_table, query_embedding, limit
            )
            # 3. Format results.
            return [self._format_result(row) for row in rows]
        except Exception as e:
            print(f"BibleService Search Error: {e}")
            return []


# Shared module-level instance of the service.
bible_service = BibleService()