Spaces:
Sleeping
Sleeping
File size: 3,340 Bytes
5e0532d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 |
import httpx
import os
import asyncio
import lancedb
from typing import List, Optional, Dict
from app.services.llm import llm_service
class BibleService:
    """Bible lookup service: remote passage fetch plus local semantic search.

    * ``get_passage`` fetches passage text over HTTP from ``BASE_URL``.
    * ``search`` embeds the query via ``llm_service`` and runs a vector
      search against the LanceDB table ``TABLE_NAME`` stored at ``DB_PATH``.

    Both methods are best-effort: failures are reported with ``print`` and
    surface to the caller as ``None`` / ``[]`` rather than as exceptions.
    """

    BASE_URL = "https://bible-api.com"
    DB_PATH = "data/lancedb_storage"
    TABLE_NAME = "bible_verses"

    def __init__(self):
        # We don't keep an in-memory index anymore.
        # A LanceDB connection is opened per query inside search().
        pass

    async def initialize_index(self):
        """Report whether the on-disk LanceDB storage directory exists.

        Nothing is built or loaded here; the method only checks ``DB_PATH``
        and prints a hint to run the ingestion script when it is missing.
        """
        if not os.path.exists(self.DB_PATH):
            print("BibleService: LanceDB storage not found at", self.DB_PATH)
            print("Please run scripts/ingest_bible.py")
        else:
            print("BibleService: LanceDB connected.")

    async def get_passage(self, reference: str, translation: str = "web") -> Optional[str]:
        """Fetch the text of a passage from the remote API at ``BASE_URL``.

        Args:
            reference: Passage reference, e.g. ``"John 3:16"``. Surrounding
                whitespace is ignored.
            translation: Translation identifier sent as the ``translation``
                query parameter.

        Returns:
            The stripped ``"text"`` field of the JSON response on HTTP 200
            (an empty string if the field is absent), or ``None`` when the
            reference is empty, the status is not 200, or any error occurs.
        """
        # Imported locally so this method does not depend on a file-level import.
        from urllib.parse import quote

        clean_ref = (reference or "").strip()
        if not clean_ref:
            return None

        # Percent-encode characters that would otherwise change the URL's
        # structure ('?', '#', '/', '%'). ':', ',', ';' and '+' are kept
        # literal so ordinary references are sent exactly as before.
        safe_ref = quote(clean_ref, safe=":,;+")

        async with httpx.AsyncClient() as client:
            try:
                response = await client.get(
                    f"{self.BASE_URL}/{safe_ref}",
                    params={"translation": translation},
                )
                if response.status_code == 200:
                    return response.json().get("text", "").strip()
                return None
            except Exception as e:
                # Deliberately best-effort: report the failure (as search()
                # does) and let the caller treat it as "no passage".
                print(f"BibleService Passage Error: {e}")
                return None

    async def search(self, query: str, limit: int = 3) -> List[dict]:
        """Run a semantic search over the persistent LanceDB table.

        Args:
            query: Free-text query; it is embedded with
                ``llm_service.get_embedding``.
            limit: Maximum number of rows to return.

        Returns:
            A list of dicts with keys ``"score"``, ``"text"`` and
            ``"reference"``. The list is empty when the DB directory or table
            is missing, the query is blank, ``limit`` is not positive, no
            embedding is produced, or the search raises.
        """
        if not os.path.exists(self.DB_PATH):
            print("BibleService Error: DB not initialized.")
            return []

        # Nothing useful can come back for a blank query or a non-positive
        # limit, so skip the embedding call entirely.
        if not query or not query.strip() or limit <= 0:
            return []

        # 1. Generate Query Vector
        query_embedding = await llm_service.get_embedding(query)
        if not query_embedding:
            return []

        try:
            # 2. Search LanceDB. The client calls used here are synchronous,
            # so run them in the default executor instead of blocking the
            # event loop.
            loop = asyncio.get_running_loop()
            rows = await loop.run_in_executor(
                None, self._query_table, query_embedding, limit
            )
            # 3. Format Results
            return [self._format_hit(row) for row in rows]
        except Exception as e:
            print(f"BibleService Search Error: {e}")
            return []

    def _query_table(self, query_embedding, limit: int) -> List[dict]:
        """Blocking helper: open the table and return the raw nearest rows."""
        db = lancedb.connect(self.DB_PATH)
        # Check if table exists
        if self.TABLE_NAME not in db.table_names():
            print(f"BibleService: Table {self.TABLE_NAME} not found.")
            return []
        tbl = db.open_table(self.TABLE_NAME)
        # Explicitly select columns to ensure they are returned
        return (
            tbl.search(query_embedding)
            .limit(limit)
            .select(["reference", "text"])
            .to_list()
        )

    @staticmethod
    def _format_hit(item: dict) -> dict:
        """Convert one raw search row into the public result shape."""
        # NOTE(review): the distance metric depends on how the table was
        # built (presumably L2) - confirm against the ingestion script.
        dist = item.get("_distance", 1.0)
        if dist is None:
            # Treat an explicit null like a missing value.
            dist = 1.0
        return {
            # Simple inverse normalization for display: distance 0 -> 1.0,
            # larger distances approach 0.
            "score": 1.0 / (1.0 + dist),
            "text": item.get("text", ""),
            "reference": item.get("reference", "Unknown"),
        }
# Module-level shared instance. Constructing BibleService does no I/O
# (its __init__ is a no-op), so creating it at import time is cheap.
bible_service = BibleService()
|