uae-kb / backend /services.py
jinruiyang
Add backend API module for FastAPI server
da9db52
"""
UAE Knowledge System - Backend Services
Handles knowledge base and retriever initialization
"""
import sys
from pathlib import Path
# Add parent directory to path for imports
sys.path.insert(0, str(Path(__file__).parent.parent))
from ir.retriever import EntityRetriever, RetrievalOutput
from ir.knowledge_base import KnowledgeBase
# ============================================================
# Global State
# ============================================================
# Module-level caches filled on first use by get_retriever() and
# get_knowledge_base() respectively; None means "not loaded yet".
_retriever = None
_knowledge_base = None
# Paths relative to project root
# PROJECT_ROOT is the parent of the directory containing this file (the same
# directory that is prepended to sys.path at the top of the module).
PROJECT_ROOT = Path(__file__).parent.parent
# Location handed to the retriever's load_index() / save_index() calls.
INDEX_CACHE_PATH = PROJECT_ROOT / "ir" / "cache" / "dense_index"
def get_knowledge_base() -> KnowledgeBase:
    """Return the shared KnowledgeBase, constructing it on the first call.

    The instance is stored in the module-level ``_knowledge_base`` so that
    later calls return the same object without constructing it again.
    """
    global _knowledge_base

    # Fast path: already constructed by an earlier call.
    if _knowledge_base is not None:
        return _knowledge_base

    print("Loading knowledge base...")
    _knowledge_base = KnowledgeBase(debug=False)
    return _knowledge_base
def get_retriever():
    """Return the shared dense retriever, creating and indexing it on first use.

    On the first call a ``DenseRetriever`` (model ``"bge-m3"``) is created and
    its index is obtained in one of two ways:

    * ``INDEX_CACHE_PATH`` exists: the cached index is loaded; if
      ``load_index`` reports failure (falsy return) the index is rebuilt from
      the knowledge base and saved back to the same path.
    * ``INDEX_CACHE_PATH`` does not exist: the index is built from the
      knowledge base, the cache's parent directory is created, and the index
      is saved.

    The retriever is then stored in the module-level ``_retriever`` and
    returned directly on every later call.
    """
    global _retriever

    if _retriever is None:
        # Imported here rather than at module level, as in the original code.
        from ir.retrievers.dense import DenseRetriever

        print("Loading dense retriever...")
        dense = DenseRetriever(model_name="bge-m3", debug=False)
        knowledge_base = get_knowledge_base()
        cache_location = str(INDEX_CACHE_PATH)

        if not INDEX_CACHE_PATH.exists():
            # No cache on disk yet: build from scratch and persist it.
            print("Building dense index (this may take a while)...")
            dense.build_index_from_knowledge_base(knowledge_base)
            INDEX_CACHE_PATH.parent.mkdir(parents=True, exist_ok=True)
            dense.save_index(cache_location)
            print("Index built and cached!")
        else:
            print(f"Loading cached index from {INDEX_CACHE_PATH}...")
            cache_ok = dense.load_index(cache_location)
            if cache_ok:
                print("Cached index loaded!")
            else:
                # The cache path exists but could not be loaded: rebuild and
                # overwrite it.
                print("Cache load failed, building index...")
                dense.build_index_from_knowledge_base(knowledge_base)
                dense.save_index(cache_location)

        _retriever = dense

    return _retriever
# Upper bound on the number of must-answer facts copied into one result.
_MAX_MUST_ANSWER_FACTS = 5


def _extract_must_answer(facts_data):
    """Return at most ``_MAX_MUST_ANSWER_FACTS`` entries from ``must_answer``."""
    # ``or []`` covers a key that is present but null; ``dict.get`` with a
    # default would hand the None back and the slice below would raise.
    must_answer = facts_data.get('must_answer') or []
    return [
        # A dict entry contributes its 'fact' value (or the dict itself when
        # that key is absent); anything else is stringified.
        fact.get('fact', fact) if isinstance(fact, dict) else str(fact)
        for fact in must_answer[:_MAX_MUST_ANSWER_FACTS]
    ]


def _format_result(metadata, score, kb):
    """Build the response dict for one retriever hit, enriched from *kb*."""
    entity_id = metadata.get("entity_id", "")
    result = {
        "entity_id": entity_id,
        "entity_name": metadata.get("entity_name", "Unknown"),
        "score": score,
        "chunk_type": metadata.get("chunk_type", ""),
        "subcategory": "",
        "emirate": "",
        "is_royal": False,
        "summary": "",
        "must_answer": []
    }

    # Get full entity data from KB; skipped when the hit carries no entity id.
    raw_data = kb.get_raw_entity(entity_id) if entity_id else None
    if not raw_data:
        return result

    # Guard against sections that are present but null, so one incomplete
    # entity cannot abort the whole search with an AttributeError.
    facts_data = raw_data.get('facts') or {}
    metadata_kb = raw_data.get('metadata') or {}

    result["subcategory"] = raw_data.get('subcategory', '')
    result["emirate"] = metadata_kb.get('emirate', '')
    result["is_royal"] = metadata_kb.get('is_royal', False)
    result["summary"] = facts_data.get('summary_paragraph', '')
    result["must_answer"] = _extract_must_answer(facts_data)
    # Include full entity data for detailed view (only set when the entity
    # was found in the knowledge base).
    result["full_entity"] = raw_data
    return result


def search_knowledge_base(query: str, top_k: int = 5):
    """
    Search the knowledge base and return formatted results

    Args:
        query: Text passed to the retriever's ``search`` method.
        top_k: Forwarded to the retriever as ``top_k``.

    Returns:
        A list with one dict per retriever hit, in the order the retriever
        returned them. Every dict has the keys ``entity_id``, ``entity_name``,
        ``score``, ``chunk_type``, ``subcategory``, ``emirate``, ``is_royal``,
        ``summary`` and ``must_answer``. ``full_entity`` is added only when
        the knowledge base returns data for the hit's ``entity_id``.
    """
    retriever = get_retriever()
    kb = get_knowledge_base()

    # Each hit is unpacked as a (metadata, score) pair.
    results = retriever.search(query, top_k=top_k)
    return [_format_result(metadata, score, kb) for metadata, score in results]
def get_stats():
    """Return summary statistics for the knowledge base.

    On success the dict holds ``entities`` (the length of the knowledge
    base's ``entities`` collection), ``categories`` and ``version``. If
    obtaining the knowledge base or counting its entities raises, the dict
    instead holds ``entities`` set to 0, ``categories`` and an ``error``
    message; ``version`` is not included in that case.
    """
    # Fixed: 8 knowledge categories as defined in the system
    category_count = 8

    try:
        entity_count = len(get_knowledge_base().entities)
    except Exception as exc:
        # Report the failure to the caller in-band instead of raising.
        return {"entities": 0, "categories": category_count, "error": str(exc)}

    return {
        "entities": entity_count,
        "categories": category_count,
        "version": "2.3.0"
    }