AskXeno / src /knowledge_base.py
github-actions
Sync from GitHub
3cdce90
"""
Knowledge Base module for XENO Bot
Handles loading and preparing knowledge base data
"""
from typing import Any, Dict, Hashable, List, Tuple
import pandas as pd
from src.config import KNOWLEDGE_BASE_PATH
def load_knowledge_base(filepath: str = KNOWLEDGE_BASE_PATH) -> pd.DataFrame:
    """
    Read the knowledge base JSON file into a DataFrame

    Rows with a missing "Content" value are dropped. Loading is
    best-effort: if reading or filtering raises, the error is printed
    and an empty DataFrame is returned in place of the data.

    Args:
        filepath: Path to the knowledge base JSON file

    Returns:
        DataFrame with knowledge base data (empty on failure)
    """
    try:
        # A missing "Content" column raises here as well, and falls through
        # to the empty-frame fallback below.
        return pd.read_json(filepath).dropna(subset=["Content"])
    except Exception as err:
        print(f"Error loading knowledge base: {err}")
        return pd.DataFrame()
def prepare_documents(
    data: List[Dict[Hashable, Any]],
) -> Tuple[List[str], List[Dict], List[str]]:
    """
    Prepare documents for vector store

    Every entry yields one document string (question followed by answer),
    one metadata dict and one id. The three returned lists are parallel:
    position i in each list refers to the same entry.

    An entry that lacks a required key ("Question", "Content" or "ID") is
    reported with a printed message and skipped; it contributes nothing to
    any of the lists, and the remaining entries are still processed.

    Args:
        data: List of knowledge base entries

    Returns:
        Tuple of (documents, metadatas, ids)
    """
    documents: List[str] = []
    metadatas: List[Dict] = []
    ids: List[str] = []

    for item in data:
        # Read all required keys before appending anything, so a malformed
        # entry cannot leave the three lists with different lengths.
        try:
            question = item["Question"]
            content = item["Content"]
            item_id = item["ID"]
        except KeyError as e:
            print(f"Missing expected key in data item: {e}")
            continue

        # Document text combines question and answer
        documents.append(f"Question: {question}\nAnswer: {content}")
        # Optional fields default to an empty string when absent
        metadatas.append(
            {
                "question": question,
                "content": content,
                "section": item.get("Section", ""),
                "source": item.get("Source", ""),
                "owner": item.get("Owner", ""),
                "tag": item.get("Tag", ""),
                "id": item_id,
            }
        )
        ids.append(item_id)

    return documents, metadatas, ids
def get_knowledge_base_data() -> Tuple[List[str], List[Dict], List[str]]:
    """
    Load the knowledge base and turn it into vector store inputs

    Calls load_knowledge_base() with its default path, converts the
    resulting DataFrame into a list of per-row dicts, and passes that
    list to prepare_documents().

    Returns:
        Tuple of (documents, metadatas, ids)
    """
    # "records" orientation gives one dict per row, keyed by column name
    records = load_knowledge_base().to_dict("records")
    return prepare_documents(records)