"""Service layer for the PDF knowledge base: ingest, list, search, build context."""

import os
import uuid

from app.config import KB_FILES_DIR, KB_INDEX_PATH
from app.knowledge.loader import extract_text_from_pdf, save_uploaded_pdf
from app.knowledge.vector_store import BM25Index, Doc, save_index, load_index


class KnowledgeService:
    """Facade over the BM25 index stored at ``KB_INDEX_PATH``.

    The index is loaded once in ``__init__`` and written back with
    ``save_index`` after every successful ``add_pdf`` call.
    """

    def __init__(self):
        """Ensure the PDF storage directory exists and load the index."""
        os.makedirs(KB_FILES_DIR, exist_ok=True)
        self.index: BM25Index = load_index(KB_INDEX_PATH)

    def add_pdf(self, uploaded_path: str) -> str:
        """Store an uploaded PDF, index its text and return a status message.

        Args:
            uploaded_path: Path of the uploaded file; it is handed to
                ``save_uploaded_pdf`` together with ``KB_FILES_DIR``.

        Returns:
            A user-facing message: a warning when no usable text could be
            extracted, otherwise a confirmation with the document title
            and its generated id.
        """
        dest_path = save_uploaded_pdf(uploaded_path, KB_FILES_DIR)
        text = extract_text_from_pdf(dest_path)

        # Whitespace-only output (bare newlines / form feeds) carries no
        # searchable content, so it is treated the same as an empty result
        # instead of being indexed as a blank document.
        has_content = bool(text) and not (
            isinstance(text, str) and not text.strip()
        )
        if not has_content:
            # NOTE(review): the file saved at dest_path is left in place
            # here, as before; confirm whether it should be cleaned up.
            return f"⚠️ Не удалось извлечь текст из PDF: {os.path.basename(dest_path)}"

        doc = Doc(
            # Short id: the first 8 characters of a random UUID4 string.
            doc_id=str(uuid.uuid4())[:8],
            title=os.path.basename(dest_path),
            text=text,
            source_path=dest_path,
        )
        self.index.add(doc)
        save_index(KB_INDEX_PATH, self.index)
        return f"✅ Добавлено в базу: {doc.title} (id={doc.doc_id})"

    def list_docs(self) -> str:
        """Return one ``- title (id=...)`` line per indexed document.

        A placeholder message is returned when the index holds no documents.
        """
        if not self.index.docs:
            return "Пока нет документов. Загрузите PDF."
        return "\n".join(
            f"- {d.title} (id={d.doc_id})" for d in self.index.docs
        )

    def search(self, query: str, top_k: int = 5):
        """Delegate to ``self.index.search`` and return its result unchanged."""
        return self.index.search(query, top_k=top_k)

    def build_context(self, query: str, top_k: int = 4) -> str:
        """Format the search hits for ``query`` into one context string.

        Each hit is read through its ``'title'``, ``'score'`` and
        ``'snippet'`` keys; the resulting blocks are joined with a ``---``
        separator line. Returns an empty string when there are no hits.
        """
        hits = self.search(query, top_k=top_k)
        if not hits:
            return ""
        blocks = [
            f"[Источник: {h['title']} | score={h['score']}]\n{h['snippet']}"
            for h in hits
        ]
        return "\n\n---\n\n".join(blocks)