| import os | |
| import uuid | |
| from app.config import KB_FILES_DIR, KB_INDEX_PATH | |
| from app.knowledge.loader import extract_text_from_pdf, save_uploaded_pdf | |
| from app.knowledge.vector_store import BM25Index, Doc, save_index, load_index | |
class KnowledgeService:
    """Ingest PDFs into a persisted BM25 index and query that index.

    The index is loaded from ``KB_INDEX_PATH`` on construction and written
    back to the same path after every successful ``add_pdf`` call.
    """

    def __init__(self):
        """Create ``KB_FILES_DIR`` if it is missing, then load the index."""
        os.makedirs(KB_FILES_DIR, exist_ok=True)
        self.index: BM25Index = load_index(KB_INDEX_PATH)

    def add_pdf(self, uploaded_path: str) -> str:
        """Register an uploaded PDF in the knowledge base.

        The file is handed to ``save_uploaded_pdf`` together with
        ``KB_FILES_DIR``; the path it returns is used for text extraction
        and recorded as the document's source path.

        Args:
            uploaded_path: Location of the uploaded PDF.

        Returns:
            A user-facing status message: a warning when no text could be
            extracted (the index is left untouched in that case), otherwise
            a confirmation containing the document title and id.
        """
        stored_path = save_uploaded_pdf(uploaded_path, KB_FILES_DIR)
        content = extract_text_from_pdf(stored_path)
        file_name = os.path.basename(stored_path)

        if not content:
            return f"⚠️ Не удалось извлечь текст из PDF: {file_name}"

        # The id is the first 8 hex digits of a random UUID.
        entry = Doc(
            doc_id=uuid.uuid4().hex[:8],
            title=file_name,
            text=content,
            source_path=stored_path,
        )
        self.index.add(entry)
        # Persist right away so the new document survives a restart.
        save_index(KB_INDEX_PATH, self.index)
        return f"✅ Добавлено в базу: {entry.title} (id={entry.doc_id})"

    def list_docs(self) -> str:
        """Return one ``- title (id=...)`` line per indexed document.

        When the index holds no documents, a placeholder message asking the
        user to upload a PDF is returned instead.
        """
        documents = self.index.docs
        if documents:
            return "\n".join(
                f"- {item.title} (id={item.doc_id})" for item in documents
            )
        return "Пока нет документов. Загрузите PDF."

    def search(self, query: str, top_k: int = 5):
        """Delegate to the index's ``search`` and return its result as-is.

        Args:
            query: Search query text.
            top_k: Forwarded to the index as the ``top_k`` keyword argument.
        """
        results = self.index.search(query, top_k=top_k)
        return results

    def build_context(self, query: str, top_k: int = 4) -> str:
        """Format the search hits for ``query`` as one context string.

        Each hit is read through its ``'title'``, ``'score'`` and
        ``'snippet'`` keys and rendered as a header line followed by the
        snippet; blocks are separated by a ``---`` rule.

        Returns:
            The joined blocks, or an empty string when the search returns
            nothing.
        """
        matches = self.search(query, top_k=top_k)
        if not matches:
            return ""

        separator = "\n\n---\n\n"
        return separator.join(
            f"[Источник: {hit['title']} | score={hit['score']}]\n{hit['snippet']}"
            for hit in matches
        )