File size: 1,745 Bytes
851f3c6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import os
import uuid
from app.config import KB_FILES_DIR, KB_INDEX_PATH
from app.knowledge.loader import extract_text_from_pdf, save_uploaded_pdf
from app.knowledge.vector_store import BM25Index, Doc, save_index, load_index

class KnowledgeService:
    """Facade over the PDF knowledge base: add PDFs, list them, and search them.

    The service keeps one index object in ``self.index`` (loaded from
    ``KB_INDEX_PATH``) and writes it back to that path after every
    successful ``add_pdf`` call.
    """

    def __init__(self):
        """Create the PDF storage directory if needed and load the index."""
        os.makedirs(KB_FILES_DIR, exist_ok=True)
        self.index: BM25Index = load_index(KB_INDEX_PATH)

    def add_pdf(self, uploaded_path: str) -> str:
        """Store an uploaded PDF, index its text, and report the outcome.

        Args:
            uploaded_path: Path of the uploaded PDF file.

        Returns:
            A user-facing status message: a success line containing the
            document title and id, or a warning when no usable text could
            be extracted from the PDF.
        """
        dest_path = save_uploaded_pdf(uploaded_path, KB_FILES_DIR)
        title = os.path.basename(dest_path)
        text = extract_text_from_pdf(dest_path)
        # Whitespace-only output carries nothing searchable, so treat it the
        # same as an empty result instead of indexing a blank document.
        # NOTE(review): assumes the extractor returns a str (or None) —
        # confirm against app.knowledge.loader.
        if not text or not text.strip():
            return f"⚠️ Не удалось извлечь текст из PDF: {title}"

        doc = Doc(
            # Short id: the first 8 hex digits of a random UUID.
            doc_id=uuid.uuid4().hex[:8],
            title=title,
            text=text,
            source_path=dest_path,
        )
        self.index.add(doc)
        # Save right away so the new document is written to KB_INDEX_PATH.
        save_index(KB_INDEX_PATH, self.index)
        return f"✅ Добавлено в базу: {doc.title} (id={doc.doc_id})"

    def list_docs(self) -> str:
        """Return one ``- title (id=...)`` line per indexed document.

        Returns:
            The newline-joined listing, or a hint to upload a PDF when the
            index holds no documents.
        """
        if not self.index.docs:
            return "Пока нет документов. Загрузите PDF."
        return "\n".join(
            f"- {d.title} (id={d.doc_id})" for d in self.index.docs
        )

    def search(self, query: str, top_k: int = 5):
        """Search the index for *query* and return at most *top_k* hits.

        Args:
            query: Free-text search query.
            top_k: Maximum number of hits requested from the index.

        Returns:
            Whatever ``self.index.search`` returns for a non-blank query;
            an empty list when the query is empty or only whitespace.
        """
        # A blank query has no terms to match; skip the index call entirely.
        if not query or not query.strip():
            return []
        return self.index.search(query, top_k=top_k)

    def build_context(self, query: str, top_k: int = 4) -> str:
        """Format the best hits for *query* as one context string.

        Each hit becomes a block with a source header (title and score)
        followed by its snippet; blocks are separated by a ``---`` rule.

        Args:
            query: Free-text search query.
            top_k: Maximum number of hits to include.

        Returns:
            The joined blocks, or an empty string when there are no hits.
        """
        hits = self.search(query, top_k=top_k)
        if not hits:
            return ""
        blocks = [
            f"[Источник: {h['title']} | score={h['score']}]\n{h['snippet']}"
            for h in hits
        ]
        return "\n\n---\n\n".join(blocks)