File size: 2,260 Bytes
8c1c41e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
"""Small local reference assistant for project questions."""

from __future__ import annotations

import re
from pathlib import Path


class MiniReferenceModel:
    """Small retrieval-style assistant over the project specification.

    The specification text is split into titled sections at lines that start
    with ``#``. A question is answered by ranking the sections on how many
    distinct word tokens they share with the question and quoting the best
    matches.
    """

    # Unicode-aware word pattern so non-ASCII words are kept as whole tokens.
    _TOKEN_PATTERN = re.compile(r"\w+")
    # Character runs that open (and later close) a fenced code block.
    _FENCE_MARKERS = ("```", "~~~")

    def __init__(self, spec_path: Path):
        """Read the UTF-8 specification at *spec_path* and split it into sections."""
        self.spec_path = spec_path
        self.spec_text = spec_path.read_text(encoding="utf-8")
        self.sections = self._build_sections(self.spec_text)

    def _opening_fence(self, stripped: str) -> str:
        """Return the fence marker that *stripped* opens, or ``""`` if none."""
        for marker in self._FENCE_MARKERS:
            if not stripped.startswith(marker):
                continue
            # A backtick fence may not carry further backticks after the
            # opening run; a line like ```code``` is inline code instead.
            if marker == "```" and "`" in stripped.lstrip("`"):
                return ""
            return marker
        return ""

    def _build_sections(self, text: str) -> list[dict[str, str]]:
        """Split *text* into ``{"title", "content"}`` dicts, one per heading.

        A line starting with ``#`` begins a new section whose title is the
        line without its leading ``#`` characters. Text before the first
        heading is filed under "Overview". Lines inside fenced code blocks
        are always content, so ``# comment`` lines in code samples do not
        start a section. Sections whose content is empty are dropped.
        """
        sections: list[dict[str, str]] = []
        title = "Overview"
        body: list[str] = []
        open_fence = ""  # marker of the fence we are currently inside, if any

        for line in text.splitlines():
            stripped = line.lstrip()

            if open_fence:
                # A closing fence starts with the opening marker and holds
                # nothing but fence characters.
                if stripped.startswith(open_fence) and not stripped.rstrip().strip(open_fence[0]):
                    open_fence = ""
                body.append(line)
                continue

            open_fence = self._opening_fence(stripped)
            if open_fence or not line.startswith("#"):
                body.append(line)
                continue

            # Heading line: close the section gathered so far, start the next.
            content = "\n".join(body).strip()
            if content:
                sections.append({"title": title, "content": content})
            body = []
            title = line.lstrip("#").strip()

        content = "\n".join(body).strip()
        if content:
            sections.append({"title": title, "content": content})
        return sections

    def _tokens(self, text: str) -> set[str]:
        """Return the set of lower-cased word tokens found in *text*."""
        return set(self._TOKEN_PATTERN.findall(text.lower()))

    def ask(self, question: str) -> tuple[str, list[dict[str, str]]]:
        """Answer *question* from the best-matching specification sections.

        Returns ``(answer, sections)``. ``sections`` holds up to three
        sections that share at least one token with the question, best match
        first; when nothing matches, it holds the first two sections of the
        specification. ``answer`` quotes the first two of those sections,
        whitespace-collapsed and cut to 700 characters each. A blank or
        ``None`` question yields a prompt message and an empty list.
        """
        clean_question = (question or "").strip()
        if not clean_question:
            return "Write a question first.", []

        query_tokens = self._tokens(clean_question)
        scored = [
            (len(query_tokens & self._tokens(section["title"] + " " + section["content"])), section)
            for section in self.sections
        ]
        # list.sort is stable, so equally scored sections keep document order.
        scored.sort(key=lambda item: item[0], reverse=True)

        top_sections = [section for score, section in scored[:3] if score > 0]
        if not top_sections:
            top_sections = self.sections[:2]

        answer_parts = []
        for section in top_sections[:2]:
            condensed = " ".join(section["content"].split())
            answer_parts.append(f"{section['title']}: {condensed[:700]}")
        return "\n\n".join(answer_parts), top_sections