Spaces:
Running
Running
File size: 2,260 Bytes
"""Small local reference assistant for project questions."""
from __future__ import annotations
import re
from pathlib import Path
class MiniReferenceModel:
    """Small retrieval-style assistant over the project specification.

    The specification file is read once at construction time and split into
    titled sections at every line that starts with ``#``. A question is
    answered by ranking the sections on how many distinct lowercase
    ``[a-z0-9_]+`` tokens they share with the question text.
    """

    # Compiled once; the same pattern tokenizes questions and sections.
    _TOKEN_PATTERN = re.compile(r"[a-z0-9_]+")
    # Three-character markers that open/close a fenced code block.
    _FENCE_MARKERS = ("```", "~~~")
    # At most this many matching sections are returned as sources.
    _MAX_SOURCES = 3
    # At most this many sections are quoted in the answer text.
    _MAX_ANSWER_SECTIONS = 2
    # Number of leading sections used when nothing matches the question.
    _FALLBACK_SECTIONS = 2
    # Each quoted section is cut to this many characters.
    _MAX_SECTION_CHARS = 700

    def __init__(self, spec_path: Path):
        """Load the specification at *spec_path* and index its sections.

        Args:
            spec_path: Path to the specification file; read as UTF-8.

        Any error raised by ``Path.read_text`` propagates unchanged.
        """
        self.spec_path = spec_path
        self.spec_text = spec_path.read_text(encoding="utf-8")
        self.sections = self._build_sections(self.spec_text)

    def _build_sections(self, text: str) -> list[dict[str, str]]:
        """Split *text* into sections delimited by ``#`` heading lines.

        Text before the first heading is filed under the title "Overview".
        Lines inside a fenced code block (``` or ~~~) are always kept as
        content, so a ``# comment`` in a code sample does not start a new
        section. Sections whose content is empty after stripping are dropped.

        Returns:
            A list of ``{"title": ..., "content": ...}`` dicts in document
            order.
        """
        blocks: list[dict[str, str]] = []
        current_title = "Overview"
        current_lines: list[str] = []
        open_fence = ""  # marker of the fence we are inside; "" when outside

        for line in text.splitlines():
            stripped = line.strip()

            if open_fence:
                # Only a bare run of the opening fence character closes it.
                if stripped.startswith(open_fence) and not stripped.strip(open_fence[0]):
                    open_fence = ""
                current_lines.append(line)
                continue

            marker = stripped[:3]
            if marker in self._FENCE_MARKERS:
                # A backtick after the opening run means inline code
                # (e.g. ```x``` on one line), not a fence.
                is_inline_code = marker == "```" and "`" in stripped.lstrip("`")
                if not is_inline_code:
                    open_fence = marker
                current_lines.append(line)
                continue

            if line.startswith("#"):
                content = "\n".join(current_lines).strip()
                if content:
                    blocks.append({"title": current_title, "content": content})
                current_lines = []
                current_title = line.lstrip("#").strip()
            else:
                current_lines.append(line)

        # Flush whatever follows the last heading.
        content = "\n".join(current_lines).strip()
        if content:
            blocks.append({"title": current_title, "content": content})
        return blocks

    def _tokens(self, text: str) -> set[str]:
        """Return the set of lowercase ``[a-z0-9_]+`` tokens found in *text*."""
        return set(self._TOKEN_PATTERN.findall(text.lower()))

    def ask(self, question: str) -> tuple[str, list[dict[str, str]]]:
        """Answer *question* from the best-matching specification sections.

        Sections are scored by the number of distinct tokens they share with
        the question (title and content both count). Ties keep document order.

        Args:
            question: Free-text question; ``None`` or blank is treated as
                missing.

        Returns:
            A ``(answer, sources)`` tuple. ``sources`` holds up to three
            sections with a positive score, or the first two sections of the
            document when nothing matches. ``answer`` quotes at most the
            first two of those sources, each whitespace-collapsed and cut to
            700 characters, joined by a blank line. For a missing question
            the result is ``("Write a question first.", [])``.
        """
        clean_question = (question or "").strip()
        if not clean_question:
            return "Write a question first.", []

        query_tokens = self._tokens(clean_question)
        scored = [
            (
                len(query_tokens & self._tokens(section["title"] + " " + section["content"])),
                section,
            )
            for section in self.sections
        ]
        # list.sort is stable, so equally scored sections stay in document order.
        scored.sort(key=lambda item: item[0], reverse=True)

        top_sections = [
            section for score, section in scored[: self._MAX_SOURCES] if score > 0
        ]
        if not top_sections:
            # Nothing matched: fall back to the opening sections of the spec.
            top_sections = self.sections[: self._FALLBACK_SECTIONS]

        answer_parts = []
        for section in top_sections[: self._MAX_ANSWER_SECTIONS]:
            condensed = " ".join(section["content"].split())
            answer_parts.append(
                f"{section['title']}: {condensed[: self._MAX_SECTION_CHARS]}"
            )
        return "\n\n".join(answer_parts), top_sections
|