Spaces:
Running
Running
| """Small local reference assistant for project questions.""" | |
| from __future__ import annotations | |
| import re | |
| from pathlib import Path | |
class MiniReferenceModel:
    """Small retrieval-style assistant over the project specification.

    The specification text is split into sections at heading lines (lines
    starting with ``#``). A question is answered by ranking the sections by
    how many distinct lowercase word tokens they share with the question and
    returning condensed excerpts of the best matches.
    """

    # Tunable limits used by ``ask``.
    MAX_SOURCES = 3  # sections returned as sources
    MAX_ANSWER_SECTIONS = 2  # sections quoted in the answer text
    MAX_SECTION_CHARS = 700  # characters kept from each quoted section
    FALLBACK_SECTIONS = 2  # leading sections used when nothing matches

    _TOKEN_RE = re.compile(r"[a-z0-9_]+")
    # Fence line of a fenced code block: up to three spaces of indent, then a
    # run of at least three backticks or tildes, then the rest of the line.
    _FENCE_RE = re.compile(r"^ {0,3}(`{3,}|~{3,})(.*)$")

    def __init__(self, spec_path: Path):
        """Read the specification at *spec_path* (UTF-8) and index its sections."""
        self.spec_path = spec_path
        self.spec_text = spec_path.read_text(encoding="utf-8")
        self.sections = self._build_sections(self.spec_text)

    def _build_sections(self, text: str) -> list[dict[str, str]]:
        """Split *text* into ``{"title", "content"}`` dicts, one per heading.

        Lines before the first heading belong to a section titled
        ``"Overview"``. Sections whose content is empty after stripping are
        dropped. Lines inside fenced code blocks are always content, so a
        ``# comment`` in a code sample does not start a new section. An
        unclosed fence extends to the end of the text.
        """
        blocks: list[dict[str, str]] = []
        current_title = "Overview"
        current_lines: list[str] = []
        open_fence: str | None = None  # marker run of the fence we are inside

        for line in text.splitlines():
            fence = self._FENCE_RE.match(line)

            if open_fence is not None:
                # A closing fence uses the same character, is at least as
                # long as the opener, and carries no trailing text.
                if (
                    fence
                    and fence.group(1)[0] == open_fence[0]
                    and len(fence.group(1)) >= len(open_fence)
                    and not fence.group(2).strip()
                ):
                    open_fence = None
                current_lines.append(line)
                continue

            if fence:
                marker, rest = fence.group(1), fence.group(2)
                # A backtick run followed by more backticks on the same line
                # is inline code, not a fence opener.
                if not (marker[0] == "`" and "`" in rest):
                    open_fence = marker
                    current_lines.append(line)
                    continue

            if line.startswith("#"):
                content = "\n".join(current_lines).strip()
                if content:
                    blocks.append({"title": current_title, "content": content})
                current_lines = []
                current_title = line.lstrip("#").strip()
            else:
                current_lines.append(line)

        content = "\n".join(current_lines).strip()
        if content:
            blocks.append({"title": current_title, "content": content})
        return blocks

    def _tokens(self, text: str) -> set[str]:
        """Return the set of lowercase ``[a-z0-9_]+`` tokens found in *text*."""
        return set(self._TOKEN_RE.findall(text.lower()))

    def ask(self, question: str) -> tuple[str, list[dict[str, str]]]:
        """Answer *question* from the indexed sections.

        Returns ``(answer, sources)``. ``sources`` holds up to
        ``MAX_SOURCES`` sections that share at least one token with the
        question, best match first; ties keep document order. If none match,
        the first ``FALLBACK_SECTIONS`` sections are used instead. ``answer``
        joins the first ``MAX_ANSWER_SECTIONS`` sources, each as
        ``"<title>: <whitespace-collapsed content>"`` truncated to
        ``MAX_SECTION_CHARS`` characters. A blank or ``None`` question yields
        a prompt message and no sources.
        """
        clean_question = (question or "").strip()
        if not clean_question:
            return "Write a question first.", []

        query_tokens = self._tokens(clean_question)
        scored = [
            (
                len(query_tokens & self._tokens(section["title"] + " " + section["content"])),
                section,
            )
            for section in self.sections
        ]
        # list.sort is stable, also with reverse=True, so equally scored
        # sections stay in document order.
        scored.sort(key=lambda item: item[0], reverse=True)

        top_sections = [
            section for score, section in scored[: self.MAX_SOURCES] if score > 0
        ]
        if not top_sections:
            top_sections = self.sections[: self.FALLBACK_SECTIONS]

        answer_parts = []
        for section in top_sections[: self.MAX_ANSWER_SECTIONS]:
            condensed = " ".join(section["content"].split())
            answer_parts.append(
                f"{section['title']}: {condensed[: self.MAX_SECTION_CHARS]}"
            )
        return "\n\n".join(answer_parts), top_sections