cadforge / server /docs_search.py
eventhorizon28's picture
Upload folder using huggingface_hub
7c72eb2 verified
import logging
import re
import time
from pathlib import Path
from typing import Dict, List, Optional
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Documentation directory that sits next to this source file.
DOCS_ROOT = Path(__file__).parent / "docs"

# Topic name -> documentation files for that topic.
# Each file is written as a tuple of path components relative to DOCS_ROOT and
# joined onto DOCS_ROOT below. Some files appear under more than one topic.
TOPIC_MAP: Dict[str, List[Path]] = {
    topic_name: [DOCS_ROOT.joinpath(*parts) for parts in part_lists]
    for topic_name, part_lists in (
        (
            "basics",
            [
                ("skill.md",),
                ("reference", "quickstart.rst"),
                ("reference", "primer.rst"),
            ],
        ),
        (
            "selectors",
            [
                ("concepts", "selectors.md"),
                ("reference", "selectors.rst"),
            ],
        ),
        (
            "booleans",
            [
                ("concepts", "brep-mindset.md"),
                ("patterns", "common-patterns.md"),
            ],
        ),
        (
            "transforms",
            [
                ("concepts", "workplanes.md"),
                ("reference", "workplane.rst"),
            ],
        ),
        (
            "features",
            [
                ("patterns", "common-patterns.md"),
                ("patterns", "anti-patterns.md"),
            ],
        ),
        (
            "sketch",
            [
                ("reference", "sketch.rst"),
            ],
        ),
        (
            "advanced",
            [
                ("concepts", "free-function-api.md"),
                ("reference", "free-func.rst"),
                ("reference", "extending.rst"),
            ],
        ),
        (
            "examples",
            [
                ("reference", "examples.rst"),
            ],
        ),
        (
            "anti-patterns",
            [
                ("patterns", "anti-patterns.md"),
            ],
        ),
        (
            "workplanes",
            [
                ("concepts", "workplanes.md"),
            ],
        ),
    )
}
def search_docs(
    topic: Optional[str] = None,
    query: Optional[str] = None,
    context_lines: int = 5,
    max_results: int = 10,
    max_chars: int = 4000,
) -> List[str]:
    """Return documentation text for a topic and/or a free-text query.

    File selection:
      * ``topic`` is a key of ``TOPIC_MAP``: use the files mapped to it.
      * ``topic`` is any other non-empty string: use files under ``DOCS_ROOT``
        whose stem contains it (see ``_find_files_by_name``).
      * no ``topic``: use every file listed in ``TOPIC_MAP``, de-duplicated.

    Without ``query`` the selected files are returned whole, each prefixed
    with a ``=== name ===`` header, until ``max_chars`` characters of file
    content have been emitted (headers are not counted). With ``query`` the
    files are searched line-by-line first, and paragraph scoring is used as a
    fallback when the line search finds nothing.

    Args:
        topic: Topic key or file-name fragment; ``None``/empty means all topics.
        query: Whitespace-separated keywords; ``None``/empty means no search.
        context_lines: Lines of context around each line-search hit.
        max_results: Maximum number of search snippets.
        max_chars: Character budget for the returned text.

    Returns:
        A list of strings (file dumps or snippets). When no file matches the
        topic, or a query yields nothing, the list holds a single explanatory
        message instead. The no-query path may return an empty list if none of
        the selected files can be read.
    """
    # perf_counter is monotonic, so the elapsed time cannot go negative if the
    # wall clock is adjusted mid-call.
    started = time.perf_counter()

    if topic and topic in TOPIC_MAP:
        files = TOPIC_MAP[topic]
    elif topic:
        files = _find_files_by_name(topic)
    else:
        # Several topics share files. De-duplicate while keeping first-seen
        # order: going through a set would make the order, and therefore which
        # files fit in the max_chars budget, differ from one process to the
        # next because string hashing is randomised.
        files = list(
            dict.fromkeys(fp for group in TOPIC_MAP.values() for fp in group)
        )

    if not files:
        return [f"No documentation found for topic: {topic}"]

    if not query:
        results = []
        total_chars = 0
        for fp in files:
            if not fp.exists():
                continue
            try:
                content = fp.read_text(encoding="utf-8", errors="replace")
            except OSError as exc:
                # Same best-effort policy as the search helpers: one unreadable
                # file should not abort the whole lookup.
                logger.warning("Could not read doc file %s: %s", fp, exc)
                continue
            if total_chars + len(content) > max_chars:
                remaining = max_chars - total_chars
                # Only emit a truncated tail if it is long enough to be useful.
                if remaining > 200:
                    results.append(
                        f"=== {fp.name} (truncated) ===\n{content[:remaining]}..."
                    )
                break
            results.append(f"=== {fp.name} ===\n{content}")
            total_chars += len(content)
        elapsed = time.perf_counter() - started
        logger.info(
            "search_docs(topic=%s) returned %d files in %.3fs",
            topic,
            len(results),
            elapsed,
        )
        return results

    results = _grep_search(files, query, context_lines, max_results)
    if not results:
        # No line matched any keyword; fall back to paragraph-level scoring.
        results = _fuzzy_search(files, query, context_lines, max_results)

    total_chars = 0
    trimmed = []
    for snippet in results:
        if total_chars + len(snippet) > max_chars:
            remaining = max_chars - total_chars
            # Only emit a truncated snippet if it is long enough to be useful.
            if remaining > 100:
                trimmed.append(snippet[:remaining] + "...")
            break
        trimmed.append(snippet)
        total_chars += len(snippet)

    elapsed = time.perf_counter() - started
    logger.info(
        "search_docs(topic=%s, query=%s) returned %d results in %.3fs",
        topic,
        query,
        len(trimmed),
        elapsed,
    )
    if not trimmed:
        return [f"No results found for query: {query}"]
    return trimmed
def _find_files_by_name(name: str) -> List[Path]:
    """Find files under ``DOCS_ROOT`` whose stem contains *name*.

    The match is a case-insensitive substring test against the file stem
    (the file name without its final suffix). Directories are ignored.

    Args:
        name: Fragment to look for in file stems.

    Returns:
        Matching paths, sorted. Directory traversal order depends on the
        filesystem, so sorting keeps the result stable across runs.
    """
    needle = name.lower()  # hoisted: invariant across the directory walk
    return sorted(
        fp
        for fp in DOCS_ROOT.rglob("*")
        if fp.is_file() and needle in fp.stem.lower()
    )
def _grep_search(
    files: List[Path],
    query: str,
    context_lines: int = 5,
    max_results: int = 10,
) -> List[str]:
    """Line-oriented keyword search across *files*.

    The query is lower-cased and split on whitespace. A line matches when it
    contains ANY of the resulting keywords (case-insensitive substring test).
    Each match yields one snippet: a ``--- <file name>:<1-based line> ---``
    header followed by the matching line and up to ``context_lines`` lines on
    either side.

    Args:
        files: Files to scan, in order. Missing or unreadable files are skipped.
        query: Whitespace-separated keywords.
        context_lines: Lines of context on each side; negative values are
            treated as 0.
        max_results: Maximum number of snippets; values <= 0 yield no results.

    Returns:
        Snippets in file order, then line order, at most ``max_results`` long.
    """
    keywords = query.lower().split()
    # Nothing can match, or nothing may be returned: avoid all file I/O.
    # Without the max_results guard the first hit would be appended before the
    # limit is checked.
    if not keywords or max_results <= 0:
        return []
    # A negative window would slice out an empty snippet.
    context_lines = max(0, context_lines)

    results: List[str] = []
    for fp in files:
        if not fp.exists():
            continue
        try:
            lines = fp.read_text(encoding="utf-8", errors="replace").splitlines()
        except OSError as exc:
            # Best effort: an unreadable file should not abort the search.
            logger.debug("Skipping unreadable doc file %s: %s", fp, exc)
            continue
        for index, line in enumerate(lines):
            lowered = line.lower()
            if not any(kw in lowered for kw in keywords):
                continue
            start = max(0, index - context_lines)
            end = min(len(lines), index + context_lines + 1)
            snippet = "\n".join(lines[start:end])
            results.append(f"--- {fp.name}:{index + 1} ---\n{snippet}")
            if len(results) >= max_results:
                return results
    return results
def _fuzzy_search(
    files: List[Path],
    query: str,
    context_lines: int = 5,
    max_results: int = 5,
) -> List[str]:
    """Paragraph-level keyword scoring across *files*.

    Each file is split into paragraphs on blank lines. A paragraph's score is
    the number of distinct query keywords it contains (case-insensitive
    substring test). Paragraphs with a non-zero score from ALL files are ranked
    together, highest score first, so a strong match in a later file is not
    crowded out by weak matches in an earlier one. Ties keep file order, then
    document order.

    Args:
        files: Files to scan. Missing or unreadable files are skipped.
        query: Whitespace-separated keywords.
        context_lines: Unused here; kept so the signature parallels
            ``_grep_search``.
        max_results: Maximum number of paragraphs; values <= 0 yield no results.

    Returns:
        Up to ``max_results`` strings, each a
        ``--- <file name> (relevance: <score>/<keyword count>) ---`` header
        followed by the stripped paragraph.
    """
    # Drop repeated words so "fillet fillet" cannot score 2 for one match.
    keywords = list(dict.fromkeys(query.lower().split()))
    if not keywords or max_results <= 0:
        return []

    scored = []
    for fp in files:
        if not fp.exists():
            continue
        try:
            content = fp.read_text(encoding="utf-8", errors="replace")
        except OSError as exc:
            # Best effort: an unreadable file should not abort the search.
            logger.debug("Skipping unreadable doc file %s: %s", fp, exc)
            continue
        for para in re.split(r"\n\s*\n", content):
            para_lower = para.lower()
            score = sum(1 for kw in keywords if kw in para_lower)
            if score > 0:
                scored.append((score, para, fp.name))

    # list.sort is stable even with reverse=True, so equal scores keep the
    # order in which they were collected.
    scored.sort(key=lambda item: item[0], reverse=True)
    return [
        f"--- {fname} (relevance: {score}/{len(keywords)}) ---\n{para.strip()}"
        for score, para, fname in scored[:max_results]
    ]
def get_system_prompt() -> str:
    """Return the contents of ``DOCS_ROOT / "skill.md"``.

    The file is decoded as UTF-8 with undecodable bytes replaced. An empty
    string is returned when the file does not exist.
    """
    prompt_file = DOCS_ROOT / "skill.md"
    if not prompt_file.exists():
        return ""
    return prompt_file.read_text(encoding="utf-8", errors="replace")
def list_topics() -> List[str]:
    """Return the topic names defined in ``TOPIC_MAP``, in declaration order."""
    # Iterating a dict yields its keys in insertion order.
    return list(TOPIC_MAP)