"""Retrieval-only RAG engine for chat responses.""" from __future__ import annotations import logging import os import re from pathlib import Path from ingestion_engine.embedding_generator import generate_query from persistence.vector_store import VectorStore logger = logging.getLogger(__name__) K_RETRIEVE = 40 K_FINAL = 8 ALPHA = 0.05 MAX_SNIPPET_CHARS = 280 GEN_MODEL = "Qwen/Qwen2.5-7B-Instruct" MAX_NEW_TOKENS = 400 TEMPERATURE = 0.2 TIMEOUT_SEC = 45 PROMPT_FILE = Path(__file__).resolve().parent.parent / "artifacts" / "prompt.poml" def _parse_poml() -> tuple[str, str]: """Parse prompt.poml into (system_message, user_template).""" raw = PROMPT_FILE.read_text(encoding="utf-8") # System message: + rules inside role_m = re.search(r"(.*?)", raw, re.DOTALL) role = role_m.group(1).strip() if role_m else "" items = re.findall(r"(.*?)", raw, re.DOTALL) rules = "\n".join(f"{i+1}) {it.strip()}" for i, it in enumerate(items)) system_msg = f"{role}\nRules:\n{rules}" if role else rules # User template: content inside