"""Prompt assembly: system instruction + retrieved context -> chat messages.""" from __future__ import annotations from rag import Hit SYSTEM_PROMPT = ( "You are an expert Godot 4 GDScript assistant. Answer using the reference " "snippets provided below when they are relevant. Always write GDScript that " "targets Godot 4 (GDScript 2.0). Put runnable code in ```gdscript fenced " "blocks. Prefer static typing and @export/@onready annotations where natural. " "If the snippets don't cover the question, answer from general Godot knowledge " "and say so briefly. Be concise." ) # Keep the context budget modest so generation stays fast on ZeroGPU. MAX_CONTEXT_CHARS = 5000 # trimmed a bit since history now shares the prompt # Multi-turn memory bounds (keep prompts — and ZeroGPU time — bounded). MAX_HISTORY_TURNS = 4 # default # of prior user+assistant exchanges kept MAX_HISTORY_CHARS = 6000 # hard cap on total history text fed to the model # Marker that app.py appends after the model's answer; everything from here on is # our validation/sources/notes decoration and must be stripped before the answer # is fed back as conversation history. 
VALIDATION_DELIM = "\n\n---\n**Validation:**"

# Joiner placed between consecutive reference snippets in the context string.
_SNIPPET_SEP = "\n\n"

# Only these roles are replayed to the model as conversation history.
_HISTORY_ROLES = ("user", "assistant")


def _format_context(hits: list[Hit]) -> str:
    """Render retrieved hits as numbered snippet blocks within the char budget.

    Hits are consumed in the order given and rendering stops at the first one
    that no longer fits. The budget is applied to the joined string, separators
    included, so the result is never longer than ``MAX_CONTEXT_CHARS``.

    Args:
        hits: Retrieved hits; each must expose ``repo``, ``origin_url`` and
            ``text``. ``None`` or an empty list yields an empty string.

    Returns:
        The snippet blocks joined by blank lines, or ``""`` when nothing fits.
    """
    blocks: list[str] = []
    used = 0
    for i, h in enumerate(hits or [], 1):
        src = h.repo or h.origin_url or "corpus"
        block = f"# Snippet {i} (source: {src})\n{h.text.strip()}"
        # Charge the separator as well; otherwise the joined string overshoots
        # the cap by len(_SNIPPET_SEP) for every snippet after the first.
        cost = len(block) + (len(_SNIPPET_SEP) if blocks else 0)
        if used + cost > MAX_CONTEXT_CHARS:
            break
        blocks.append(block)
        used += cost
    return _SNIPPET_SEP.join(blocks)


def _clean_assistant(content: str) -> str:
    """Strip our validation/sources/notes so only the model's answer remains.

    Everything from the first ``VALIDATION_DELIM`` onward is discarded and
    trailing whitespace is trimmed.
    """
    answer, _, _ = content.partition(VALIDATION_DELIM)
    return answer.rstrip()


def _history_message(role, content) -> dict | None:
    """Build one sanitized history message, or ``None`` if it must be skipped.

    Skipped: roles other than user/assistant, non-string content (file or
    component messages), and messages that are blank once assistant decoration
    has been removed.
    """
    if role not in _HISTORY_ROLES or not isinstance(content, str):
        return None
    if role == "assistant":
        content = _clean_assistant(content)
    # Test for emptiness AFTER cleaning, so a reply that consisted only of
    # decoration does not become an empty assistant turn.
    if not content.strip():
        return None
    return {"role": role, "content": content}


def _normalize(history) -> list[dict]:
    """Flatten gradio history (messages OR tuples format) to role/content dicts.

    Accepts a list of ``{"role": ..., "content": ...}`` dicts or a list of
    ``(user, assistant)`` pairs; both go through the same sanitizing rules.
    Items of any other shape are ignored. ``None`` is treated as empty.
    """
    out: list[dict] = []
    for item in history or []:
        if isinstance(item, dict) and "role" in item:
            candidates = [(item.get("role"), item.get("content", ""))]
        elif isinstance(item, (list, tuple)) and len(item) == 2:
            user_text, assistant_text = item
            candidates = [("user", user_text), ("assistant", assistant_text)]
        else:
            continue
        for role, content in candidates:
            message = _history_message(role, content)
            if message is not None:
                out.append(message)
    return out


def _prepare_history(history, max_turns: int) -> list[dict]:
    """Last `max_turns` exchanges, sanitized and char-bounded, starting on a user turn.

    Args:
        history: Gradio chat history in messages or tuples format (or ``None``).
        max_turns: Number of prior user+assistant exchanges to keep; values
            ``<= 0`` disable history entirely.

    Returns:
        Messages in chronological order. The most recent message is always
        kept, even if it alone exceeds ``MAX_HISTORY_CHARS``; older messages
        are added only while the running total stays within that budget.
    """
    max_turns = int(max_turns)  # accept integral floats (4.0): slice bounds must be ints
    if max_turns <= 0:  # 0 = single-turn ([-0:] would keep ALL)
        return []
    recent = _normalize(history)[-2 * max_turns:]

    # Apply the char budget from the most recent message backwards.
    total = 0
    kept: list[dict] = []
    for message in reversed(recent):
        total += len(message["content"])
        if kept and total > MAX_HISTORY_CHARS:
            break
        kept.append(message)
    kept.reverse()

    # Don't start on an assistant turn: slice from the first user message
    # (one pass instead of repeated O(n) pop(0) calls).
    first_user = next(
        (i for i, message in enumerate(kept) if message["role"] == "user"),
        len(kept),
    )
    return kept[first_user:]


def build_messages(question: str, hits: list[Hit], history=None,
                   max_turns: int = MAX_HISTORY_TURNS) -> list[dict]:
    """Build chat-template messages: system + bounded history + context+question.

    Args:
        question: The user's current question.
        hits: Retrieved snippets to offer as reference context.
        history: Prior conversation in gradio messages or tuples format.
        max_turns: How many prior exchanges to replay (``<= 0`` for none).

    Returns:
        A list of ``{"role", "content"}`` dicts. The final user message is the
        bare question when no snippet fits the context budget, otherwise the
        snippets followed by the question.
    """
    context = _format_context(hits)
    messages: list[dict] = [{"role": "system", "content": SYSTEM_PROMPT}]
    messages.extend(_prepare_history(history, max_turns))
    if context:
        user = (
            "Reference GDScript snippets from a curated Godot corpus:\n\n"
            f"{context}\n\n---\n\nQuestion: {question}"
        )
    else:
        user = question
    messages.append({"role": "user", "content": user})
    return messages


def build_fix_messages(broken_code: str, error: str) -> list[dict]:
    """Messages asking the model to fix a GDScript snippet that failed to parse.

    Args:
        broken_code: The GDScript source that failed to parse.
        error: The parser error text to show the model.

    Returns:
        A two-message conversation: the system prompt, then a user message
        holding the error and the code inside a ```gdscript fence.
    """
    return [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": (
            "The following GDScript failed to parse with this error:\n"
            f"{error}\n\nFix it and return ONLY the corrected GDScript in a "
            f"```gdscript block:\n\n```gdscript\n{broken_code}\n```"
        )},
    ]