Spaces:
Running on Zero
Running on Zero
Add bounded multi-turn chat memory (prompt.py)
Browse files
prompt.py
CHANGED
|
@@ -13,7 +13,16 @@ SYSTEM_PROMPT = (
|
|
| 13 |
)
|
| 14 |
|
| 15 |
# Keep the context budget modest so generation stays fast on ZeroGPU.
|
| 16 |
-
MAX_CONTEXT_CHARS =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
|
| 19 |
def _format_context(hits: list[Hit]) -> str:
|
|
@@ -29,13 +38,56 @@ def _format_context(hits: list[Hit]) -> str:
|
|
| 29 |
return "\n\n".join(blocks)
|
| 30 |
|
| 31 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
def build_messages(question: str, hits: list[Hit],
|
| 33 |
-
history:
|
| 34 |
-
"""Build chat-template messages
|
| 35 |
context = _format_context(hits)
|
| 36 |
messages: list[dict] = [{"role": "system", "content": SYSTEM_PROMPT}]
|
| 37 |
-
|
| 38 |
-
messages.extend(history)
|
| 39 |
user = question if not context else (
|
| 40 |
f"Reference GDScript snippets from a curated Godot corpus:\n\n"
|
| 41 |
f"{context}\n\n---\n\nQuestion: {question}"
|
|
|
|
| 13 |
)
|
| 14 |
|
| 15 |
# Keep the context budget modest so generation stays fast on ZeroGPU.
# Trimmed a bit since history now shares the prompt.
MAX_CONTEXT_CHARS = 5_000

# Multi-turn memory bounds (keep prompts — and ZeroGPU time — bounded).
# Default number of prior user+assistant exchanges kept.
MAX_HISTORY_TURNS = 4
# Hard cap on total history text fed to the model.
MAX_HISTORY_CHARS = 6_000

# Marker that app.py appends after the model's answer. Everything from the
# marker onward is our validation/sources/notes decoration and must be stripped
# before the answer is fed back as conversation history.
VALIDATION_DELIM = "\n\n---\n**Validation:**"
|
| 26 |
|
| 27 |
|
| 28 |
def _format_context(hits: list[Hit]) -> str:
|
|
|
|
| 38 |
return "\n\n".join(blocks)
|
| 39 |
|
| 40 |
|
| 41 |
+
def _clean_assistant(content: str) -> str:
    """Return only the text that precedes ``VALIDATION_DELIM``.

    Everything from the first occurrence of the delimiter onward is dropped,
    and trailing whitespace is stripped from what remains. Text that does not
    contain the delimiter comes back with only its trailing whitespace removed.
    """
    answer, _sep, _decoration = content.partition(VALIDATION_DELIM)
    return answer.rstrip()
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
def _normalize(history) -> list[dict]:
    """Flatten gradio history (messages OR tuples format) to role/content dicts.

    Args:
        history: Iterable whose items are either dicts carrying a ``"role"``
            key (and optionally ``"content"``) or two-item list/tuple pairs of
            ``(user, assistant)``. ``None`` or an empty value yields ``[]``.
            Items of any other shape are ignored.

    Returns:
        A new list of ``{"role": ..., "content": ...}`` dicts in input order.
        Non-string content is skipped, assistant text is passed through
        ``_clean_assistant``, and any entry that is blank *after* cleaning is
        dropped.
    """
    out: list[dict] = []

    def _add(role, content) -> None:
        # Shared by both history shapes so they apply identical filtering.
        if not isinstance(content, str):  # skip file/component messages, None
            return
        if role == "assistant":
            content = _clean_assistant(content)
        # Check emptiness only after cleaning: an assistant message made up
        # solely of the validation decoration must not become an empty turn.
        if content.strip():
            out.append({"role": role, "content": content})

    for item in history or []:
        if isinstance(item, dict) and "role" in item:
            _add(item.get("role"), item.get("content", ""))
        elif isinstance(item, (list, tuple)) and len(item) == 2:
            user_text, assistant_text = item
            _add("user", user_text)
            _add("assistant", assistant_text)
    return out
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
def _prepare_history(history, max_turns: int) -> list[dict]:
    """Select the tail of the conversation that fits the history budgets.

    Args:
        history: Raw chat history; it is flattened with ``_normalize``.
        max_turns: Number of most recent exchanges to consider. Up to
            ``2 * max_turns`` normalized messages are taken from the end.
            Zero or a negative value disables history entirely.

    Returns:
        Messages in chronological order. The newest message is always taken;
        older ones are added while the running character total stays within
        ``MAX_HISTORY_CHARS``. Leading messages are then discarded until the
        first one with role ``"user"``, so the result may be empty.
    """
    # Guard first: with max_turns == 0 the slice below would be [-0:], i.e. everything.
    if max_turns <= 0:
        return []

    recent = _normalize(history)[-2 * max_turns:]

    # Walk from newest to oldest, spending the character budget as we go.
    kept: list[dict] = []
    used = 0
    for message in reversed(recent):
        used += len(message["content"])
        if kept and used > MAX_HISTORY_CHARS:
            break
        kept.append(message)
    kept.reverse()

    # Don't start on an assistant turn: cut everything before the first user message.
    first_user = next(
        (index for index, message in enumerate(kept) if message["role"] == "user"),
        len(kept),
    )
    return kept[first_user:]
|
| 83 |
+
|
| 84 |
+
|
| 85 |
def build_messages(question: str, hits: list[Hit],
|
| 86 |
+
history=None, max_turns: int = MAX_HISTORY_TURNS) -> list[dict]:
|
| 87 |
+
"""Build chat-template messages: system + bounded history + context+question."""
|
| 88 |
context = _format_context(hits)
|
| 89 |
messages: list[dict] = [{"role": "system", "content": SYSTEM_PROMPT}]
|
| 90 |
+
messages.extend(_prepare_history(history, max_turns))
|
|
|
|
| 91 |
user = question if not context else (
|
| 92 |
f"Reference GDScript snippets from a curated Godot corpus:\n\n"
|
| 93 |
f"{context}\n\n---\n\nQuestion: {question}"
|