vivekchakraverty committed on
Commit
0298f08
·
verified ·
1 Parent(s): e48654b

Add bounded multi-turn chat memory (prompt.py)

Browse files
Files changed (1) hide show
  1. prompt.py +57 -5
prompt.py CHANGED
@@ -13,7 +13,16 @@ SYSTEM_PROMPT = (
13
  )
14
 
15
  # Keep the context budget modest so generation stays fast on ZeroGPU.
16
- MAX_CONTEXT_CHARS = 6000
 
 
 
 
 
 
 
 
 
17
 
18
 
19
  def _format_context(hits: list[Hit]) -> str:
@@ -29,13 +38,56 @@ def _format_context(hits: list[Hit]) -> str:
29
  return "\n\n".join(blocks)
30
 
31
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  def build_messages(question: str, hits: list[Hit],
33
- history: list[dict] | None = None) -> list[dict]:
34
- """Build chat-template messages for the generator."""
35
  context = _format_context(hits)
36
  messages: list[dict] = [{"role": "system", "content": SYSTEM_PROMPT}]
37
- if history:
38
- messages.extend(history)
39
  user = question if not context else (
40
  f"Reference GDScript snippets from a curated Godot corpus:\n\n"
41
  f"{context}\n\n---\n\nQuestion: {question}"
 
13
  )
14
 
15
  # Keep the context budget modest so generation stays fast on ZeroGPU.
16
+ MAX_CONTEXT_CHARS = 5000 # trimmed a bit since history now shares the prompt
17
+
18
+ # Multi-turn memory bounds (keep prompts — and ZeroGPU time — bounded).
19
+ MAX_HISTORY_TURNS = 4 # default # of prior user+assistant exchanges kept
20
+ MAX_HISTORY_CHARS = 6000 # hard cap on total history text fed to the model
21
+
22
+ # Marker that app.py appends after the model's answer; everything from here on is
23
+ # our validation/sources/notes decoration and must be stripped before the answer
24
+ # is fed back as conversation history.
25
+ VALIDATION_DELIM = "\n\n---\n**Validation:**"
26
 
27
 
28
  def _format_context(hits: list[Hit]) -> str:
 
38
  return "\n\n".join(blocks)
39
 
40
 
41
def _clean_assistant(content: str) -> str:
    """Return only the model's own answer from a decorated assistant message.

    Everything from the first occurrence of ``VALIDATION_DELIM`` onward (the
    validation/sources/notes decoration) is discarded, and trailing whitespace
    is removed from what is left. When the delimiter is absent, the whole
    message is kept (minus trailing whitespace).
    """
    # partition() yields the full string as the head when the marker is missing.
    answer, _marker, _decoration = content.partition(VALIDATION_DELIM)
    return answer.rstrip()
44
+
45
+
46
def _normalize(history) -> list[dict]:
    """Flatten gradio history (messages OR tuples format) to role/content dicts.

    Accepted item shapes:
      * a dict containing a ``"role"`` key (messages format); its ``"content"``
        is used when it is a string,
      * a 2-item list/tuple ``(user_text, assistant_text)`` (tuples format).

    Items of any other shape are ignored. Non-string content (e.g. file or
    component messages) is skipped. Assistant text is passed through
    ``_clean_assistant`` first, and any message whose text is blank *after*
    cleaning is dropped, so an assistant entry that consisted solely of
    decoration never reaches the model as an empty turn. This rule is applied
    identically to both input formats.

    Args:
        history: Iterable of history items, or ``None``/empty for no history.

    Returns:
        A new list of ``{"role": ..., "content": ...}`` dicts in input order.
    """
    out: list[dict] = []

    def _add(role, content) -> None:
        # Shared path for both formats so they cannot drift apart.
        if not isinstance(content, str):  # skip file/component messages
            return
        if role == "assistant":
            content = _clean_assistant(content)
        # Check emptiness AFTER cleaning: a reply that was only decoration
        # must not be emitted as an empty assistant message.
        if content.strip():
            out.append({"role": role, "content": content})

    for item in history or []:
        if isinstance(item, dict) and "role" in item:
            _add(item.get("role"), item.get("content", ""))
        elif isinstance(item, (list, tuple)) and len(item) == 2:
            user_text, assistant_text = item
            _add("user", user_text)
            _add("assistant", assistant_text)
    return out
65
+
66
+
67
def _prepare_history(history, max_turns: int) -> list[dict]:
    """Select the recent, sanitized, size-limited slice of the conversation.

    The history is normalized, cut to the last ``2 * max_turns`` messages,
    then trimmed from the oldest end so the summed content length stays within
    ``MAX_HISTORY_CHARS`` (the newest message is always retained, even if it
    alone is over the limit). Finally, leading non-user messages are removed
    so the result begins on a user turn.

    Args:
        history: Raw chat history in any shape ``_normalize`` accepts.
        max_turns: Number of prior exchanges to keep; ``<= 0`` disables
            history entirely.

    Returns:
        A list of role/content dicts in chronological order (possibly empty).
    """
    # Guard explicitly: a slice of [-0:] would keep the ENTIRE history.
    if max_turns <= 0:
        return []

    recent = _normalize(history)[-2 * max_turns:]

    # Walk newest -> oldest, spending the character budget as we go.
    kept: list[dict] = []
    used = 0
    for msg in reversed(recent):
        used += len(msg["content"])
        if kept and used > MAX_HISTORY_CHARS:
            break
        kept.append(msg)
    kept.reverse()  # back to chronological order

    # Skip everything before the first user message (all of it if none exists).
    first_user = next(
        (idx for idx, msg in enumerate(kept) if msg["role"] == "user"),
        len(kept),
    )
    return kept[first_user:]
83
+
84
+
85
  def build_messages(question: str, hits: list[Hit],
86
+ history=None, max_turns: int = MAX_HISTORY_TURNS) -> list[dict]:
87
+ """Build chat-template messages: system + bounded history + context+question."""
88
  context = _format_context(hits)
89
  messages: list[dict] = [{"role": "system", "content": SYSTEM_PROMPT}]
90
+ messages.extend(_prepare_history(history, max_turns))
 
91
  user = question if not context else (
92
  f"Reference GDScript snippets from a curated Godot corpus:\n\n"
93
  f"{context}\n\n---\n\nQuestion: {question}"