vivekchakraverty Claude Opus 4.8 committed on
Commit
635e6fb
·
1 Parent(s): 6246295

Auto-correct EVERY broken GDScript block in place (capped at MAX_FIX_PASSES)

Browse files

respond() now repairs each broken fenced block instead of only the first:
- validate.gdscript_block_spans() locates every block with its span;
first_gdscript_block() pulls the corrected code out of each fix pass.
- _autocorrect() walks the blocks, regenerates a fix for each broken one
(one GPU call each), and splices the corrected code back IN PLACE of the
original (only if the fix actually parses), preserving surrounding prose.
- Capped at MAX_FIX_PASSES (3) GPU calls per answer so a pathological answer
with many broken blocks can't blow the GPU budget; the report notes how
many were fixed / left.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>

Files changed (2) hide show
  1. app.py +47 -14
  2. validate.py +19 -0
app.py CHANGED
@@ -29,6 +29,11 @@ import prompt as promptlib
29
  import generate as gen
30
  import validate as gdv
31
 
 
 
 
 
 
32
 
33
  def _sources_md(hits: list[rag.Hit]) -> str:
34
  if not hits:
@@ -43,6 +48,36 @@ def _sources_md(hits: list[rag.Hit]) -> str:
43
  return "\n".join(lines)
44
 
45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  def respond(message: str, history, top_k: int, self_correct: bool,
47
  history_turns: int = promptlib.MAX_HISTORY_TURNS):
48
  message = (message or "").strip()
@@ -54,28 +89,26 @@ def respond(message: str, history, top_k: int, self_correct: bool,
54
  max_turns=int(history_turns))
55
  answer = gen.generate(messages)
56
 
57
- results = gdv.validate_answer(answer)
58
-
59
- # One optional self-correction pass if a code block failed to parse.
60
  if self_correct:
61
- fail = gdv.first_syntax_error(results)
62
- if fail is not None:
63
- broken, err = fail
64
- fixed = gen.generate(promptlib.build_fix_messages(broken, err))
65
- fixed_results = gdv.validate_answer(fixed)
66
- if fixed_results and all(r.ok for r in fixed_results):
67
- answer = (answer
68
- + "\n\n---\n**🔧 Auto-corrected** (original had a syntax "
69
- "error):\n\n" + fixed)
70
- results = fixed_results
71
 
 
72
  report = gdv.render_report(results)
73
  note = ("" if rag.index_available()
74
  else "\n\n> ⏳ _Retrieval index not loaded yet — answering without "
75
  "corpus context. Build & push the index (see DEPLOY.md)._")
76
  # The VALIDATION_DELIM prefix lets prompt._clean_assistant strip this
77
  # decoration when the turn is fed back as multi-turn history.
78
- return (f"{answer}{promptlib.VALIDATION_DELIM} \n{report}"
79
  f"{_sources_md(hits)}{note}")
80
 
81
 
 
29
  import generate as gen
30
  import validate as gdv
31
 
32
+ # Cap on auto-correction GPU calls per answer. Each broken block fixed is one
33
+ # extra gen.generate() (one @spaces.GPU call), so this bounds the total GPU
34
+ # work a single pathological answer (many broken blocks) can trigger.
35
+ MAX_FIX_PASSES = 3
36
+
37
 
38
  def _sources_md(hits: list[rag.Hit]) -> str:
39
  if not hits:
 
48
  return "\n".join(lines)
49
 
50
 
51
+ def _autocorrect(answer: str) -> tuple[str, int, int]:
52
+ """Repair each broken ```gdscript block IN PLACE, fixing at most
53
+ MAX_FIX_PASSES of them (one GPU call each). Returns
54
+ (new_answer, num_fixed, num_broken). num_broken counts every block that
55
+ failed to parse (including any beyond the cap, so the caller can report
56
+ how many were left)."""
57
+ spans = gdv.gdscript_block_spans(answer)
58
+ pieces: list[str] = []
59
+ cursor = num_fixed = num_broken = passes = 0
60
+ for code, start, end in spans:
61
+ pieces.append(answer[cursor:start])
62
+ block_text = answer[start:end] # the whole ```...``` fence
63
+ res = gdv.validate_code(code)
64
+ if not res.ok:
65
+ num_broken += 1
66
+ if passes < MAX_FIX_PASSES:
67
+ passes += 1
68
+ fix_out = gen.generate(promptlib.build_fix_messages(code, res.error))
69
+ fix_code = gdv.first_gdscript_block(fix_out)
70
+ # Only splice in a fix that actually parses; otherwise keep the
71
+ # original (it stays flagged ❌ in the validation report).
72
+ if fix_code and gdv.validate_code(fix_code).ok:
73
+ block_text = f"```gdscript\n{fix_code}\n```"
74
+ num_fixed += 1
75
+ pieces.append(block_text)
76
+ cursor = end
77
+ pieces.append(answer[cursor:])
78
+ return "".join(pieces), num_fixed, num_broken
79
+
80
+
81
  def respond(message: str, history, top_k: int, self_correct: bool,
82
  history_turns: int = promptlib.MAX_HISTORY_TURNS):
83
  message = (message or "").strip()
 
89
  max_turns=int(history_turns))
90
  answer = gen.generate(messages)
91
 
92
+ # Self-correction: repair EVERY broken GDScript block in place, capped at
93
+ # MAX_FIX_PASSES GPU calls so a pathological answer can't blow the budget.
94
+ fix_note = ""
95
  if self_correct:
96
+ answer, n_fixed, n_broken = _autocorrect(answer)
97
+ if n_broken:
98
+ head = f"🔧 Auto-corrected {n_fixed}/{n_broken} broken block(s) in place"
99
+ if n_broken > MAX_FIX_PASSES:
100
+ head += f" — capped at {MAX_FIX_PASSES} fix passes, " \
101
+ f"{n_broken - MAX_FIX_PASSES} not attempted"
102
+ fix_note = f"\n\n**{head}.**"
 
 
 
103
 
104
+ results = gdv.validate_answer(answer)
105
  report = gdv.render_report(results)
106
  note = ("" if rag.index_available()
107
  else "\n\n> ⏳ _Retrieval index not loaded yet — answering without "
108
  "corpus context. Build & push the index (see DEPLOY.md)._")
109
  # The VALIDATION_DELIM prefix lets prompt._clean_assistant strip this
110
  # decoration when the turn is fed back as multi-turn history.
111
+ return (f"{answer}{promptlib.VALIDATION_DELIM} \n{report}{fix_note}"
112
  f"{_sources_md(hits)}{note}")
113
 
114
 
validate.py CHANGED
@@ -100,6 +100,25 @@ def validate_answer(answer: str) -> list[BlockResult]:
100
  return [validate_code(b) for b in extract_gdscript_blocks(answer)]
101
 
102
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  def render_report(results: list[BlockResult]) -> str:
104
  """Markdown summary for the UI."""
105
  if not results:
 
100
  return [validate_code(b) for b in extract_gdscript_blocks(answer)]
101
 
102
 
103
+ def gdscript_block_spans(text: str) -> list[tuple[str, int, int]]:
104
+ """Each fenced GDScript block as (stripped_code, match_start, match_end), in
105
+ document order. The span covers the whole ```...``` fence so a caller can
106
+ splice a corrected block back in place of the original."""
107
+ out: list[tuple[str, int, int]] = []
108
+ for m in _FENCE_RE.finditer(text or ""):
109
+ code = m.group(1).strip()
110
+ if code:
111
+ out.append((code, m.start(), m.end()))
112
+ return out
113
+
114
+
115
+ def first_gdscript_block(text: str) -> str:
116
+ """First fenced GDScript block (stripped), or '' if none — used to pull the
117
+ corrected code out of a fix generation."""
118
+ blocks = extract_gdscript_blocks(text)
119
+ return blocks[0] if blocks else ""
120
+
121
+
122
  def render_report(results: list[BlockResult]) -> str:
123
  """Markdown summary for the UI."""
124
  if not results: