Spaces:
Running on Zero
Running on Zero
Commit ·
635e6fb
1
Parent(s): 6246295
Auto-correct EVERY broken GDScript block in place (capped at MAX_FIX_PASSES)
Browse files
respond() now repairs each broken fenced block instead of only the first:
- validate.gdscript_block_spans() locates every block with its span;
first_gdscript_block() pulls the corrected code out of each fix pass.
- _autocorrect() walks the blocks, regenerates a fix for each broken one
(one GPU call each), and splices the corrected code back IN PLACE of the
original (only if the fix actually parses), preserving surrounding prose.
- Capped at MAX_FIX_PASSES (3) GPU calls per answer so a pathological answer
with many broken blocks can't blow the GPU budget; the report notes how
many were fixed / left.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
- app.py +47 -14
- validate.py +19 -0
app.py
CHANGED
|
@@ -29,6 +29,11 @@ import prompt as promptlib
|
|
| 29 |
import generate as gen
|
| 30 |
import validate as gdv
|
| 31 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
def _sources_md(hits: list[rag.Hit]) -> str:
|
| 34 |
if not hits:
|
|
@@ -43,6 +48,36 @@ def _sources_md(hits: list[rag.Hit]) -> str:
|
|
| 43 |
return "\n".join(lines)
|
| 44 |
|
| 45 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
def respond(message: str, history, top_k: int, self_correct: bool,
|
| 47 |
history_turns: int = promptlib.MAX_HISTORY_TURNS):
|
| 48 |
message = (message or "").strip()
|
|
@@ -54,28 +89,26 @@ def respond(message: str, history, top_k: int, self_correct: bool,
|
|
| 54 |
max_turns=int(history_turns))
|
| 55 |
answer = gen.generate(messages)
|
| 56 |
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
if self_correct:
|
| 61 |
-
|
| 62 |
-
if
|
| 63 |
-
broken
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
+ "\n\n---\n**🔧 Auto-corrected** (original had a syntax "
|
| 69 |
-
"error):\n\n" + fixed)
|
| 70 |
-
results = fixed_results
|
| 71 |
|
|
|
|
| 72 |
report = gdv.render_report(results)
|
| 73 |
note = ("" if rag.index_available()
|
| 74 |
else "\n\n> ⏳ _Retrieval index not loaded yet — answering without "
|
| 75 |
"corpus context. Build & push the index (see DEPLOY.md)._")
|
| 76 |
# The VALIDATION_DELIM prefix lets prompt._clean_assistant strip this
|
| 77 |
# decoration when the turn is fed back as multi-turn history.
|
| 78 |
-
return (f"{answer}{promptlib.VALIDATION_DELIM} \n{report}"
|
| 79 |
f"{_sources_md(hits)}{note}")
|
| 80 |
|
| 81 |
|
|
|
|
| 29 |
import generate as gen
|
| 30 |
import validate as gdv
|
| 31 |
|
| 32 |
+
# Cap on auto-correction GPU calls per answer. Each broken block fixed is one
|
| 33 |
+
# extra gen.generate() (one @spaces.GPU call), so this bounds the total GPU
|
| 34 |
+
# work a single pathological answer (many broken blocks) can trigger.
|
| 35 |
+
MAX_FIX_PASSES = 3
|
| 36 |
+
|
| 37 |
|
| 38 |
def _sources_md(hits: list[rag.Hit]) -> str:
|
| 39 |
if not hits:
|
|
|
|
| 48 |
return "\n".join(lines)
|
| 49 |
|
| 50 |
|
| 51 |
+
def _autocorrect(answer: str) -> tuple[str, int, int]:
|
| 52 |
+
"""Repair each broken ```gdscript block IN PLACE, fixing at most
|
| 53 |
+
MAX_FIX_PASSES of them (one GPU call each). Returns
|
| 54 |
+
(new_answer, num_fixed, num_broken). num_broken counts every block that
|
| 55 |
+
failed to parse (including any beyond the cap, so the caller can report
|
| 56 |
+
how many were left)."""
|
| 57 |
+
spans = gdv.gdscript_block_spans(answer)
|
| 58 |
+
pieces: list[str] = []
|
| 59 |
+
cursor = num_fixed = num_broken = passes = 0
|
| 60 |
+
for code, start, end in spans:
|
| 61 |
+
pieces.append(answer[cursor:start])
|
| 62 |
+
block_text = answer[start:end] # the whole ```...``` fence
|
| 63 |
+
res = gdv.validate_code(code)
|
| 64 |
+
if not res.ok:
|
| 65 |
+
num_broken += 1
|
| 66 |
+
if passes < MAX_FIX_PASSES:
|
| 67 |
+
passes += 1
|
| 68 |
+
fix_out = gen.generate(promptlib.build_fix_messages(code, res.error))
|
| 69 |
+
fix_code = gdv.first_gdscript_block(fix_out)
|
| 70 |
+
# Only splice in a fix that actually parses; otherwise keep the
|
| 71 |
+
# original (it stays flagged ❌ in the validation report).
|
| 72 |
+
if fix_code and gdv.validate_code(fix_code).ok:
|
| 73 |
+
block_text = f"```gdscript\n{fix_code}\n```"
|
| 74 |
+
num_fixed += 1
|
| 75 |
+
pieces.append(block_text)
|
| 76 |
+
cursor = end
|
| 77 |
+
pieces.append(answer[cursor:])
|
| 78 |
+
return "".join(pieces), num_fixed, num_broken
|
| 79 |
+
|
| 80 |
+
|
| 81 |
def respond(message: str, history, top_k: int, self_correct: bool,
|
| 82 |
history_turns: int = promptlib.MAX_HISTORY_TURNS):
|
| 83 |
message = (message or "").strip()
|
|
|
|
| 89 |
max_turns=int(history_turns))
|
| 90 |
answer = gen.generate(messages)
|
| 91 |
|
| 92 |
+
# Self-correction: repair EVERY broken GDScript block in place, capped at
|
| 93 |
+
# MAX_FIX_PASSES GPU calls so a pathological answer can't blow the budget.
|
| 94 |
+
fix_note = ""
|
| 95 |
if self_correct:
|
| 96 |
+
answer, n_fixed, n_broken = _autocorrect(answer)
|
| 97 |
+
if n_broken:
|
| 98 |
+
head = f"🔧 Auto-corrected {n_fixed}/{n_broken} broken block(s) in place"
|
| 99 |
+
if n_broken > MAX_FIX_PASSES:
|
| 100 |
+
head += f" — capped at {MAX_FIX_PASSES} fix passes, " \
|
| 101 |
+
f"{n_broken - MAX_FIX_PASSES} not attempted"
|
| 102 |
+
fix_note = f"\n\n**{head}.**"
|
|
|
|
|
|
|
|
|
|
| 103 |
|
| 104 |
+
results = gdv.validate_answer(answer)
|
| 105 |
report = gdv.render_report(results)
|
| 106 |
note = ("" if rag.index_available()
|
| 107 |
else "\n\n> ⏳ _Retrieval index not loaded yet — answering without "
|
| 108 |
"corpus context. Build & push the index (see DEPLOY.md)._")
|
| 109 |
# The VALIDATION_DELIM prefix lets prompt._clean_assistant strip this
|
| 110 |
# decoration when the turn is fed back as multi-turn history.
|
| 111 |
+
return (f"{answer}{promptlib.VALIDATION_DELIM} \n{report}{fix_note}"
|
| 112 |
f"{_sources_md(hits)}{note}")
|
| 113 |
|
| 114 |
|
validate.py
CHANGED
|
@@ -100,6 +100,25 @@ def validate_answer(answer: str) -> list[BlockResult]:
|
|
| 100 |
return [validate_code(b) for b in extract_gdscript_blocks(answer)]
|
| 101 |
|
| 102 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
def render_report(results: list[BlockResult]) -> str:
|
| 104 |
"""Markdown summary for the UI."""
|
| 105 |
if not results:
|
|
|
|
| 100 |
return [validate_code(b) for b in extract_gdscript_blocks(answer)]
|
| 101 |
|
| 102 |
|
| 103 |
+
def gdscript_block_spans(text: str) -> list[tuple[str, int, int]]:
|
| 104 |
+
"""Each fenced GDScript block as (stripped_code, match_start, match_end), in
|
| 105 |
+
document order. The span covers the whole ```...``` fence so a caller can
|
| 106 |
+
splice a corrected block back in place of the original."""
|
| 107 |
+
out: list[tuple[str, int, int]] = []
|
| 108 |
+
for m in _FENCE_RE.finditer(text or ""):
|
| 109 |
+
code = m.group(1).strip()
|
| 110 |
+
if code:
|
| 111 |
+
out.append((code, m.start(), m.end()))
|
| 112 |
+
return out
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
def first_gdscript_block(text: str) -> str:
|
| 116 |
+
"""First fenced GDScript block (stripped), or '' if none β used to pull the
|
| 117 |
+
corrected code out of a fix generation."""
|
| 118 |
+
blocks = extract_gdscript_blocks(text)
|
| 119 |
+
return blocks[0] if blocks else ""
|
| 120 |
+
|
| 121 |
+
|
| 122 |
def render_report(results: list[BlockResult]) -> str:
|
| 123 |
"""Markdown summary for the UI."""
|
| 124 |
if not results:
|