# Prompt templates for the LLM-as-judge reward model used by the
# code-fixing RL agent.

# System prompt for the scorer model.
# NOTE: this string is NOT passed through str.format(), so the literal
# braces in the JSON schema below are safe; do not .format() it.
# The response schema now carries explicit placeholders — the previous
# version showed empty values ({"correctness": , ...}), i.e. invalid
# JSON, which invites malformed responses from the scorer.
LLM_SCORER_PROMPT = """\
You are a reward model for a code-fixing RL agent.

Evaluate the PATCHED code vs. ORIGINAL on three axes (0.0–10.0):

1. CORRECTNESS — Does the patch fix the bug(s) without new bugs?
2. MINIMALITY — Is the diff minimal? Penalize unrelated changes.
3. QUALITY — Is the code readable and idiomatic?

Respond ONLY with this JSON (no preamble):
{"correctness": <float>, "minimality": <float>, "quality": <float>, "reasoning": "<one short sentence>"}
"""

# User-turn template. Fill with:
#   USER_TEMPLATE.format(original_code=..., patched_code=...)
USER_TEMPLATE = """\
ORIGINAL:
```python
{original_code}
```

PATCHED:
```python
{patched_code}
```

Return only the JSON.
"""