Spaces:

Ramkan7
/

Patch_Hawk

Running

App Files Files Community

kanishcr7 committed on Apr 7

Commit

d6abea2

1 Parent(s): 93d7cd0

Final check:Passed

Browse files

Files changed (9) hide show

.gitignore +1 -0
inference.py +148 -26
patchhawk/agent/environment.py +10 -2
patchhawk/agent/sandbox.py +3 -3
patchhawk/app/dashboard.py +18 -3
patchhawk/data/generate_scenarios.py +19 -9
patchhawk/data/scenarios.json +0 -0
patchhawk/env_models.py +6 -0
patchhawk/training/train_grpo.py +22 -4

.gitignore CHANGED Viewed

@@ -2,6 +2,7 @@
 __pycache__/
 *.py[codz]
 *$py.class
 # C extensions
 *.so

 __pycache__/
 *.py[codz]
 *$py.class
+wandb/
 # C extensions
 *.so

inference.py CHANGED Viewed

@@ -2,7 +2,7 @@
 """
 PatchHawk inference script — runs the LLM agent loop against the
 OpenEnv-compliant PatchHawkEnv.
 Environment variables:
     API_BASE_URL   – OpenAI-compatible API endpoint (required unless DRY_RUN=1)
     MODEL_NAME     – Model identifier (default: Qwen/Qwen2.5-Coder-32B-Instruct)
@@ -29,11 +29,17 @@ from patchhawk.env_models import PatchHawkAction, PatchHawkObservation, PatchHaw
 from patchhawk import tasks as graders
 # ── Configuration ────────────────────────────────────────────────────
 API_BASE_URL = os.getenv(
     "API_BASE_URL", "https://router.huggingface.co/hf-inference/v1"
 )
-MODEL_NAME = os.getenv("MODEL_NAME", "Qwen/Qwen2.5-Coder-32B-Instruct")
 HF_TOKEN = os.getenv("HF_TOKEN", "")
 DRY_RUN = os.getenv("DRY_RUN", "0") == "1"
 SINGLE_TASK = os.getenv("TASK", "")
@@ -59,19 +65,53 @@ TASK_DEFS = [
 # ── Prompt builder ───────────────────────────────────────────────────
 SYSTEM_PROMPT = """\
-You are PatchHawk, a security agent that detects supply-chain vulnerabilities
-in Python code. You will be given a code snippet and static analysis flags.
-Respond with a JSON object containing:
 {
-  "action_type": <int>,     // 0=ANALYZE, 1=EXECUTE_SANDBOX, 2=BLOCK_PR, 3=SUBMIT_PATCH, 4=REQUEST_REVIEW
-  "patch_content": <str|null>  // required if action_type == 3
 }
-Be decisive. If the code is clearly malicious, BLOCK_PR (2). If you can
-generate a patch that removes the vulnerability, use SUBMIT_PATCH (3).
 """
 def _build_user_prompt(obs: PatchHawkObservation, step: int) -> str:
     parts = [
@@ -89,37 +129,119 @@ def _build_user_prompt(obs: PatchHawkObservation, step: int) -> str:
 # ── LLM caller ───────────────────────────────────────────────────────
-def _call_llm(messages: list[dict]) -> str:
-    """Call the OpenAI-compatible LLM and return the text content."""
-    from openai import OpenAI
-    client = OpenAI(
-        base_url=API_BASE_URL,
-        api_key=HF_TOKEN or "no-key",
     )
-    response = client.chat.completions.create(
-        model=MODEL_NAME,
-        messages=messages,
         temperature=0.2,
-        max_tokens=512,
     )
-    return response.choices[0].message.content or ""
 def _parse_action(text: str) -> PatchHawkAction:
     """Parse LLM response text into a PatchHawkAction."""
-    # Try to extract JSON from the response
     text = text.strip()
-    # Handle markdown code blocks
     if "```json" in text:
         text = text.split("```json")[1].split("```")[0].strip()
-    elif "```" in text:
         text = text.split("```")[1].split("```")[0].strip()
-    data = json.loads(text)
     return PatchHawkAction(
-        action_type=int(data["action_type"]),
-        patch_content=data.get("patch_content"),
     )

 """
 PatchHawk inference script — runs the LLM agent loop against the
 OpenEnv-compliant PatchHawkEnv.
+
 Environment variables:
     API_BASE_URL   – OpenAI-compatible API endpoint (required unless DRY_RUN=1)
     MODEL_NAME     – Model identifier (default: $GRPO_POLICY_MODEL if set, else Qwen/Qwen2.5-Coder-32B-Instruct)
 from patchhawk import tasks as graders
 # ── Configuration ────────────────────────────────────────────────────
+try:
+    from dotenv import load_dotenv
+    load_dotenv()
+except ImportError:
+    pass
 API_BASE_URL = os.getenv(
     "API_BASE_URL", "https://router.huggingface.co/hf-inference/v1"
 )
+# Prefer explicit MODEL_NAME, fallback to GRPO_POLICY_MODEL from .env, then default to 32B model.
+MODEL_NAME = os.getenv("MODEL_NAME", os.getenv("GRPO_POLICY_MODEL", "Qwen/Qwen2.5-Coder-32B-Instruct"))
 HF_TOKEN = os.getenv("HF_TOKEN", "")
 DRY_RUN = os.getenv("DRY_RUN", "0") == "1"
 SINGLE_TASK = os.getenv("TASK", "")
 # ── Prompt builder ───────────────────────────────────────────────────
 SYSTEM_PROMPT = """\
+You are PatchHawk, a security agent that detects supply-chain vulnerabilities in Python code.
+Given a code snippet and static analysis flags, you must respond **EXACTLY** with a single JSON object. No extra text, no markdown formatting.
+## Output JSON Schema
 {
+  "reasoning": "<str>",         // Mandatory: Explain what the vulnerability is, why it's dangerous, and your recommended fix (if any).
+  "risk_score": <float>,        // 0.0 (no risk) to 1.0 (critical). Be precise to two decimals.
+  "action_type": <int>,         // One of: 0=ANALYZE, 1=EXECUTE_SANDBOX, 2=BLOCK_PR, 3=SUBMIT_PATCH, 4=REQUEST_REVIEW
+  "patch_content": "<str|null>" // Full patched code if action_type=3, otherwise null. Must be valid Python.
 }
+## Action Type Guidelines
+- **0 ANALYZE** – No immediate threat, but needs deeper review.
+- **1 EXECUTE_SANDBOX** – Suspicious but not obviously malicious; run in isolated environment.
+- **2 BLOCK_PR** – Severely malicious, unfixable (e.g., hidden backdoor, remote shell). Reject PR.
+- **3 SUBMIT_PATCH** – Vulnerability can be fixed. Provide corrected code in `patch_content`.
+- **4 REQUEST_REVIEW** – Complex or ambiguous; require human expert.
+## Rules
+- `reasoning` must be thorough: describe the flaw, its impact (CWE if known), and step‑by‑step how to patch.
+- Escape all double quotes inside strings with backslash (`\"`).
+- If the code is benign, set `risk_score` ≤ 0.2, `action_type` = 0, and `patch_content` = null.
+- Never include comments or explanations outside the JSON object.
+**Example valid response:**
+{"reasoning": "Hardcoded password 'admin123' in __init__ allows credential bypass. Replace with env var.", "risk_score": 0.85, "action_type": 3, "patch_content": "import os\\nclass Malicious:\\n    def __init__(self):\\n        self.cache = []\\n        self.password = os.getenv('DB_PASS')\\n    ..."}
 """
+# SYSTEM_PROMPT = """\
+# You are PatchHawk, a security agent that detects supply-chain vulnerabilities
+# in Python code. You will be given a code snippet and static analysis flags.
+# Respond EXACTLY with a JSON object containing the following keys:
+# {
+#   "reasoning": "<str>",         // Step-by-step explanation of what the vulnerability is, why you are blocking/patching it, and how it can be fixed.
+#   "risk_score": <float>,        // Your predicted risk score from 0.0 to 1.0 based on your analysis
+#   "action_type": <int>,         // 0=ANALYZE, 1=EXECUTE_SANDBOX, 2=BLOCK_PR, 3=SUBMIT_PATCH, 4=REQUEST_REVIEW
+#   "patch_content": "<str|null>" // The full patched python code fixing the vulnerability
+# }
+# Be decisive. First, explain your findings thoroughly in the "reasoning" field.
+# If the code is malicious but you can fix the vulnerability, use SUBMIT_PATCH (3) and provide the safe, corrected code in "patch_content".
+# If the code is severely malicious and completely unfixable, use BLOCK_PR (2).
+# IMPORTANT: Ensure your output is perfectly VALID JSON. Escape all double quotes inside strings properly.
+# """
 def _build_user_prompt(obs: PatchHawkObservation, step: int) -> str:
     parts = [
 # ── LLM caller ───────────────────────────────────────────────────────
+_local_pipeline = None
def _call_llm_local(messages: list[dict]) -> str:
    """Generate a reply with a locally loaded transformers pipeline.

    Used as the fallback when the remote API call fails. The pipeline is
    built on first use and cached in the module-level ``_local_pipeline``
    so later calls reuse the already-loaded model.

    Args:
        messages: Chat messages (``role``/``content`` dicts) to render
            through the tokenizer's chat template.

    Returns:
        The generated text with the rendered prompt prefix removed and
        surrounding whitespace stripped.
    """
    global _local_pipeline

    generator = _local_pipeline
    if generator is None:
        # Heavy imports are deferred so they are only paid for when the
        # fallback is actually needed.
        import torch
        from transformers import pipeline

        # Same environment variable the training setup uses for its policy model.
        model_id = os.getenv("GRPO_POLICY_MODEL", "unsloth/Qwen2.5-Coder-3B-Instruct")
        print(
            f"\n[Fallback] Loading local model: {model_id} into memory. This may take a moment...",
            flush=True,
        )
        generator = pipeline(
            "text-generation",
            model=model_id,
            # bfloat16 weights to reduce memory use compared with float32.
            model_kwargs={"torch_dtype": torch.bfloat16},
            device_map="auto",
        )
        _local_pipeline = generator
        print("[Fallback] Local model loaded successfully.\n", flush=True)

    # Render the message list into the single prompt string the model expects.
    chat_prompt = generator.tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

    results = generator(
        chat_prompt,
        max_new_tokens=2048,
        do_sample=True,
        temperature=0.2,
    )
    generated = results[0]["generated_text"]
    print(f"\ngenerated:{generated}\n")

    # The pipeline output may echo the prompt; keep only the completion.
    completion = generated[len(chat_prompt):] if generated.startswith(chat_prompt) else generated
    return completion.strip()
def _call_llm(messages: list[dict], *, max_tokens: int = 2048) -> str:
    """Call the OpenAI-compatible LLM and return the text content.

    Any failure on the remote path (missing ``openai`` package, network or
    authentication error, empty ``choices``) is reported and answered by
    the local fallback ``_call_llm_local`` instead of aborting the run.

    Args:
        messages: Chat messages (``role``/``content`` dicts).
        max_tokens: Completion budget for the remote call. Defaults to
            2048, matching the local fallback's ``max_new_tokens``; the
            system prompt asks for reasoning plus a full patched file, and
            a smaller budget truncates the JSON reply.

    Returns:
        The assistant message content, or ``""`` when the API returns no
        content.
    """
    try:
        # Imported inside the ``try`` so a missing client library also
        # triggers the local fallback rather than an ImportError.
        from openai import OpenAI

        client = OpenAI(
            base_url=API_BASE_URL,
            api_key=HF_TOKEN or "no-key",
        )
        response = client.chat.completions.create(
            model=MODEL_NAME,
            messages=messages,
            temperature=0.2,
            max_tokens=max_tokens,
        )
        return response.choices[0].message.content or ""
    except Exception as e:
        # Deliberately broad: every remote failure degrades to local inference.
        print(f"[LLM ERROR] Remote API failed: {e}. Initiating local Fallback...", flush=True)
        return _call_llm_local(messages)
+import re
 def _parse_action(text: str) -> PatchHawkAction:
     """Parse LLM response text into a PatchHawkAction."""
     text = text.strip()
     if "```json" in text:
         text = text.split("```json")[1].split("```")[0].strip()
+    elif "```" in text and not text.startswith("{"):
         text = text.split("```")[1].split("```")[0].strip()
+    def clean_patch(p: str) -> str:
+        if not p: return p
+        if "```python" in p:
+            return p.split("```python")[1].split("```")[0].strip()
+        if "```" in p:
+            return p.split("```")[1].split("```")[0].strip()
+        return p
+    try:
+        data = json.loads(text)
+    except json.JSONDecodeError:
+        action_match = re.search(r'"action_type"\s*:\s*(\d+)', text)
+        action_type = int(action_match.group(1)) if action_match else 2
+        risk_match = re.search(r'"risk_score"\s*:\s*([\d\.]+)', text)
+        risk_score = float(risk_match.group(1)) if risk_match else None
+        patch_match = re.search(r'"patch_content"\s*:\s*"(.*)', text, re.DOTALL)
+        patch_content = None
+        if patch_match:
+            raw_patch = patch_match.group(1).rsplit('"', 1)[0]
+            raw_patch = raw_patch.replace("\\n", "\n").replace('\\"', '"').replace("\\\\", "\\")
+            patch_content = clean_patch(raw_patch)
+        return PatchHawkAction(
+            action_type=action_type,
+            reasoning="JSON Error/Truncated Output. Recovered partial data.",
+            predicted_risk=risk_score,
+            patch_content=patch_content
+        )
     return PatchHawkAction(
+        action_type=int(data.get("action_type", 2)),
+        patch_content=clean_patch(data.get("patch_content")),
+        reasoning=data.get("reasoning"),
+        predicted_risk=data.get("risk_score"),
     )

patchhawk/agent/environment.py CHANGED Viewed

@@ -308,8 +308,16 @@ class PatchHawkEnv(Environment[PatchHawkAction, PatchHawkObservation, PatchHawkS
         if self.step_counter >= self.max_steps and not done:
             done = True
             if label == "malicious":
-                reward = -5.0
-                reason = "max steps reached on malicious scenario"
         self.cumulative_reward += reward

         if self.step_counter >= self.max_steps and not done:
             done = True
             if label == "malicious":
+                reward -= 5.0
+                reason += " | max steps reached on malicious scenario"
+        # ── Dynamic Risk Bonus ───────────────────────────────────
+        predict_risk = getattr(action, "predicted_risk", None)
+        if predict_risk is not None:
+            actual_risk = 1.0 if label == "malicious" else 0.0
+            accuracy_bonus = (1.0 - abs(actual_risk - float(predict_risk))) * 2.0
+            reward += accuracy_bonus
+            reason += f" | AI risk accuracy bonus: +{accuracy_bonus:.2f}"
         self.cumulative_reward += reward

patchhawk/agent/sandbox.py CHANGED Viewed

@@ -31,7 +31,7 @@ def run_code(
     temp_dir = tempfile.mkdtemp(prefix="patchhawk_sandbox_")
     script_path = os.path.join(temp_dir, "script.py")
-    with open(script_path, "w") as f:
         f.write(code)
     result: Dict[str, Any] = {
@@ -91,7 +91,7 @@ def check_syntax(
     temp_dir = tempfile.mkdtemp(prefix="patchhawk_syntax_")
     script_path = os.path.join(temp_dir, "script.py")
-    with open(script_path, "w") as f:
         f.write(code)
     try:
@@ -107,7 +107,7 @@ def check_syntax(
                 "--cpus",
                 "0.5",
                 "-v",
-                f"{temp_dir}:/app:ro",
                 "patchhawk-sandbox:latest",
                 "python",
                 "-m",

     temp_dir = tempfile.mkdtemp(prefix="patchhawk_sandbox_")
     script_path = os.path.join(temp_dir, "script.py")
+    with open(script_path, "w", encoding="utf-8") as f:
         f.write(code)
     result: Dict[str, Any] = {
     temp_dir = tempfile.mkdtemp(prefix="patchhawk_syntax_")
     script_path = os.path.join(temp_dir, "script.py")
+    with open(script_path, "w", encoding="utf-8") as f:
         f.write(code)
     try:
                 "--cpus",
                 "0.5",
                 "-v",
+                f"{temp_dir}:/app:rw",
                 "patchhawk-sandbox:latest",
                 "python",
                 "-m",

patchhawk/app/dashboard.py CHANGED Viewed

@@ -181,7 +181,10 @@ def main():
                     final_action_type = PatchHawkEnv.ACTION_BLOCK_PR
                 else:
                     final_action_type = PatchHawkEnv.ACTION_REQUEST_REVIEW
-                action = PatchHawkAction(action_type=final_action_type)
         # Visual Hacker Terminal Effect
         if final_action_type == PatchHawkEnv.ACTION_SUBMIT_PATCH:
@@ -219,8 +222,13 @@ def main():
         with st.expander("🤖 Agent Thought Process (LLM Trace)"):
             st.markdown(f"```json\n{llm_thought_process}\n```")
         m1, m2, m3 = st.columns(3)
-        m1.metric("Risk Score", f"{risk:.2f}")
         m2.metric("Decision", PatchHawkEnv.ACTION_NAMES[final_action_type])
         m3.metric("Reward", f"{total_reward:+.2f}")
@@ -229,6 +237,10 @@ def main():
         )
         with tab1:
             if final_action_type == PatchHawkEnv.ACTION_BLOCK_PR:
                 st.markdown(
                     "<div class='info-box status-malicious'>⛔ BLOCKED — "
@@ -253,10 +265,13 @@ def main():
         with tab2:
             telem = obs.metadata.get("telemetry")
             if telem:
                 st.json(telem)
             else:
-                st.info("No sandbox execution for this path.")
         with tab3:
             if final_action_type == PatchHawkEnv.ACTION_SUBMIT_PATCH and scenario.get(

                     final_action_type = PatchHawkEnv.ACTION_BLOCK_PR
                 else:
                     final_action_type = PatchHawkEnv.ACTION_REQUEST_REVIEW
+                action = PatchHawkAction(
+                    action_type=final_action_type,
+                    reasoning="Static rule-based fallback decision due to high risk score."
+                )
         # Visual Hacker Terminal Effect
         if final_action_type == PatchHawkEnv.ACTION_SUBMIT_PATCH:
         with st.expander("🤖 Agent Thought Process (LLM Trace)"):
             st.markdown(f"```json\n{llm_thought_process}\n```")
+        # Opt for LLM's predicted risk score if available
+        display_risk = getattr(action, "predicted_risk", None)
+        if display_risk is None:
+            display_risk = risk
         m1, m2, m3 = st.columns(3)
+        m1.metric("Risk Score", f"{float(display_risk):.2f}")
         m2.metric("Decision", PatchHawkEnv.ACTION_NAMES[final_action_type])
         m3.metric("Reward", f"{total_reward:+.2f}")
         )
         with tab1:
+            if hasattr(action, "reasoning") and action.reasoning:
+                st.markdown("### 🧠 Agent's Reasoning")
+                st.info(action.reasoning)
             if final_action_type == PatchHawkEnv.ACTION_BLOCK_PR:
                 st.markdown(
                     "<div class='info-box status-malicious'>⛔ BLOCKED — "
         with tab2:
             telem = obs.metadata.get("telemetry")
+            details = obs.metadata.get("details")
             if telem:
                 st.json(telem)
+            elif dict(details) if details else None:
+                st.json(details)
             else:
+                st.info("No sandbox telemetry generated for this action.")
         with tab3:
             if final_action_type == PatchHawkEnv.ACTION_SUBMIT_PATCH and scenario.get(

patchhawk/data/generate_scenarios.py CHANGED Viewed

@@ -128,12 +128,16 @@ def auto_generate_unit_test(filename: str, code: str) -> str:
 # ============================================================
-def generate_track_b_scenarios(benign_files: list) -> list:
-    """Generate ≥ 50 scenarios: 25 TP, 15 FP, 15 functional."""
     scenarios = []
-    # ── True Positives (25) ──────────────────────────────────
-    for i in range(25):
         bf = random.choice(benign_files)
         attack_name, attack_data = random.choice(list(ATTACK_TEMPLATES.items()))
         malicious_code = attack_data["inject"] + bf["code"]
@@ -187,7 +191,7 @@ def generate_track_b_scenarios(benign_files: list) -> list:
             "result = subprocess.run(['echo', 'build ok'], capture_output=True)\n\n",
         ),
     ]
-    for i in range(15):
         bf = random.choice(benign_files)
         fp_name, fp_code = random.choice(fp_templates)
         suspicious_code = fp_code + bf["code"]
@@ -205,8 +209,8 @@ def generate_track_b_scenarios(benign_files: list) -> list:
             }
         )
-    # ── Functional / Clean (15) ──────────────────────────────
-    for i in range(15):
         bf = random.choice(benign_files)
         test_code = auto_generate_unit_test(bf["filename"], bf["code"])
         scenarios.append(
@@ -222,7 +226,7 @@ def generate_track_b_scenarios(benign_files: list) -> list:
             }
         )
-    return scenarios  # 55 total from Track B alone
 # ============================================================
@@ -486,6 +490,12 @@ def main():
         type=str,
         default="patchhawk/data/scenarios.json",
     )
     parser.add_argument(
         "--use-sdk",
         action="store_true",
@@ -535,7 +545,7 @@ def main():
         return
     # Track B (always)
-    scenarios = generate_track_b_scenarios(benign_files)
     # Track A (optional)
     if args.use_sdk:

 # ============================================================
+def generate_track_b_scenarios(benign_files: list, num_samples: int = 55) -> list:
+    """Generate proportional scenarios dynamically based on num_samples."""
     scenarios = []
+    tp_count = int(num_samples * 0.45)
+    fp_count = int(num_samples * 0.27)
+    fn_count = num_samples - tp_count - fp_count
+    # ── True Positives (45%) ──────────────────────────────────
+    for i in range(tp_count):
         bf = random.choice(benign_files)
         attack_name, attack_data = random.choice(list(ATTACK_TEMPLATES.items()))
         malicious_code = attack_data["inject"] + bf["code"]
             "result = subprocess.run(['echo', 'build ok'], capture_output=True)\n\n",
         ),
     ]
+    for i in range(fp_count):
         bf = random.choice(benign_files)
         fp_name, fp_code = random.choice(fp_templates)
         suspicious_code = fp_code + bf["code"]
             }
         )
+    # ── Functional / Clean (28%) ──────────────────────────────
+    for i in range(fn_count):
         bf = random.choice(benign_files)
         test_code = auto_generate_unit_test(bf["filename"], bf["code"])
         scenarios.append(
             }
         )
+    return scenarios
 # ============================================================
         type=str,
         default="patchhawk/data/scenarios.json",
     )
+    parser.add_argument(
+        "--num-samples",
+        type=int,
+        default=55,
+        help="Number of scenarios to generate with Track B (mutation engine).",
+    )
     parser.add_argument(
         "--use-sdk",
         action="store_true",
         return
     # Track B (always)
+    scenarios = generate_track_b_scenarios(benign_files, args.num_samples)
     # Track A (optional)
     if args.use_sdk:

patchhawk/data/scenarios.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

patchhawk/env_models.py CHANGED Viewed

@@ -53,6 +53,12 @@ class PatchHawkAction(Action):
     patch_content: Optional[str] = Field(
         None, description="The unified context patch if action is SUBMIT_PATCH"
     )
 # ── State ────────────────────────────────────────────────────────────

     patch_content: Optional[str] = Field(
         None, description="The unified context patch if action is SUBMIT_PATCH"
     )
+    reasoning: Optional[str] = Field(
+        None, description="Explanation of the vulnerability and chosen action"
+    )
+    predicted_risk: Optional[float] = Field(
+        None, description="LLM predicted risk score (0.0 to 1.0)"
+    )
 # ── State ────────────────────────────────────────────────────────────

patchhawk/training/train_grpo.py CHANGED Viewed

@@ -33,6 +33,7 @@ def _build_prompt(scenario: dict) -> str:
         f"<code_snippet>\n{scenario['code_snippet']}\n</code_snippet>\n"
         "Respond in STRICT XML:\n"
         "<thought>...</thought>\n"
         "<action>0-4</action>\n"
         "<patch>...</patch> (ONLY if action=3)\n"
     )
@@ -90,7 +91,10 @@ def train_agent(args):
     else:
         print("No GPU found — training will be slow.")
-    MODEL_NAME = "Qwen/Qwen2.5-Coder-3B-Instruct"
     # 4‑bit quantisation config
     bnb_config = BitsAndBytesConfig(
@@ -147,6 +151,10 @@ def train_agent(args):
                 score += 0.5
             else:
                 score -= 1.0
             if re.search(r"<action>[0-4]</action>", text):
                 score += 0.5
             else:
@@ -194,12 +202,22 @@ def train_agent(args):
             patch_match = re.search(r"<patch>(.*?)</patch>", text, re.DOTALL)
             if patch_match:
                 patch = patch_match.group(1).strip()
             try:
                 # Reset environment to the exact scenario
-                env.reset(scenario_idx=env.scenarios.index(scenario))
-                obs = env.step(PatchHawkAction(action_type=action_type, patch_content=patch))
-                rewards.append(float(obs.reward or 0.0))
             except Exception as exc:
                 print(f"env_reward crash: {exc}")
                 rewards.append(-3.0)

         f"<code_snippet>\n{scenario['code_snippet']}\n</code_snippet>\n"
         "Respond in STRICT XML:\n"
         "<thought>...</thought>\n"
+        "<risk_score>0.0 to 1.0</risk_score>\n"
         "<action>0-4</action>\n"
         "<patch>...</patch> (ONLY if action=3)\n"
     )
     else:
         print("No GPU found — training will be slow.")
+    from dotenv import load_dotenv
+    load_dotenv()
+    MODEL_NAME = os.getenv("GRPO_POLICY_MODEL", "Qwen/Qwen2.5-Coder-3B-Instruct")
     # 4‑bit quantisation config
     bnb_config = BitsAndBytesConfig(
                 score += 0.5
             else:
                 score -= 1.0
+            if re.search(r"<risk_score>[\d\.]+</risk_score>", text):
+                score += 0.5
+            else:
+                score -= 1.0
             if re.search(r"<action>[0-4]</action>", text):
                 score += 0.5
             else:
             patch_match = re.search(r"<patch>(.*?)</patch>", text, re.DOTALL)
             if patch_match:
                 patch = patch_match.group(1).strip()
+            risk_match = re.search(r"<risk_score>([\d\.]+)</risk_score>", text)
+            predicted_risk = float(risk_match.group(1)) if risk_match else None
             try:
                 # Reset environment to the exact scenario
+                env.reset(scenario=scenario)
+                obs = env.step(PatchHawkAction(
+                    action_type=action_type,
+                    patch_content=patch,
+                    predicted_risk=predicted_risk
+                ))
+                reward_val = float(obs.reward or 0.0)
+                rewards.append(reward_val)
+                val_msg = obs.metadata.get('validation') or ("Telemetry Extracted" if obs.metadata.get('telemetry') else "None")
+                print(f"[Env Reward] Action: {action_type} | Reward: {reward_val:+.2f} | Docker: {val_msg}")
             except Exception as exc:
                 print(f"env_reward crash: {exc}")
                 rewards.append(-3.0)