Spaces:

CreativeEngineer
/

vliw-optimizer

Sleeping

App Files Community

CreativeEngineer committed on Jan 26

Commit

c166ffe

1 Parent(s): c5c47e3

Add few-shot prompt and shaped rewards

Browse files

Files changed (1) hide show

app.py +98 -6

app.py CHANGED Viewed

@@ -80,6 +80,9 @@ except Exception as e:
 BASELINE_CYCLES = 147734
 TARGET_CYCLES = 1363
 SCORE_SCALE = 3000.0
 PERSIST_DIR = "/data" if os.path.isdir("/data") else "."
 ADAPTER_DIR = os.path.join(PERSIST_DIR, "adapters", "perf_takehome_latest")
 ADAPTER_DATASET_REPO = os.environ.get("ADAPTER_DATASET_REPO", "CreativeEngineer/vliw-optimizer-adapters")
@@ -172,9 +175,9 @@ def _try_download_adapter(add_log) -> None:
                             fdst.write(fsrc.read())
             add_log(f"[OK] Downloaded adapter from dataset: {ADAPTER_DATASET_REPO}/{ADAPTER_DATASET_SUBDIR}")
         else:
-            add_log("ℹ No adapter found in dataset yet")
     except Exception as e:
-        add_log(f"ℹ Adapter download skipped: {str(e)[:160]}")
 def _try_upload_adapter(add_log) -> None:
@@ -182,11 +185,11 @@ def _try_upload_adapter(add_log) -> None:
         add_log("[ERR] Hub sync disabled: huggingface_hub not available")
         return
     if not _adapter_exists(ADAPTER_DIR):
-        add_log("ℹ No adapter to upload yet")
         return
     token = _hf_token()
     if token is None:
-        add_log("ℹ No HF token set (HF_TOKEN/HUGGINGFACE_HUB_TOKEN); skipping upload")
         return
     try:
         api = HfApi(token=token)
@@ -200,7 +203,7 @@ def _try_upload_adapter(add_log) -> None:
         )
         add_log(f"[OK] Uploaded adapter to dataset: {ADAPTER_DATASET_REPO}/{ADAPTER_DATASET_SUBDIR}")
     except Exception as e:
-        add_log(f"ℹ Adapter upload skipped: {str(e)[:160]}")
 def _run_machine_with_cycle_limit(machine: Machine, max_cycles: int) -> bool:
@@ -264,6 +267,9 @@ def verify_perf_takehome_code(code: str, seed: int = 123) -> dict:
             "correctness": 0.0,
             "cycles": None,
             "msg": "Simulator unavailable",
         }
     try:
@@ -274,6 +280,22 @@ def verify_perf_takehome_code(code: str, seed: int = 123) -> dict:
                 "correctness": 0.0,
                 "cycles": None,
                 "msg": "Empty code",
             }
         if "OptimizedKernelBuilder" not in code:
@@ -282,6 +304,9 @@ def verify_perf_takehome_code(code: str, seed: int = 123) -> dict:
                 "correctness": 0.0,
                 "cycles": None,
                 "msg": "Missing OptimizedKernelBuilder",
             }
         if "def run" not in code:
@@ -290,6 +315,9 @@ def verify_perf_takehome_code(code: str, seed: int = 123) -> dict:
                 "correctness": 0.0,
                 "cycles": None,
                 "msg": "Missing run()",
             }
         safe_builtins = {
@@ -316,7 +344,18 @@ def verify_perf_takehome_code(code: str, seed: int = 123) -> dict:
             "SLOT_LIMITS": SLOT_LIMITS,
         }
-        exec(code, exec_globals)
         if "OptimizedKernelBuilder" not in exec_globals:
             return {
@@ -324,6 +363,9 @@ def verify_perf_takehome_code(code: str, seed: int = 123) -> dict:
                 "correctness": 0.0,
                 "cycles": None,
                 "msg": "OptimizedKernelBuilder not defined after exec",
             }
         ctx = _get_eval_context(seed)
@@ -351,6 +393,9 @@ def verify_perf_takehome_code(code: str, seed: int = 123) -> dict:
                 "correctness": 0.0,
                 "cycles": int(machine.cycle),
                 "msg": f"Exceeded cycle limit (cycles={machine.cycle})",
             }
         cycles = machine.cycle
@@ -360,6 +405,9 @@ def verify_perf_takehome_code(code: str, seed: int = 123) -> dict:
                 "correctness": 0.0,
                 "cycles": int(cycles),
                 "msg": f"Suspiciously low cycles ({cycles})",
             }
         if cycles > 200000:
             return {
@@ -367,6 +415,9 @@ def verify_perf_takehome_code(code: str, seed: int = 123) -> dict:
                 "correctness": 0.0,
                 "cycles": int(cycles),
                 "msg": f"Cycles too high ({cycles})",
             }
         inp_values_p = ctx["inp_values_p"]
@@ -378,6 +429,9 @@ def verify_perf_takehome_code(code: str, seed: int = 123) -> dict:
                 "correctness": 0.0,
                 "cycles": int(cycles),
                 "msg": f"Incorrect output (cycles={cycles})",
             }
         score = SCORE_SCALE / cycles
@@ -386,6 +440,9 @@ def verify_perf_takehome_code(code: str, seed: int = 123) -> dict:
             "correctness": 1.0,
             "cycles": int(cycles),
             "msg": f"Success: {cycles} cycles",
         }
     except Exception as e:
         return {
@@ -393,6 +450,9 @@ def verify_perf_takehome_code(code: str, seed: int = 123) -> dict:
             "correctness": 0.0,
             "cycles": None,
             "msg": f"Execution error: {str(e)[:200]}",
         }
@@ -410,6 +470,13 @@ def perf_takehome_reward_fn(completions, prompts=None, **kwargs):
         reward = 0.0
         if result.get("correctness", 0.0) > 0:
             reward = float(result["score"]) + 1.0
             cycles = result.get("cycles")
             with state_lock:
                 if isinstance(cycles, int) and cycles < training_state["best_cycles"]:
@@ -420,6 +487,29 @@ def perf_takehome_reward_fn(completions, prompts=None, **kwargs):
 # Prompt template for VLIW optimization
 PERF_TAKEHOME_PROMPT = f"""Write an optimized VLIW/SIMD kernel. OUTPUT ONLY ONE ```python CODE BLOCK.
 ARCHITECTURE: 12 ALU + 6 VALU (VLEN=8) + 2 load + 2 store + 1 flow slots per cycle. 1536-word scratch.
@@ -454,6 +544,8 @@ RULES:
 - No imports.
 Baseline: {BASELINE_CYCLES:,} cycles. Target: <{TARGET_CYCLES:,} cycles.
 """

 BASELINE_CYCLES = 147734
 TARGET_CYCLES = 1363
 SCORE_SCALE = 3000.0
+PARSE_REWARD = 0.02
+API_REWARD = 0.05
+EXEC_REWARD = 0.10
 PERSIST_DIR = "/data" if os.path.isdir("/data") else "."
 ADAPTER_DIR = os.path.join(PERSIST_DIR, "adapters", "perf_takehome_latest")
 ADAPTER_DATASET_REPO = os.environ.get("ADAPTER_DATASET_REPO", "CreativeEngineer/vliw-optimizer-adapters")
                             fdst.write(fsrc.read())
             add_log(f"[OK] Downloaded adapter from dataset: {ADAPTER_DATASET_REPO}/{ADAPTER_DATASET_SUBDIR}")
         else:
+            add_log("[INFO] No adapter found in dataset yet")
     except Exception as e:
+        add_log(f"[INFO] Adapter download skipped: {str(e)[:160]}")
 def _try_upload_adapter(add_log) -> None:
         add_log("[ERR] Hub sync disabled: huggingface_hub not available")
         return
     if not _adapter_exists(ADAPTER_DIR):
+        add_log("[INFO] No adapter to upload yet")
         return
     token = _hf_token()
     if token is None:
+        add_log("[INFO] No HF token set (HF_TOKEN/HUGGINGFACE_HUB_TOKEN); skipping upload")
         return
     try:
         api = HfApi(token=token)
         )
         add_log(f"[OK] Uploaded adapter to dataset: {ADAPTER_DATASET_REPO}/{ADAPTER_DATASET_SUBDIR}")
     except Exception as e:
+        add_log(f"[INFO] Adapter upload skipped: {str(e)[:160]}")
 def _run_machine_with_cycle_limit(machine: Machine, max_cycles: int) -> bool:
             "correctness": 0.0,
             "cycles": None,
             "msg": "Simulator unavailable",
+            "parse_ok": False,
+            "api_ok": False,
+            "exec_ok": False,
         }
     try:
                 "correctness": 0.0,
                 "cycles": None,
                 "msg": "Empty code",
+                "parse_ok": False,
+                "api_ok": False,
+                "exec_ok": False,
+            }
+        try:
+            compile(code, "<string>", "exec")
+        except Exception as e:
+            return {
+                "score": 0.0,
+                "correctness": 0.0,
+                "cycles": None,
+                "msg": f"Syntax error: {str(e)[:200]}",
+                "parse_ok": False,
+                "api_ok": False,
+                "exec_ok": False,
             }
         if "OptimizedKernelBuilder" not in code:
                 "correctness": 0.0,
                 "cycles": None,
                 "msg": "Missing OptimizedKernelBuilder",
+                "parse_ok": True,
+                "api_ok": False,
+                "exec_ok": False,
             }
         if "def run" not in code:
                 "correctness": 0.0,
                 "cycles": None,
                 "msg": "Missing run()",
+                "parse_ok": True,
+                "api_ok": False,
+                "exec_ok": False,
             }
         safe_builtins = {
             "SLOT_LIMITS": SLOT_LIMITS,
         }
+        try:
+            exec(code, exec_globals)
+        except Exception as e:
+            return {
+                "score": 0.0,
+                "correctness": 0.0,
+                "cycles": None,
+                "msg": f"Execution error: {str(e)[:200]}",
+                "parse_ok": True,
+                "api_ok": True,
+                "exec_ok": False,
+            }
         if "OptimizedKernelBuilder" not in exec_globals:
             return {
                 "correctness": 0.0,
                 "cycles": None,
                 "msg": "OptimizedKernelBuilder not defined after exec",
+                "parse_ok": True,
+                "api_ok": True,
+                "exec_ok": True,
             }
         ctx = _get_eval_context(seed)
                 "correctness": 0.0,
                 "cycles": int(machine.cycle),
                 "msg": f"Exceeded cycle limit (cycles={machine.cycle})",
+                "parse_ok": True,
+                "api_ok": True,
+                "exec_ok": True,
             }
         cycles = machine.cycle
                 "correctness": 0.0,
                 "cycles": int(cycles),
                 "msg": f"Suspiciously low cycles ({cycles})",
+                "parse_ok": True,
+                "api_ok": True,
+                "exec_ok": True,
             }
         if cycles > 200000:
             return {
                 "correctness": 0.0,
                 "cycles": int(cycles),
                 "msg": f"Cycles too high ({cycles})",
+                "parse_ok": True,
+                "api_ok": True,
+                "exec_ok": True,
             }
         inp_values_p = ctx["inp_values_p"]
                 "correctness": 0.0,
                 "cycles": int(cycles),
                 "msg": f"Incorrect output (cycles={cycles})",
+                "parse_ok": True,
+                "api_ok": True,
+                "exec_ok": True,
             }
         score = SCORE_SCALE / cycles
             "correctness": 1.0,
             "cycles": int(cycles),
             "msg": f"Success: {cycles} cycles",
+            "parse_ok": True,
+            "api_ok": True,
+            "exec_ok": True,
         }
     except Exception as e:
         return {
             "correctness": 0.0,
             "cycles": None,
             "msg": f"Execution error: {str(e)[:200]}",
+            "parse_ok": False,
+            "api_ok": False,
+            "exec_ok": False,
         }
         reward = 0.0
         if result.get("correctness", 0.0) > 0:
             reward = float(result["score"]) + 1.0
+        else:
+            if result.get("parse_ok"):
+                reward += PARSE_REWARD
+            if result.get("api_ok"):
+                reward += API_REWARD
+            if result.get("exec_ok"):
+                reward += EXEC_REWARD
             cycles = result.get("cycles")
             with state_lock:
                 if isinstance(cycles, int) and cycles < training_state["best_cycles"]:
 # Prompt template for VLIW optimization
+FEWSHOT_EXAMPLES = """Example format (not optimized):
+```python
+class OptimizedKernelBuilder(KernelBuilder):
+    def build_kernel(self, forest_height, n_nodes, batch_size, rounds):
+        self.add("flow", ("halt",))
+def run():
+    return (0,)
+```
+Example with scratch + load:
+```python
+class OptimizedKernelBuilder(KernelBuilder):
+    def build_kernel(self, forest_height, n_nodes, batch_size, rounds):
+        tmp = self.alloc_scratch("tmp")
+        self.add("load", ("const", tmp, 0))
+        self.add("flow", ("halt",))
+def run():
+    return (0,)
+```
+"""
 PERF_TAKEHOME_PROMPT = f"""Write an optimized VLIW/SIMD kernel. OUTPUT ONLY ONE ```python CODE BLOCK.
 ARCHITECTURE: 12 ALU + 6 VALU (VLEN=8) + 2 load + 2 store + 1 flow slots per cycle. 1536-word scratch.
 - No imports.
 Baseline: {BASELINE_CYCLES:,} cycles. Target: <{TARGET_CYCLES:,} cycles.
+{FEWSHOT_EXAMPLES}
 """