Spaces:
Sleeping
Sleeping
| """ | |
| CodeContext registry for each Phase-2-enabled scenario. | |
| Snapshots live under <repo_root>/snapshots/<name>/, bundled in the repo — | |
| no live GitHub API calls. Each context provides the snapshot path, the bad | |
| commit SHA, the ground-truth files/diff (used by `grader_p2.grade_patch_quality`) | |
| and a slot for the Pool-B null-context baseline (filled in by the | |
| `training/run_pool_b_baseline.py` runner — re-imported on demand). | |
| The `null_context_p2_score` field starts at a hand-tuned prior; it is | |
| overwritten in-place by the baseline runner once we have measurements. | |
| """ | |
| from __future__ import annotations | |
| from pathlib import Path | |
| from ..models import CodeContext | |
| # ────────────────────────────────────────────────────────────────────── | |
| # Snapshot-root resolution | |
| # ────────────────────────────────────────────────────────────────────── | |
# Directory two levels above this file (the parent of this module's package).
_PKG_ROOT = Path(__file__).resolve().parent.parent  # scaler-hackathon/
# Directory that holds the bundled repository snapshots.
SNAPSHOTS_ROOT = _PKG_ROOT / "snapshots"
def _snap(name: str) -> str:
    """Return the path of snapshot ``name`` under ``SNAPSHOTS_ROOT`` as a string.

    The path is produced by joining only; the directory is not checked for
    existence.
    """
    return str(SNAPSHOTS_ROOT / name)
| # ────────────────────────────────────────────────────────────────────── | |
| # Memory leak (easy) | |
| # ────────────────────────────────────────────────────────────────────── | |
# Code context registered under "memory_leak" in CODE_CONTEXTS.
# - repo_snapshot_path: the "orders_v231" directory under SNAPSHOTS_ROOT.
# - ground_truth_diff: unified diff that adds `self._cache.clear()` in
#   orders/handlers/batch.py, the only entry in ground_truth_files.
# - null_context_p2_score: starting prior; update_null_baseline() can
#   overwrite it at runtime.
# NOTE(review): as it appears in this copy, the diff literal has no leading
# whitespace on hunk lines and its @@ line counts (6/7) do not match the hunk
# body — confirm the literal against the repository before editing.
| MEMORY_LEAK_CODE_CONTEXT = CodeContext( | |
| repo_snapshot_path = _snap("orders_v231"), | |
| bad_commit_sha = "a3f7c91", | |
| ground_truth_files = ["orders/handlers/batch.py"], | |
| ground_truth_diff = """--- a/orders/handlers/batch.py | |
| +++ b/orders/handlers/batch.py | |
| @@ -41,6 +41,7 @@ class BatchProcessor: | |
| for order in orders: | |
| self._cache[order.id] = order | |
| + self._cache.clear() | |
| self._notify(orders) | |
| """, | |
| is_valid_issue = True, | |
| expected_p2_steps = 5, | |
| null_context_p2_score = 0.21, | |
| ) | |
| # ────────────────────────────────────────────────────────────────────── | |
| # Cascading failure (medium) | |
| # ────────────────────────────────────────────────────────────────────── | |
# Code context registered under "cascading_failure" in CODE_CONTEXTS.
# - repo_snapshot_path: the "auth_v180" directory under SNAPSHOTS_ROOT.
# - ground_truth_diff: in auth/config.py, replaces the bare
#   `os.environ.get("JWT_SECRET")` with one that falls back to
#   `_DEFAULT_DEV_SECRET` when the lookup is falsy.
# - null_context_p2_score: starting prior; update_null_baseline() can
#   overwrite it at runtime.
# NOTE(review): the @@ header declares 3 lines on each side but the hunk body
# shown here has one removal and one addition — confirm against the repository.
| CASCADING_FAILURE_CODE_CONTEXT = CodeContext( | |
| repo_snapshot_path = _snap("auth_v180"), | |
| bad_commit_sha = "b8e2d44", | |
| ground_truth_files = ["auth/config.py"], | |
| ground_truth_diff = """--- a/auth/config.py | |
| +++ b/auth/config.py | |
| @@ -10,3 +10,3 @@ | |
| -JWT_SECRET = os.environ.get("JWT_SECRET") | |
| +JWT_SECRET = os.environ.get("JWT_SECRET") or _DEFAULT_DEV_SECRET | |
| """, | |
| is_valid_issue = True, | |
| expected_p2_steps = 4, | |
| null_context_p2_score = 0.18, | |
| ) | |
| # ────────────────────────────────────────────────────────────────────── | |
| # Distributed deadlock (hard) | |
| # ────────────────────────────────────────────────────────────────────── | |
# Code context registered under "distributed_deadlock" in CODE_CONTEXTS.
# - repo_snapshot_path: the "payment_v310" directory under SNAPSHOTS_ROOT.
# - ground_truth_diff: in payment/processor.py, adds a capped exponential
#   delay (`min(2 ** self.retry_count, 30)`) and a `time.sleep(delay)` before
#   the transaction is re-enqueued in `retry`.
# - null_context_p2_score: starting prior; update_null_baseline() can
#   overwrite it at runtime.
# NOTE(review): as it appears in this copy, the diff literal has no leading
# whitespace on hunk lines and its @@ line counts (5/7) do not match the hunk
# body — confirm the literal against the repository before editing.
| DISTRIBUTED_DEADLOCK_CODE_CONTEXT = CodeContext( | |
| repo_snapshot_path = _snap("payment_v310"), | |
| bad_commit_sha = "c5a1f77", | |
| ground_truth_files = ["payment/processor.py"], | |
| ground_truth_diff = """--- a/payment/processor.py | |
| +++ b/payment/processor.py | |
| @@ -85,5 +85,7 @@ class PaymentProcessor: | |
| def retry(self, txn): | |
| + delay = min(2 ** self.retry_count, 30) | |
| + time.sleep(delay) | |
| self._queue.enqueue(txn) | |
| """, | |
| is_valid_issue = True, | |
| expected_p2_steps = 10, | |
| null_context_p2_score = 0.09, | |
| ) | |
| # ────────────────────────────────────────────────────────────────────── | |
| # Circuit breaker — no-change scenario (the patch grader should reject any | |
| # proposed diff and reward `declare_no_change`). | |
| # ────────────────────────────────────────────────────────────────────── | |
# Code context registered under "circuit_breaker_noop" in CODE_CONTEXTS.
# - repo_snapshot_path: the "orders_v300" directory under SNAPSHOTS_ROOT.
# - is_valid_issue is False and both ground_truth_files and ground_truth_diff
#   are empty: there is no reference patch for this scenario.
# - bad_commit_sha is still populated even though no fix is expected.
# - null_context_p2_score: starting prior; update_null_baseline() can
#   overwrite it at runtime.
| CIRCUIT_BREAKER_CODE_CONTEXT = CodeContext( | |
| repo_snapshot_path = _snap("orders_v300"), | |
| bad_commit_sha = "d2b9c11", | |
| ground_truth_files = [], | |
| ground_truth_diff = "", | |
| is_valid_issue = False, | |
| expected_p2_steps = 6, | |
| null_context_p2_score = 0.15, | |
| ) | |
| # ────────────────────────────────────────────────────────────────────── | |
| # Phase-B scenarios (RL-discoverable, the brief's four types) | |
| # ────────────────────────────────────────────────────────────────────── | |
# Code context registered under "aliased_fault" in CODE_CONTEXTS.
# - repo_snapshot_path: the "queue_v210" directory under SNAPSHOTS_ROOT.
# - ground_truth_diff: in queue/worker.py, `flush` iterates over a
#   `list(...)` copy of `batch.items()` and adds a `batch.clear()` call.
# - null_context_p2_score: starting prior; update_null_baseline() can
#   overwrite it at runtime.
# NOTE(review): as it appears in this copy, the diff literal has no leading
# whitespace on hunk lines and its @@ line counts (4/5) do not match the hunk
# body — confirm the literal against the repository before editing.
| ALIASED_FAULT_CODE_CONTEXT = CodeContext( | |
| repo_snapshot_path = _snap("queue_v210"), | |
| bad_commit_sha = "e1f4a02", | |
| ground_truth_files = ["queue/worker.py"], | |
| ground_truth_diff = """--- a/queue/worker.py | |
| +++ b/queue/worker.py | |
| @@ -22,4 +22,5 @@ class CacheWriter: | |
| def flush(self, batch): | |
| - for k, v in batch.items(): | |
| + for k, v in list(batch.items()): | |
| self._cache.set(k, v) | |
| + batch.clear() | |
| """, | |
| is_valid_issue = True, | |
| expected_p2_steps = 7, | |
| null_context_p2_score = 0.16, | |
| ) | |
# Code context registered under "severity_inversion" in CODE_CONTEXTS.
# - repo_snapshot_path: the "orders_retry_storm" directory under
#   SNAPSHOTS_ROOT (also used by HELDOUT_ALIASED_SEVERITY_CODE_CONTEXT).
# - ground_truth_diff: in orders/auth_client.py, changes the
#   `_call_with_retries` call from `retries=20` to `retries=2` and adds
#   `backoff_seconds=0.5`.
# - null_context_p2_score: starting prior; update_null_baseline() can
#   overwrite it at runtime.
# NOTE(review): as it appears in this copy, the diff literal has no leading
# whitespace on hunk lines and its @@ line counts (5/6) do not match the hunk
# body — confirm the literal against the repository before editing.
| SEVERITY_INVERSION_CODE_CONTEXT = CodeContext( | |
| repo_snapshot_path = _snap("orders_retry_storm"), | |
| bad_commit_sha = "f8c9b13", | |
| ground_truth_files = ["orders/auth_client.py"], | |
| ground_truth_diff = """--- a/orders/auth_client.py | |
| +++ b/orders/auth_client.py | |
| @@ -15,5 +15,6 @@ class AuthClient: | |
| def validate(self, token): | |
| - return self._call_with_retries(token, retries=20) | |
| + return self._call_with_retries(token, retries=2, | |
| + backoff_seconds=0.5) | |
| """, | |
| is_valid_issue = True, | |
| expected_p2_steps = 8, | |
| null_context_p2_score = 0.12, | |
| ) | |
# Code context registered under "confidence_inversion" in CODE_CONTEXTS.
# - repo_snapshot_path: the "payment_threadpool" directory under
#   SNAPSHOTS_ROOT.
# - ground_truth_diff: in payment/threadpool.py, wraps the two lock
#   acquisitions in `acquire` inside `with self._global_order:`.
# - null_context_p2_score: starting prior; update_null_baseline() can
#   overwrite it at runtime.
# NOTE(review): as it appears in this copy, the diff literal has no leading
# whitespace on hunk lines and its @@ line counts (4/5) do not match the hunk
# body — confirm the literal against the repository before editing.
| CONFIDENCE_INVERSION_CODE_CONTEXT = CodeContext( | |
| repo_snapshot_path = _snap("payment_threadpool"), | |
| bad_commit_sha = "11abf04", | |
| ground_truth_files = ["payment/threadpool.py"], | |
| ground_truth_diff = """--- a/payment/threadpool.py | |
| +++ b/payment/threadpool.py | |
| @@ -8,4 +8,5 @@ class PoolWorker: | |
| def acquire(self): | |
| - self._lock_a.acquire() | |
| - self._lock_b.acquire() | |
| + with self._global_order: | |
| + self._lock_a.acquire() | |
| + self._lock_b.acquire() | |
| """, | |
| is_valid_issue = True, | |
| expected_p2_steps = 9, | |
| null_context_p2_score = 0.10, | |
| ) | |
# Code context registered under "info_ordering" in CODE_CONTEXTS.
# - repo_snapshot_path: the "shared_libs_dep" directory under SNAPSHOTS_ROOT.
# - bad_commit_sha and snapshot are the same values used by
#   HELDOUT_CONFIDENCE_ORDERING_CODE_CONTEXT.
# - ground_truth_diff: pins shared-serializer back from 1.4.2 to 1.3.0 in
#   requirements.txt.
# - null_context_p2_score: starting prior; update_null_baseline() can
#   overwrite it at runtime.
# NOTE(review): ground_truth_files also lists shared/serializer.py, but the
# diff below only touches requirements.txt — confirm this is intended.
| INFO_ORDERING_CODE_CONTEXT = CodeContext( | |
| repo_snapshot_path = _snap("shared_libs_dep"), | |
| bad_commit_sha = "9d2e7af", | |
| ground_truth_files = ["requirements.txt", "shared/serializer.py"], | |
| ground_truth_diff = """--- a/requirements.txt | |
| +++ b/requirements.txt | |
| @@ -3,1 +3,1 @@ | |
| -shared-serializer==1.4.2 | |
| +shared-serializer==1.3.0 | |
| """, | |
| is_valid_issue = True, | |
| expected_p2_steps = 9, | |
| null_context_p2_score = 0.11, | |
| ) | |
| # ────────────────────────────────────────────────────────────────────── | |
| # Pool-D held-out scenarios | |
| # ────────────────────────────────────────────────────────────────────── | |
# Code context registered under "heldout_aliased_severity" in CODE_CONTEXTS.
# - repo_snapshot_path: the "orders_retry_storm" directory under
#   SNAPSHOTS_ROOT (the same snapshot as SEVERITY_INVERSION_CODE_CONTEXT, with
#   a different bad_commit_sha).
# - ground_truth_diff: in orders/auth_client.py, changes the
#   `_call_with_retries` call from `retries=25` to `retries=2` and adds
#   `backoff_seconds=0.5`.
# - null_context_p2_score: starting prior; update_null_baseline() can
#   overwrite it at runtime.
# NOTE(review): as it appears in this copy, the diff literal has no leading
# whitespace on hunk lines and its @@ line counts (5/6) do not match the hunk
# body — confirm the literal against the repository before editing.
| HELDOUT_ALIASED_SEVERITY_CODE_CONTEXT = CodeContext( | |
| repo_snapshot_path = _snap("orders_retry_storm"), | |
| bad_commit_sha = "d09a4f1", | |
| ground_truth_files = ["orders/auth_client.py"], | |
| ground_truth_diff = """--- a/orders/auth_client.py | |
| +++ b/orders/auth_client.py | |
| @@ -15,5 +15,6 @@ class AuthClient: | |
| def validate(self, token): | |
| - return self._call_with_retries(token, retries=25) | |
| + return self._call_with_retries(token, retries=2, | |
| + backoff_seconds=0.5) | |
| """, | |
| is_valid_issue = True, | |
| expected_p2_steps = 9, | |
| null_context_p2_score = 0.10, | |
| ) | |
# Code context registered under "heldout_confidence_ordering" in CODE_CONTEXTS.
# - repo_snapshot_path: the "shared_libs_dep" directory under SNAPSHOTS_ROOT.
# - bad_commit_sha, snapshot and diff are the same values used by
#   INFO_ORDERING_CODE_CONTEXT; ground_truth_files here lists only
#   requirements.txt.
# - ground_truth_diff: pins shared-serializer back from 1.4.2 to 1.3.0.
# - null_context_p2_score: starting prior; update_null_baseline() can
#   overwrite it at runtime.
| HELDOUT_CONFIDENCE_ORDERING_CODE_CONTEXT = CodeContext( | |
| repo_snapshot_path = _snap("shared_libs_dep"), | |
| bad_commit_sha = "9d2e7af", | |
| ground_truth_files = ["requirements.txt"], | |
| ground_truth_diff = """--- a/requirements.txt | |
| +++ b/requirements.txt | |
| @@ -3,1 +3,1 @@ | |
| -shared-serializer==1.4.2 | |
| +shared-serializer==1.3.0 | |
| """, | |
| is_valid_issue = True, | |
| expected_p2_steps = 10, | |
| null_context_p2_score = 0.10, | |
| ) | |
| # ────────────────────────────────────────────────────────────────────── | |
| # Lookup helpers (used by Pool-B baseline runner to write baselines back) | |
| # ────────────────────────────────────────────────────────────────────── | |
# Registry of every CodeContext defined in this module, keyed by task name.
# update_null_baseline() resolves task names through this mapping, and the
# values are the same objects as the module-level constants (not copies).
CODE_CONTEXTS = {
    "memory_leak": MEMORY_LEAK_CODE_CONTEXT,
    "cascading_failure": CASCADING_FAILURE_CODE_CONTEXT,
    "distributed_deadlock": DISTRIBUTED_DEADLOCK_CODE_CONTEXT,
    "circuit_breaker_noop": CIRCUIT_BREAKER_CODE_CONTEXT,
    "aliased_fault": ALIASED_FAULT_CODE_CONTEXT,
    "severity_inversion": SEVERITY_INVERSION_CODE_CONTEXT,
    "confidence_inversion": CONFIDENCE_INVERSION_CODE_CONTEXT,
    "info_ordering": INFO_ORDERING_CODE_CONTEXT,
    "heldout_aliased_severity": HELDOUT_ALIASED_SEVERITY_CODE_CONTEXT,
    "heldout_confidence_ordering": HELDOUT_CONFIDENCE_ORDERING_CODE_CONTEXT,
}
def update_null_baseline(task_name: str, score: float) -> None:
    """Overwrite the in-process null-context baseline for ``task_name``.

    Args:
        task_name: Key into ``CODE_CONTEXTS``.
        score: New baseline value; coerced with ``float()`` before it is stored.

    Raises:
        KeyError: If ``task_name`` has no entry in ``CODE_CONTEXTS``.
    """
    ctx = CODE_CONTEXTS.get(task_name)
    if ctx is None:
        raise KeyError(f"No code context for task {task_name}")
    # Mutates the shared registry object in place, so the matching
    # module-level constant sees the new value too. Nothing is written to disk.
    ctx.null_context_p2_score = float(score)