Aswini-Kumar committed on
Commit
f83d2aa
·
verified ·
1 Parent(s): d3408a0

upload: server/task_generator.py

Browse files
Files changed (1) hide show
  1. server/task_generator.py +510 -0
server/task_generator.py ADDED
@@ -0,0 +1,510 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ server/task_generator.py
3
+
4
+ Generates coding tasks with:
5
+ 1. Name randomization per episode seed — prevents session 2 reconstructing from
6
+ pretrained knowledge without reading the handoff note.
7
+ 2. Injected hidden adversarial tests — prevents visible-test overfitting.
8
+ 3. Handoff-critical calibration — tasks are designed so session 1 cannot
9
+ fully finish within the step limit.
10
+ """
11
+
12
+ import copy
13
+ import json
14
+ import os
15
+ import random
16
+ from dataclasses import dataclass, field
17
+ from typing import Dict, List, Optional
18
+
19
+
20
+ # ---------------------------------------------------------------------------
21
+ # Canonical → variant name bank (expanded per task template)
22
+ # ---------------------------------------------------------------------------
23
+
24
# Canonical identifier -> pool of episode-specific aliases.
# Insertion order matters: TaskGenerator._randomize_names walks this dict in
# order and draws one random alias per key, so reordering entries changes
# which alias a given seed produces.
NAME_BANK: Dict[str, List[str]] = {
    # --- data structures ---
    "merge_intervals": ["combine_ranges", "fuse_spans", "join_segments"],
    "Stack": ["Accumulator", "PushPop", "LifoStore"],
    "push": ["enqueue_item", "add_entry", "store_val"],
    "pop": ["dequeue_item", "remove_entry", "fetch_val"],

    # --- rate limiting ---
    "RateLimiter": ["ThrottleGuard", "RequestBucket", "AccessGate"],
    "is_allowed": ["check_permit", "can_proceed", "gate_request"],

    # --- LRU cache ---
    "LRUCache": ["BoundedCache", "EvictStore", "MruVault"],
    "get": ["fetch", "retrieve", "lookup"],
    "put": ["store", "insert", "upsert"],

    # --- data processing ---
    "process_data": ["transform_records", "handle_payload", "digest_input"],
    "normalize": ["standardize", "rescale", "calibrate"],

    # --- graph ---
    "TopologicalSort": ["DependencyOrder", "DAGResolver", "LayerSorter"],
    "add_edge": ["link_nodes", "connect_dep", "wire_pair"],

    # --- retry ---
    "RetryExecutor": ["FaultTolerant", "BackoffRunner", "ResilienceWrap"],
    "execute": ["run_with_retry", "attempt_call", "safe_invoke"],
}
47
+
48
+
49
+ # ---------------------------------------------------------------------------
50
+ # Data classes
51
+ # ---------------------------------------------------------------------------
52
+
53
@dataclass
class Task:
    """A single generated coding task together with its runtime file state.

    The first six fields are required and positional; ``files`` defaults to a
    fresh empty dict for every instance.
    """

    # Identifier of the template this task was built from.
    task_id: str
    # Difficulty label carried over from the template.
    difficulty: str
    # Natural-language task statement.
    description: str
    # filename -> source
    starter_code: Dict[str, str]
    # Visible pytest suite.
    test_code: str
    # Adversarial hidden suite.
    hidden_test_code: str
    # Runtime state (filename -> current source).
    files: Dict[str, str] = field(default_factory=dict)
62
+
63
+
64
+ # ---------------------------------------------------------------------------
65
+ # Task templates (inline — no external files required for dev/demo)
66
+ # ---------------------------------------------------------------------------
67
+
68
# Inline bank of task templates, keyed by template id.
#
# Every template is a dict with the same five keys:
#   "difficulty"        - "easy", "medium" or "hard"
#   "description"       - task statement, including the Session 1 / Session 2 split
#   "starter_code"      - filename -> initial source given to the agent
#   "test_code"         - source of the visible pytest suite
#   "hidden_test_code"  - source of the hidden pytest suite
#
# The embedded sources are plain strings; their exact bytes (including the
# four-space indentation inside the literals) are what gets written out, so
# they must not be reformatted.
TASK_TEMPLATES = {
    # -----------------------------------------------------------------------
    # EASY tasks
    # -----------------------------------------------------------------------
    # Interval merging; hidden suite covers empty and single-interval input.
    "easy_merge_intervals": {
        "difficulty": "easy",
        "description": (
            "Implement merge_intervals(intervals: list[list[int]]) -> list[list[int]] "
            "in solution.py. The function receives a list of [start, end] intervals "
            "and must return a merged list with no overlaps. "
            "Session 1 should implement the sort + sweep logic and pass visible tests. "
            "Session 2 must handle edge cases (empty input, single interval, touching "
            "but non-overlapping intervals)."
        ),
        "starter_code": {
            "solution.py": (
                "def merge_intervals(intervals):\n"
                "    # TODO: implement\n"
                "    pass\n"
            )
        },
        "test_code": (
            "from solution import merge_intervals\n\n"
            "def test_basic():\n"
            "    assert merge_intervals([[1,3],[2,6],[8,10],[15,18]]) == [[1,6],[8,10],[15,18]]\n\n"
            "def test_overlapping():\n"
            "    assert merge_intervals([[1,4],[4,5]]) == [[1,5]]\n\n"
            "def test_no_overlap():\n"
            "    assert merge_intervals([[1,2],[3,4]]) == [[1,2],[3,4]]\n"
        ),
        "hidden_test_code": (
            "from solution import merge_intervals\n\n"
            "def test_empty():\n"
            "    assert merge_intervals([]) == []\n\n"
            "def test_single():\n"
            "    assert merge_intervals([[5,5]]) == [[5,5]]\n"
        ),
    },

    # Stack class; hidden suite checks __repr__ and iteration (top first).
    "easy_stack": {
        "difficulty": "easy",
        "description": (
            "Implement a Stack class in solution.py with push(val), pop() -> val, "
            "peek() -> val, is_empty() -> bool, and size() -> int. "
            "pop() and peek() on empty stack should raise IndexError. "
            "Session 1: implement and pass visible tests. "
            "Session 2: add __repr__ and make the class iterable."
        ),
        "starter_code": {
            "solution.py": (
                "class Stack:\n"
                "    def __init__(self):\n"
                "        # TODO\n"
                "        pass\n"
            )
        },
        "test_code": (
            "from solution import Stack\n\n"
            "def test_push_pop():\n"
            "    s = Stack()\n"
            "    s.push(1); s.push(2)\n"
            "    assert s.pop() == 2\n\n"
            "def test_empty_pop():\n"
            "    import pytest\n"
            "    s = Stack()\n"
            "    with pytest.raises(IndexError):\n"
            "        s.pop()\n\n"
            "def test_size():\n"
            "    s = Stack()\n"
            "    s.push(10); s.push(20)\n"
            "    assert s.size() == 2\n"
        ),
        "hidden_test_code": (
            "from solution import Stack\n\n"
            "def test_repr():\n"
            "    s = Stack()\n"
            "    s.push(1)\n"
            "    assert '1' in repr(s)\n\n"
            "def test_iterable():\n"
            "    s = Stack()\n"
            "    for v in [3,2,1]:\n"
            "        s.push(v)\n"
            "    assert list(s) == [1, 2, 3]\n"
        ),
    },

    # Running median; hidden suite checks reset() and the from_list classmethod.
    "easy_running_median": {
        "difficulty": "easy",
        "description": (
            "Implement RunningMedian in solution.py. It must support add(num) and "
            "get_median() -> float. Uses two heaps internally. "
            "Session 1: implement heap-based median, pass visible tests. "
            "Session 2: add reset() and from_list(nums) classmethod."
        ),
        "starter_code": {
            "solution.py": (
                "class RunningMedian:\n"
                "    def __init__(self):\n"
                "        # TODO: two-heap approach\n"
                "        pass\n"
            )
        },
        "test_code": (
            "from solution import RunningMedian\n\n"
            "def test_basic():\n"
            "    rm = RunningMedian()\n"
            "    rm.add(1); rm.add(2); rm.add(3)\n"
            "    assert rm.get_median() == 2.0\n\n"
            "def test_even():\n"
            "    rm = RunningMedian()\n"
            "    rm.add(1); rm.add(2)\n"
            "    assert rm.get_median() == 1.5\n"
        ),
        "hidden_test_code": (
            "from solution import RunningMedian\n\n"
            "def test_reset():\n"
            "    rm = RunningMedian()\n"
            "    rm.add(5)\n"
            "    rm.reset()\n"
            "    rm.add(1)\n"
            "    assert rm.get_median() == 1.0\n\n"
            "def test_from_list():\n"
            "    rm = RunningMedian.from_list([3, 1, 2])\n"
            "    assert rm.get_median() == 2.0\n"
        ),
    },

    # -----------------------------------------------------------------------
    # MEDIUM tasks
    # -----------------------------------------------------------------------
    # Token-bucket rate limiter; the visible test_refill sleeps 0.2 s, and the
    # hidden suite checks burst_remaining() and concurrent is_allowed() calls.
    "medium_rate_limiter": {
        "difficulty": "medium",
        "description": (
            "Implement a token-bucket RateLimiter in solution.py. "
            "Constructor: RateLimiter(rate: int, capacity: int). "
            "is_allowed(n_tokens=1) -> bool: returns True if n_tokens can be consumed, "
            "refilling at 'rate' tokens per second. "
            "Use time.monotonic() for timestamps. "
            "Session 1: core token-bucket logic + visible tests. "
            "Session 2: add burst_remaining() -> int and thread-safety via threading.Lock."
        ),
        "starter_code": {
            "solution.py": (
                "import time\n\n"
                "class RateLimiter:\n"
                "    def __init__(self, rate: int, capacity: int):\n"
                "        # TODO\n"
                "        pass\n"
            )
        },
        "test_code": (
            "import time\n"
            "from solution import RateLimiter\n\n"
            "def test_basic_allow():\n"
            "    rl = RateLimiter(10, 10)\n"
            "    assert rl.is_allowed() is True\n\n"
            "def test_exhaustion():\n"
            "    rl = RateLimiter(1, 3)\n"
            "    assert rl.is_allowed(3) is True\n"
            "    assert rl.is_allowed() is False\n\n"
            "def test_refill():\n"
            "    rl = RateLimiter(10, 10)\n"
            "    rl.is_allowed(10)\n"
            "    time.sleep(0.2)\n"
            "    assert rl.is_allowed(2) is True\n\n"
            "def test_over_capacity():\n"
            "    rl = RateLimiter(5, 5)\n"
            "    assert rl.is_allowed(6) is False\n\n"
            "def test_zero_tokens():\n"
            "    rl = RateLimiter(5, 5)\n"
            "    assert rl.is_allowed(0) is True\n"
        ),
        "hidden_test_code": (
            "import threading, time\n"
            "from solution import RateLimiter\n\n"
            "def test_burst_remaining():\n"
            "    rl = RateLimiter(10, 10)\n"
            "    rl.is_allowed(4)\n"
            "    assert rl.burst_remaining() == 6\n\n"
            "def test_thread_safe():\n"
            "    rl = RateLimiter(100, 100)\n"
            "    results = []\n"
            "    def task():\n"
            "        results.append(rl.is_allowed(10))\n"
            "    threads = [threading.Thread(target=task) for _ in range(10)]\n"
            "    for t in threads: t.start()\n"
            "    for t in threads: t.join()\n"
            "    assert results.count(True) == 10\n"
        ),
    },

    # LRU cache; hidden suite checks keys() ordering (MRU first) and clear().
    "medium_lru_cache": {
        "difficulty": "medium",
        "description": (
            "Implement LRUCache(capacity: int) in solution.py. "
            "get(key) -> int: return value or -1 if not present. "
            "put(key, value): insert, evicting LRU entry if at capacity. "
            "Both O(1) using dict + doubly-linked list. "
            "Session 1: core get/put + visible tests. "
            "Session 2: add keys() -> list (in MRU→LRU order) and "
            "clear() method."
        ),
        "starter_code": {
            "solution.py": (
                "class LRUCache:\n"
                "    def __init__(self, capacity: int):\n"
                "        # TODO: doubly-linked list + dict\n"
                "        pass\n"
            )
        },
        "test_code": (
            "from solution import LRUCache\n\n"
            "def test_basic():\n"
            "    c = LRUCache(2)\n"
            "    c.put(1, 1); c.put(2, 2)\n"
            "    assert c.get(1) == 1\n"
            "    c.put(3, 3)\n"
            "    assert c.get(2) == -1\n\n"
            "def test_overwrite():\n"
            "    c = LRUCache(2)\n"
            "    c.put(1, 10); c.put(1, 20)\n"
            "    assert c.get(1) == 20\n\n"
            "def test_capacity_one():\n"
            "    c = LRUCache(1)\n"
            "    c.put(1, 1); c.put(2, 2)\n"
            "    assert c.get(1) == -1\n"
            "    assert c.get(2) == 2\n\n"
            "def test_miss():\n"
            "    c = LRUCache(3)\n"
            "    assert c.get(99) == -1\n\n"
            "def test_no_eviction_under_cap():\n"
            "    c = LRUCache(5)\n"
            "    for i in range(5):\n"
            "        c.put(i, i*10)\n"
            "    for i in range(5):\n"
            "        assert c.get(i) == i*10\n"
        ),
        "hidden_test_code": (
            "from solution import LRUCache\n\n"
            "def test_keys_order():\n"
            "    c = LRUCache(3)\n"
            "    c.put(1,1); c.put(2,2); c.put(3,3)\n"
            "    c.get(1)\n"
            "    assert c.keys()[0] == 1\n\n"
            "def test_clear():\n"
            "    c = LRUCache(3)\n"
            "    c.put(1,1); c.put(2,2)\n"
            "    c.clear()\n"
            "    assert c.get(1) == -1\n"
        ),
    },

    # -----------------------------------------------------------------------
    # HARD tasks
    # -----------------------------------------------------------------------
    # Topological sort over a DAG; the starter code already defines CycleError
    # and the graph/nodes containers. Hidden suite checks has_path() and
    # parallel_layers().
    "hard_topological_sort": {
        "difficulty": "hard",
        "description": (
            "Implement a TopologicalSort class in solution.py for a DAG. "
            "add_edge(u, v): add directed edge u→v. "
            "sort() -> list: return topological order (raise CycleError if cycle). "
            "has_path(src, dst) -> bool: BFS/DFS reachability. "
            "Also implement parallel_layers() -> list[list]: return nodes grouped "
            "by execution layer (Kahn's algorithm variant). "
            "Session 1: add_edge + sort + CycleError + visible tests. "
            "Session 2: has_path + parallel_layers + hidden tests."
        ),
        "starter_code": {
            "solution.py": (
                "from collections import defaultdict, deque\n\n"
                "class CycleError(Exception):\n"
                "    pass\n\n"
                "class TopologicalSort:\n"
                "    def __init__(self):\n"
                "        self.graph = defaultdict(list)\n"
                "        self.nodes = set()\n\n"
                "    def add_edge(self, u, v):\n"
                "        # TODO\n"
                "        pass\n\n"
                "    def sort(self):\n"
                "        # TODO: Kahn's algorithm\n"
                "        pass\n"
            )
        },
        "test_code": (
            "import pytest\n"
            "from solution import TopologicalSort, CycleError\n\n"
            "def test_linear():\n"
            "    ts = TopologicalSort()\n"
            "    ts.add_edge('a','b'); ts.add_edge('b','c')\n"
            "    order = ts.sort()\n"
            "    assert order.index('a') < order.index('b') < order.index('c')\n\n"
            "def test_cycle():\n"
            "    ts = TopologicalSort()\n"
            "    ts.add_edge('a','b'); ts.add_edge('b','a')\n"
            "    with pytest.raises(CycleError):\n"
            "        ts.sort()\n\n"
            "def test_diamond():\n"
            "    ts = TopologicalSort()\n"
            "    ts.add_edge('a','b'); ts.add_edge('a','c')\n"
            "    ts.add_edge('b','d'); ts.add_edge('c','d')\n"
            "    order = ts.sort()\n"
            "    assert order[0] == 'a' and order[-1] == 'd'\n\n"
            "def test_isolated_node():\n"
            "    ts = TopologicalSort()\n"
            "    ts.add_edge('a','b')\n"
            "    order = ts.sort()\n"
            "    assert set(order) == {'a','b'}\n\n"
            "def test_empty():\n"
            "    ts = TopologicalSort()\n"
            "    assert ts.sort() == []\n\n"
            "def test_single_node():\n"
            "    ts = TopologicalSort()\n"
            "    ts.add_edge('x','x')\n"
            "    with pytest.raises(CycleError):\n"
            "        ts.sort()\n\n"
            "def test_large_dag():\n"
            "    ts = TopologicalSort()\n"
            "    for i in range(9):\n"
            "        ts.add_edge(str(i), str(i+1))\n"
            "    order = ts.sort()\n"
            "    assert order == [str(i) for i in range(10)]\n\n"
            "def test_multi_root():\n"
            "    ts = TopologicalSort()\n"
            "    ts.add_edge('a','c'); ts.add_edge('b','c')\n"
            "    order = ts.sort()\n"
            "    assert order.index('a') < order.index('c')\n"
            "    assert order.index('b') < order.index('c')\n"
        ),
        "hidden_test_code": (
            "from solution import TopologicalSort\n\n"
            "def test_has_path_true():\n"
            "    ts = TopologicalSort()\n"
            "    ts.add_edge('a','b'); ts.add_edge('b','c')\n"
            "    assert ts.has_path('a','c') is True\n\n"
            "def test_has_path_false():\n"
            "    ts = TopologicalSort()\n"
            "    ts.add_edge('a','b')\n"
            "    assert ts.has_path('b','a') is False\n\n"
            "def test_parallel_layers():\n"
            "    ts = TopologicalSort()\n"
            "    ts.add_edge('a','c'); ts.add_edge('b','c')\n"
            "    layers = ts.parallel_layers()\n"
            "    assert set(layers[0]) == {'a','b'}\n"
            "    assert layers[-1] == ['c']\n"
        ),
    },
}
416
+
417
# Holdout tasks (simplified for eval only).
# Each holdout id currently points at the very same template object as a
# training task; the first pairing is marked as a placeholder.
# NOTE(review): the other two pairings also reuse training templates, so this
# set is not disjoint from the training bank; confirm whether that is intended.
HOLDOUT_TEMPLATES = {
    holdout_id: TASK_TEMPLATES[source_id]
    for holdout_id, source_id in (
        ("holdout_two_sum", "easy_merge_intervals"),  # placeholder
        ("holdout_word_count", "easy_stack"),
        ("holdout_retry_exec", "medium_rate_limiter"),
    )
}
423
+
424
+
425
+ # ---------------------------------------------------------------------------
426
+ # TaskGenerator
427
+ # ---------------------------------------------------------------------------
428
+
429
class TaskGenerator:
    """
    Samples tasks from the template bank and applies name randomization.

    A generator is bound to one difficulty level. ``sample`` draws a training
    task and renames canonical identifiers to episode-specific aliases;
    ``sample_holdout`` draws from ``HOLDOUT_TEMPLATES`` and performs no
    renaming. Templates are deep-copied before use, so the module-level
    banks are never mutated.
    """

    def __init__(self, difficulty: str = "medium"):
        """
        Build the per-difficulty template bank.

        Args:
            difficulty: one of "easy", "medium" or "hard".

        Raises:
            AssertionError: if ``difficulty`` is not one of the three levels.
        """
        assert difficulty in {"easy", "medium", "hard"}, f"Invalid difficulty: {difficulty}"
        self.difficulty = difficulty
        # Only templates of the requested difficulty are eligible for random draws.
        self._bank = {
            k: v for k, v in TASK_TEMPLATES.items()
            if v["difficulty"] == difficulty
        }
        self._holdout = HOLDOUT_TEMPLATES

    def sample(
        self,
        task_id: Optional[str] = None,
        seed: Optional[int] = None,
    ) -> Task:
        """
        Return a name-randomized task.

        Args:
            task_id: id of a specific template in ``TASK_TEMPLATES``. When it
                is missing or unknown, a template is drawn at random from the
                bank for this generator's difficulty.
            seed: when given, the process-wide ``random`` generator is
                reseeded with it, which makes both the template draw and the
                alias draws reproducible.

        Returns:
            A freshly built ``Task`` whose description, starter code, runtime
            files and visible tests use the randomized names.
        """
        if seed is not None:
            # Reseeds the global RNG, so it affects other users of `random` too.
            random.seed(seed)

        if task_id and task_id in TASK_TEMPLATES:
            # An explicit id bypasses the difficulty filter.
            template = copy.deepcopy(TASK_TEMPLATES[task_id])
            chosen_id = task_id
        else:
            chosen_id = random.choice(list(self._bank.keys()))
            template = copy.deepcopy(self._bank[chosen_id])

        task = self._build_task(chosen_id, template)
        task = self._randomize_names(task)
        return task

    def sample_holdout(self, task_id: Optional[str] = None) -> Task:
        """
        Sample from the holdout set.

        Args:
            task_id: id of a specific holdout template; when missing or
                unknown, one is drawn at random.

        Returns:
            A ``Task`` built from the holdout template, with canonical
            (non-randomized) names.
        """
        if task_id and task_id in self._holdout:
            template = copy.deepcopy(self._holdout[task_id])
            chosen_id = task_id
        else:
            chosen_id = random.choice(list(self._holdout.keys()))
            template = copy.deepcopy(self._holdout[chosen_id])
        return self._build_task(chosen_id, template)

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _build_task(task_id: str, template: dict) -> Task:
        """Turn a template dict into a ``Task`` with independent file dicts."""
        return Task(
            task_id=task_id,
            difficulty=template["difficulty"],
            description=template["description"],
            starter_code=dict(template["starter_code"]),
            test_code=template["test_code"],
            hidden_test_code=template["hidden_test_code"],
            files=dict(template["starter_code"]),  # runtime mutable copy
        )

    @staticmethod
    def _randomize_names(task: Task) -> Task:
        """
        Randomly remap canonical names to episode-specific variants.

        This prevents Session 2 from reconstructing the solution purely from
        pretrained knowledge without reading the handoff note.

        One alias is drawn per ``NAME_BANK`` entry (in bank order), then the
        description, runtime files, starter code and visible tests are
        rewritten. Only whole identifiers are replaced: a canonical name
        embedded in a longer identifier or word (``get`` in ``get_median``,
        ``put`` in "input") is left untouched. All replacements happen in a
        single pass, so an alias is never rewritten a second time.

        Args:
            task: the task to rewrite; it is modified in place.

        Returns:
            The same ``task`` object.
        """
        import re  # local import: the module header does not import `re`

        mapping = {
            canonical: random.choice(variants)
            for canonical, variants in NAME_BANK.items()
        }
        if not mapping:
            # An empty alternation would match the empty string everywhere.
            return task

        # Longest names first so the alternation never prefers a shorter
        # canonical name over a longer one starting at the same position.
        ordered = sorted(mapping, key=len, reverse=True)
        pattern = re.compile(
            r"\b(?:" + "|".join(re.escape(name) for name in ordered) + r")\b"
        )

        def apply(text: str) -> str:
            return pattern.sub(lambda match: mapping[match.group(0)], text)

        task.description = apply(task.description)
        task.files = {k: apply(v) for k, v in task.files.items()}
        task.starter_code = {k: apply(v) for k, v in task.starter_code.items()}
        task.test_code = apply(task.test_code)
        # Hidden tests use canonical names — not randomized (consistent eval)
        # NOTE(review): the hidden suites import canonical names from
        # solution.py while the agent is asked to implement the aliases, and
        # the mapping is not stored on the task; confirm the evaluation
        # harness accounts for this.
        return task