File size: 1,408 Bytes
e75c8ce
6e72b95
351158b
e75c8ce
 
 
 
351158b
e75c8ce
 
 
6e72b95
d342897
351158b
 
 
 
 
e75c8ce
 
351158b
 
 
 
 
 
 
e75c8ce
 
351158b
 
 
e75c8ce
351158b
 
 
e75c8ce
 
351158b
d342897
351158b
 
 
 
e75c8ce
 
 
351158b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# Environment manifest: identity, runtime, and entry point of the
# cache-invalidation environment.
spec_version: 1
name: cache_invalidation_env
version: '1.0.0'
type: space
runtime: fastapi
# Written as module:attribute; presumably the application object served by
# the runtime above (confirm against the server package).
app: server.app:app
port: 7860
# Folded scalar: the wrapped lines are joined with single spaces into one
# paragraph, and a single trailing newline is kept.
description: >
  Cache invalidation under uncertainty: agents choose invalidate, refresh, or
  keep per step from noisy hit/stale observations. Three difficulty tasks
  (easy → hard), each with a programmatic episode grader
  (final_score in [0,1]).

# Task catalogue. The three entries share the same step budget, grader
# settings, and score range; they differ in name, description, difficulty
# label, and grader callable.
tasks:
  - name: easy
    description: >-
      Fewer cache items and low volatility; easier to infer staleness from
      noisy observations.
    difficulty: easy
    max_steps: 10
    grader: true
    grader_kind: programmatic
    # module:function reference; presumably resolved by whatever loads this
    # manifest (verify against the consumer).
    grader_callable: env.task_graders:easy_agent_grader
    score_range: [0.0, 1.0]

  - name: medium
    description: >-
      More items and moderate volatility; invalidation vs keep tradeoffs
      matter more.
    difficulty: medium
    max_steps: 10
    grader: true
    grader_kind: programmatic
    grader_callable: env.task_graders:medium_agent_grader
    score_range: [0.0, 1.0]

  - name: hard
    description: >-
      Most items and high volatility; noisy staleness signal and harder
      tradeoffs.
    difficulty: hard
    max_steps: 10
    grader: true
    grader_kind: programmatic
    grader_callable: env.task_graders:hard_agent_grader
    score_range: [0.0, 1.0]

# Endpoint table: maps each operation name to a value written as an HTTP
# method followed by a path. Only reset and step use POST; the rest use GET.
endpoints:
  reset: POST /reset
  step: POST /step
  state: GET /state
  schema: GET /schema
  metadata: GET /metadata
  health: GET /health
  tasks: GET /tasks