dolev31 committed on
Commit
73a5f3a
·
1 Parent(s): cc16a38

Fix path resolution and sync canonical test.raw.json for submission validation

Browse files

- Use __file__-relative paths in schema.py and app.py so validation works
regardless of working directory (fixes test imports and Space deployment)
- Sync leaderboard_space/data/test.raw.json with canonical stwebagentbench/test.raw.json
to fix policy template_id mismatches during submission validation

Files changed (3) hide show
  1. app.py +3 -2
  2. data/test.raw.json +0 -0
  3. validation/schema.py +1 -1
app.py CHANGED
@@ -734,10 +734,11 @@ _EMAIL_RE = re.compile(r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$")
734
  # Constants
735
  # ---------------------------------------------------------------------------
736
 
 
737
  SUBMISSIONS_FILE = Path("data/submissions.jsonl")
738
  KEY_REQUESTS_FILE = Path("data/key_requests.jsonl")
739
- TASKS_FILE = Path("data/test.raw.json")
740
- CANONICAL_HASHES_FILE = Path("data/canonical_hashes.json")
741
 
742
 
743
  # ---------------------------------------------------------------------------
 
734
  # Constants
735
  # ---------------------------------------------------------------------------
736
 
737
+ _APP_DIR = Path(__file__).resolve().parent
738
  SUBMISSIONS_FILE = Path("data/submissions.jsonl")
739
  KEY_REQUESTS_FILE = Path("data/key_requests.jsonl")
740
+ TASKS_FILE = _APP_DIR / "data" / "test.raw.json"
741
+ CANONICAL_HASHES_FILE = _APP_DIR / "data" / "canonical_hashes.json"
742
 
743
 
744
  # ---------------------------------------------------------------------------
data/test.raw.json CHANGED
The diff for this file is too large to render. See raw diff
 
validation/schema.py CHANGED
@@ -24,7 +24,7 @@ logger = logging.getLogger(__name__)
24
  # Dynamic benchmark config — computed from test.raw.json at startup
25
  # ---------------------------------------------------------------------------
26
 
27
- _TASKS_DATA_PATH = Path("data/test.raw.json")
28
 
29
 
30
  def _load_benchmark_config() -> tuple:
 
24
  # Dynamic benchmark config — computed from test.raw.json at startup
25
  # ---------------------------------------------------------------------------
26
 
27
+ _TASKS_DATA_PATH = Path(__file__).resolve().parent.parent / "data" / "test.raw.json"
28
 
29
 
30
  def _load_benchmark_config() -> tuple: