Fix path resolution and sync canonical test.raw.json for submission validation
Browse files
- Use __file__-relative paths in schema.py and app.py so validation works
regardless of working directory (fixes test imports and Space deployment)
- Sync leaderboard_space/data/test.raw.json with canonical stwebagentbench/test.raw.json
to fix policy template_id mismatches during submission validation
- app.py +3 -2
- data/test.raw.json +0 -0
- validation/schema.py +1 -1
app.py
CHANGED
|
@@ -734,10 +734,11 @@ _EMAIL_RE = re.compile(r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$")
|
|
| 734 |
# Constants
|
| 735 |
# ---------------------------------------------------------------------------
|
| 736 |
|
|
|
|
| 737 |
SUBMISSIONS_FILE = Path("data/submissions.jsonl")
|
| 738 |
KEY_REQUESTS_FILE = Path("data/key_requests.jsonl")
|
| 739 |
-
TASKS_FILE =
|
| 740 |
-
CANONICAL_HASHES_FILE =
|
| 741 |
|
| 742 |
|
| 743 |
# ---------------------------------------------------------------------------
|
|
|
|
| 734 |
# Constants
|
| 735 |
# ---------------------------------------------------------------------------
|
| 736 |
|
| 737 |
+
_APP_DIR = Path(__file__).resolve().parent
|
| 738 |
SUBMISSIONS_FILE = Path("data/submissions.jsonl")
|
| 739 |
KEY_REQUESTS_FILE = Path("data/key_requests.jsonl")
|
| 740 |
+
TASKS_FILE = _APP_DIR / "data" / "test.raw.json"
|
| 741 |
+
CANONICAL_HASHES_FILE = _APP_DIR / "data" / "canonical_hashes.json"
|
| 742 |
|
| 743 |
|
| 744 |
# ---------------------------------------------------------------------------
|
data/test.raw.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
validation/schema.py
CHANGED
|
@@ -24,7 +24,7 @@ logger = logging.getLogger(__name__)
|
|
| 24 |
# Dynamic benchmark config — computed from test.raw.json at startup
|
| 25 |
# ---------------------------------------------------------------------------
|
| 26 |
|
| 27 |
-
_TASKS_DATA_PATH = Path("data/test.raw.json")
|
| 28 |
|
| 29 |
|
| 30 |
def _load_benchmark_config() -> tuple:
|
|
|
|
| 24 |
# Dynamic benchmark config — computed from test.raw.json at startup
|
| 25 |
# ---------------------------------------------------------------------------
|
| 26 |
|
| 27 |
+
_TASKS_DATA_PATH = Path(__file__).resolve().parent.parent / "data" / "test.raw.json"
|
| 28 |
|
| 29 |
|
| 30 |
def _load_benchmark_config() -> tuple:
|