Spaces:
Sleeping
Sleeping
File size: 2,378 Bytes
344c3c9 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 | #!/usr/bin/env bash
# Strict mode: abort on any failing command (-e), on use of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# Repository to validate: first positional argument, defaulting to the
# current directory.
REPO_DIR="${1:-.}"

# `--` keeps a directory name that starts with '-' from being parsed as an
# option; the empty CDPATH stops an inherited CDPATH from redirecting a
# relative path to some other directory.
CDPATH= cd -- "$REPO_DIR"
# Check 1/3: openenv.yaml must declare at least three tasks with a grader.
echo "==> 1/3 openenv task grader check"
python3 - <<'PY'
# Count the entries under `tasks` in openenv.yaml that have a truthy `grader`.
import yaml
from pathlib import Path

doc = yaml.safe_load(Path('openenv.yaml').read_text())
# An empty file parses to None, and a bare `tasks:` key parses to None as
# well; treat both as "no tasks" so the check fails cleanly rather than
# crashing with a traceback.
tasks = (doc.get('tasks') if isinstance(doc, dict) else None) or []
with_grader = [t.get('id') for t in tasks if isinstance(t, dict) and t.get('grader')]
print(f"tasks_total={len(tasks)} tasks_with_grader={len(with_grader)}")
if len(with_grader) < 3:
    print(f"expected >=3 tasks with a grader, got {len(with_grader)}")
    raise SystemExit(1)
PY
# Check 2/3: run inference for a single step and verify that its stdout
# contains matching [START]/[END] lines for known task ids.
echo "==> 2/3 inference structured logs check"
TMP_OUT="/tmp/pre_validation_results.json"
TMP_LOG="/tmp/pre_validation_stdout.log"
TMP_ERR="/tmp/pre_validation_stderr.log"

# Remove the results file left by any earlier run so that a stale file can
# never be mistaken for the output of this run by the later score check.
rm -f "$TMP_OUT"

# stderr is captured to a file, so under `set -e` a failing run would abort
# the script without any visible reason. Report it and keep the exit status.
python3 inference.py --max-steps 1 --output "$TMP_OUT" > "$TMP_LOG" 2> "$TMP_ERR" || {
  status=$?
  echo "FAIL: inference.py exited with status $status; last lines of $TMP_ERR:" >&2
  tail -n 20 "$TMP_ERR" >&2 || true
  exit "$status"
}

# The log path is passed as an argument because the quoted heredoc does not
# expand shell variables.
python3 - "$TMP_LOG" <<'PY'
# Validate the [START]/[END] lines of the captured inference stdout.
import re
import sys
import yaml
from pathlib import Path

TASK_RE = re.compile(r'\btask=([^\s]+)')

log_lines = Path(sys.argv[1]).read_text().splitlines()
doc = yaml.safe_load(Path('openenv.yaml').read_text())
# Tolerate an empty file or an empty `tasks:` key (both parse to None).
tasks = (doc.get('tasks') if isinstance(doc, dict) else None) or []
valid_ids = {t['id'] for t in tasks if isinstance(t, dict) and 'id' in t}

start_tasks = []
end_tasks = []
for line in log_lines:
    if line.startswith('[START] '):
        bucket = start_tasks
    elif line.startswith('[END] '):
        bucket = end_tasks
    else:
        continue
    m = TASK_RE.search(line)
    if m:
        bucket.append(m.group(1))

if len(start_tasks) < 3 or len(end_tasks) < 3:
    print(f"expected >=3 START/END lines, got start={len(start_tasks)} end={len(end_tasks)}")
    raise SystemExit(1)
# Order matters: every task must end in the same sequence it started.
if start_tasks != end_tasks:
    print(f"START/END task mismatch: start={start_tasks} end={end_tasks}")
    raise SystemExit(1)
unknown = [t for t in start_tasks if t not in valid_ids]
if unknown:
    # Name the offending ids, not the whole list, so the failure is actionable.
    print(f"unknown task id in logs: {unknown} (seen={start_tasks})")
    raise SystemExit(1)
print(f"structured_log_tasks={start_tasks}")
PY
# Check 3/3: every task score in the results JSON must lie in [0.01, 0.99].
echo "==> 3/3 score range check"
# The results path is passed as an argument because the quoted heredoc does
# not expand shell variables.
python3 - "$TMP_OUT" <<'PY'
# Validate the `results` list written by the inference run.
import json
import sys
from pathlib import Path

payload = json.loads(Path(sys.argv[1]).read_text())
results = payload.get('results') if isinstance(payload, dict) else None
if not isinstance(results, list) or len(results) < 3:
    print("expected a 'results' list with >=3 entries in the results JSON")
    raise SystemExit(1)

for row in results:
    if not isinstance(row, dict):
        print(f"malformed result row (not an object): {row!r}")
        raise SystemExit(1)
    # A missing score defaults to 0.0, which then fails the range check below.
    raw_score = row.get('task_score', 0.0)
    try:
        score = float(raw_score)
    except (TypeError, ValueError):
        print(f"non_numeric_score task={row.get('task_id')} score={raw_score!r}")
        raise SystemExit(1) from None
    # NaN fails both comparisons, so it is reported as out of range too.
    if not (0.01 <= score <= 0.99):
        print(f"out_of_range task={row.get('task_id')} score={score}")
        raise SystemExit(1)

print('all_scores_in_[0.01,0.99]=true')
PY
echo "PASS: pre_validation.sh (3/3)"
|