Spaces:
Sleeping
Sleeping
# pre_validation.sh -- pre-submission sanity checks for a repository that
# contains an openenv.yaml manifest and an inference.py entry point.
#
# Usage: pre_validation.sh [REPO_DIR]      (REPO_DIR defaults to ".")
#
# Checks, in order:
#   1/3  openenv.yaml declares at least 3 tasks that have a `grader`.
#   2/3  `inference.py --max-steps 1` prints at least 3 "[START] ... task=<id>"
#        and "[END] ... task=<id>" lines on stdout, in matching order, and
#        every id is declared in openenv.yaml.
#   3/3  the JSON written via --output has a `results` list of at least 3
#        rows, each with a `task_score` inside [0.01, 0.99].
#
# Requires bash (for `pipefail`) and python3 with PyYAML importable.
# Exits non-zero on the first failed check.
set -euo pipefail

REPO_DIR="${1:-.}"
cd "$REPO_DIR"

# Scratch files live in a private per-run directory instead of fixed /tmp
# names: a fixed name lets a results file left over from an earlier run be
# validated by mistake, and lets concurrent runs overwrite each other.
TMP_DIR="$(mktemp -d "${TMPDIR:-/tmp}/pre_validation.XXXXXX")"
trap 'rm -rf "$TMP_DIR"' EXIT
TMP_OUT="$TMP_DIR/results.json"
TMP_LOG="$TMP_DIR/stdout.log"
TMP_ERR="$TMP_DIR/stderr.log"

echo "==> 1/3 openenv task grader check"
python3 - <<'PY'
import yaml
from pathlib import Path

doc = yaml.safe_load(Path('openenv.yaml').read_text())
# An empty file parses to None and a bare `tasks:` key maps to None; treat
# anything that is not a list of tasks as "no tasks" rather than crashing.
tasks = doc.get('tasks') if isinstance(doc, dict) else None
if not isinstance(tasks, list):
    tasks = []
with_grader = [t.get('id') for t in tasks if isinstance(t, dict) and t.get('grader')]
print(f"tasks_total={len(tasks)} tasks_with_grader={len(with_grader)}")
if len(with_grader) < 3:
    print("expected >=3 tasks with a grader")
    raise SystemExit(1)
PY

echo "==> 2/3 inference structured logs check"
# Run the command on the left of `||` so `set -e` does not abort before the
# captured stderr can be shown; the original exit status is preserved.
python3 inference.py --max-steps 1 --output "$TMP_OUT" > "$TMP_LOG" 2> "$TMP_ERR" || {
    status=$?
    echo "inference.py exited with status $status; last lines of stderr:" >&2
    tail -n 50 "$TMP_ERR" >&2 || true
    exit "$status"
}
# The heredoc delimiter is quoted (no shell expansion inside), so the log
# path is handed to Python as sys.argv[1].
python3 - "$TMP_LOG" <<'PY'
import re, sys, yaml
from pathlib import Path

TASK_RE = re.compile(r'\btask=([^\s]+)')

log = Path(sys.argv[1]).read_text().splitlines()
doc = yaml.safe_load(Path('openenv.yaml').read_text())
tasks = doc.get('tasks') if isinstance(doc, dict) else None
if not isinstance(tasks, list):
    tasks = []
valid_ids = {t['id'] for t in tasks if isinstance(t, dict) and 'id' in t}

start_tasks = []
end_tasks = []
for line in log:
    if line.startswith('[START] '):
        bucket = start_tasks
    elif line.startswith('[END] '):
        bucket = end_tasks
    else:
        continue
    # Marker lines without a task=<id> field are ignored, not treated as errors.
    m = TASK_RE.search(line)
    if m:
        bucket.append(m.group(1))

if len(start_tasks) < 3 or len(end_tasks) < 3:
    print(f"expected >=3 START/END lines, got start={len(start_tasks)} end={len(end_tasks)}")
    raise SystemExit(1)
if start_tasks != end_tasks:
    print(f"START/END task mismatch: start={start_tasks} end={end_tasks}")
    raise SystemExit(1)
if any(t not in valid_ids for t in start_tasks):
    print(f"unknown task id in logs: {start_tasks}")
    raise SystemExit(1)
print(f"structured_log_tasks={start_tasks}")
PY

echo "==> 3/3 score range check"
python3 - "$TMP_OUT" <<'PY'
import json, sys
from pathlib import Path

results_path = Path(sys.argv[1])
# The scratch directory is fresh for this run, so a missing file means
# inference.py did not honour --output.
if not results_path.is_file():
    print("inference.py did not write the --output results file")
    raise SystemExit(1)

payload = json.loads(results_path.read_text())
results = payload.get('results') if isinstance(payload, dict) else None
if not isinstance(results, list) or len(results) < 3:
    print("expected a 'results' list with >=3 entries")
    raise SystemExit(1)

for row in results:
    if not isinstance(row, dict):
        print(f"malformed result row: {row!r}")
        raise SystemExit(1)
    # A missing task_score defaults to 0.0, which then fails the range check.
    raw = row.get('task_score', 0.0)
    try:
        score = float(raw)
    except (TypeError, ValueError):
        print(f"non_numeric task={row.get('task_id')} score={raw!r}")
        raise SystemExit(1)
    if not (0.01 <= score <= 0.99):
        print(f"out_of_range task={row.get('task_id')} score={score}")
        raise SystemExit(1)
print('all_scores_in_[0.01,0.99]=true')
PY

echo "PASS: pre_validation.sh (3/3)"