#!/bin/bash
#
# OpenEnv pre-submission validation.
#
# Runs five checks in order and exits at the first check that fails:
#   1. required top-level files exist
#   2. the server/ package files exist
#   3. the server modules import and TASKS holds at least 3 entries
#   4. the grader returns a reward in [0, 1] and 0.0 for a "no bug" answer
#   5. openenv.yaml parses and declares `name` plus at least 3 `tasks`
#
# Usage: run from the repository root; every path below is relative to the
# current working directory.
# Exit status: 0 when all checks pass, 1 otherwise.

# Abort on any unhandled command failure. `-u` is deliberately not enabled
# because a third-party venv activate script is sourced below.
set -e

readonly RULE="═══════════════════════════════════════"

# Print a section heading, e.g. "── 1. Required Files ──".
section() {
  echo "── $1 ──"
}

# Print a failure line and abort the whole script with status 1.
# Arguments: $1 - message to show after the failure marker
fail() {
  echo " ❌ $1"
  exit 1
}

# Verify that every argument names an existing regular file.
# All missing files are reported before the script exits with status 1, so a
# single run shows everything that has to be added.
# Arguments: $@ - relative paths to check
require_files() {
  local path
  local missing=0
  for path in "$@"; do
    if [[ -f "$path" ]]; then
      echo " ✅ $path"
    else
      echo " ❌ Missing $path"
      missing=1
    fi
  done
  [[ "$missing" -eq 0 ]] || exit 1
  echo ""
}

# Activate ./venv when it exists; otherwise keep whatever python3 is on PATH
# (e.g. inside a container where dependencies are installed system-wide).
activate_venv() {
  if [[ -f venv/bin/activate ]]; then
    # shellcheck disable=SC1091  # generated by `python -m venv`, not in repo
    source venv/bin/activate
  else
    echo " (venv/bin/activate not found - using python3 from PATH)"
  fi
  command -v python3 >/dev/null 2>&1 || fail "python3 not found on PATH"
}

# Run the Python program supplied on stdin with the current directory on
# sys.path; abort the script with "<label> failed" if it exits non-zero.
# Arguments: $1 - human-readable name of the check
run_python_check() {
  local label=$1
  python3 - || fail "$label failed"
  echo ""
}

echo "$RULE"
echo " OpenEnv Pre-Submission Validation"
echo "$RULE"
echo ""

section "1. Required Files"
require_files \
  "openenv.yaml" \
  "inference.py" \
  "README.md" \
  "Dockerfile" \
  "requirements.txt"

section "2. Server Module Structure"
require_files \
  "server/__init__.py" \
  "server/app.py" \
  "server/models.py" \
  "server/environment.py" \
  "server/tasks.py" \
  "server/grader.py"

section "3. Python Import Validation"
activate_venv
run_python_check "Python import validation" <<'PY'
from server.tasks import TASKS
from server.grader import grade_action
from server.environment import CodeSecurityEnv
from server.models import CodeReviewAction, CodeObservation, StepResult, StateResponse, ResetResponse, TaskInfo

assert len(TASKS) >= 3, f'Expected 3+ tasks, got {len(TASKS)}'
print(' ✅ All imports resolve correctly')
print(f' Tasks: {list(TASKS.keys())}')
PY

section "4. Grader Smoke Test"
run_python_check "Grader smoke test" <<'PY'
from server.environment import CodeSecurityEnv

try:
    from server.models import Action
except ImportError:
    # NOTE(review): the import check above validates `CodeReviewAction`, not
    # `Action`; fall back to it when `Action` is absent. Confirm which name
    # server.models is meant to export.
    from server.models import CodeReviewAction as Action

# A plausible correct answer must be graded inside [0, 1] and end the episode.
env = CodeSecurityEnv()
env.reset('python-off-by-one')
result = env.step(Action(
    bug_identified=True,
    bug_location='range(len(transactions) + 1)',
    bug_type='logic-error',
    bug_description='Off-by-one index error — the range goes one past the end causing an out of bounds IndexError',
    severity='medium',
    suggested_fix='Use range(len(transactions)) to fix the boundary',
))
assert 0.0 <= result.reward <= 1.0, f'Reward out of range: {result.reward}'
assert result.done is True
print(f' ✅ Grader returned reward={result.reward:.4f}, done={result.done}')

# Verify zero-reward path
env2 = CodeSecurityEnv()
env2.reset('python-off-by-one')
r2 = env2.step(Action(
    bug_identified=False,
    bug_location='',
    bug_type='none',
    bug_description='No bug found',
    severity='none',
    suggested_fix='',
))
assert r2.reward == 0.0, f'Expected 0.0 for no-bug, got {r2.reward}'
print(' ✅ No-bug path returns reward=0.0')
PY

section "5. openenv.yaml Validation"
run_python_check "openenv.yaml validation" <<'PY'
import yaml

with open('openenv.yaml', 'r') as f:
    data = yaml.safe_load(f)

# safe_load returns None for an empty file; fail with a readable message.
assert isinstance(data, dict), 'openenv.yaml must contain a top-level mapping'
assert 'name' in data, 'Missing name field'
assert 'tasks' in data, 'Missing tasks field'
task_count = len(data['tasks'])
assert task_count >= 3, f'Need 3+ tasks, got {task_count}'
print(f' ✅ Valid YAML with {task_count} tasks')
PY

echo "$RULE"
echo " ✅ All checks passed!"
echo "$RULE"
|