Spaces:
Sleeping
Sleeping
File size: 3,189 Bytes
#!/usr/bin/env python
"""
Verify that at least 3 tasks with graders are defined.
This script checks the test/test.py structure for validator compliance.
"""
import sys
import os
# Parse the test file and check for TASK_WITH_GRADERS
test_file = "test/test.py"
with open(test_file, "r") as f:
content = f.read()
# Check for TASK_WITH_GRADERS dict definition
if "TASK_WITH_GRADERS = {" not in content:
print("β FAIL: TASK_WITH_GRADERS not found in test/test.py")
sys.exit(1)
# Count distinct task IDs (text_spam, content_moderation, deepfake_detection)
task_ids = ["text_spam", "content_moderation", "deepfake_detection"]
tasks_found = []
for task_id in task_ids:
if f'"{task_id}":' in content or f"'{task_id}':" in content:
tasks_found.append(task_id)
task_count = len(tasks_found)
print(f"Found {task_count} tasks with graders defined: {', '.join(tasks_found)}")
if task_count < 3:
print(f"β FAIL: Only {task_count} task(s) found. Need at least 3.")
sys.exit(1)
# Check for required fields in TASK_WITH_GRADERS
required_fields = ["name", "difficulty", "description", "grader"]
for field in required_fields:
if f'"{field}":' not in content:
print(f"β FAIL: Missing required field '{field}' in task definitions")
sys.exit(1)
print(f" β Field '{field}' found")
# Check for difficulty levels
difficulties_found = []
for diff in ["easy", "medium", "hard"]:
if f'"{diff}"' in content or f"'{diff}'" in content:
difficulties_found.append(diff)
print(f" β Difficulty level '{diff}' found")
if len(difficulties_found) < 3:
print(f"β FAIL: Only {len(difficulties_found)} difficulty level(s) found. Need all 3: easy, medium, hard")
sys.exit(1)
# Check for grader references
graders_found = []
for grader in ["grade_text_spam", "grade_content_moderation", "grade_deepfake"]:
if grader in content:
graders_found.append(grader)
print(f" β Grader function '{grader}' referenced")
if len(graders_found) < 3:
print(f"β FAIL: Only {len(graders_found)} grader(s) found. Need all 3.")
sys.exit(1)
# Check for test functions
required_tests = [
"test_registry_structure_has_at_least_three_tasks",
"test_registry_structure_task_count_equals_three",
"test_registry_structure_all_tasks_have_required_fields",
]
print("")
for test_name in required_tests:
if f"def {test_name}(" not in content:
print(f"β FAIL: Test function '{test_name}' not found")
sys.exit(1)
print(f" β Test '{test_name}' defined")
print("")
print("β
PASS: All validation checks passed!")
print("")
print("Task Registry Summary:")
print(f" β’ Total tasks with graders: {task_count}")
print(f" β’ Tasks: {', '.join(tasks_found)}")
print(f" β’ Difficulty levels: {', '.join(difficulties_found)}")
print(f" β’ Graders: {', '.join(graders_found)}")
print(f" β’ Required fields: {', '.join(required_fields)}")
print(f" β’ Validator tests: {len(required_tests)} core tests defined")
print("")
print("β Validator requirement met: At least 3 tasks with graders")
|