content-moderation-env / verify_tasks.py
ANI00's picture
Add: Task and grader registry for validation
2a9d296 verified
#!/usr/bin/env python
"""
Verify that at least 3 tasks with graders are defined.
This script checks the test/test.py structure for validator compliance.
"""
import sys
import os
# Registry file inspected by default; the path is relative to the current
# working directory, so the script is expected to run from the repo root.
TEST_FILE = "test/test.py"

# Names the validator expects to find in the registry file.
TASK_IDS = ("text_spam", "content_moderation", "deepfake_detection")
REQUIRED_FIELDS = ("name", "difficulty", "description", "grader")
DIFFICULTIES = ("easy", "medium", "hard")
GRADERS = ("grade_text_spam", "grade_content_moderation", "grade_deepfake")
REQUIRED_TESTS = (
    "test_registry_structure_has_at_least_three_tasks",
    "test_registry_structure_task_count_equals_three",
    "test_registry_structure_all_tasks_have_required_fields",
)


def _fail(message: str) -> int:
    """Print *message* as a failure line and return exit status 1."""
    print(f"❌ FAIL: {message}")
    return 1


def _has_dict_key(content: str, key: str) -> bool:
    """Return True if *key* appears in *content* as a quoted dict key."""
    return f'"{key}":' in content or f"'{key}':" in content


def _has_quoted(content: str, value: str) -> bool:
    """Return True if *value* appears in *content* as a quoted string."""
    return f'"{value}"' in content or f"'{value}'" in content


def main(test_file: str = TEST_FILE) -> int:
    """Check that *test_file* defines at least 3 tasks with graders.

    The checks are plain substring searches over the file's text (the file
    is never imported or parsed), so they confirm that the expected names
    are present, not that the registry is well-formed Python.

    Args:
        test_file: Path of the registry/test module to inspect.

    Returns:
        0 when every check passes, 1 on the first failed check. A progress
        line is printed for each check, and a summary on success.
    """
    # Read explicitly as UTF-8 so the result does not depend on the platform
    # default encoding; report an unreadable file as a normal failure rather
    # than an unhandled traceback.
    try:
        with open(test_file, "r", encoding="utf-8") as f:
            content = f.read()
    except (OSError, UnicodeDecodeError) as err:
        return _fail(f"Could not read {test_file}: {err}")

    # Check for TASK_WITH_GRADERS dict definition
    if "TASK_WITH_GRADERS = {" not in content:
        return _fail(f"TASK_WITH_GRADERS not found in {test_file}")

    # Count distinct task IDs (text_spam, content_moderation, deepfake_detection)
    tasks_found = [task for task in TASK_IDS if _has_dict_key(content, task)]
    task_count = len(tasks_found)
    print(f"Found {task_count} tasks with graders defined: {', '.join(tasks_found)}")
    if task_count < 3:
        return _fail(f"Only {task_count} task(s) found. Need at least 3.")

    # Check for required fields in TASK_WITH_GRADERS. Both quote styles are
    # accepted, matching the task-id and difficulty checks.
    for field in REQUIRED_FIELDS:
        if not _has_dict_key(content, field):
            return _fail(f"Missing required field '{field}' in task definitions")
        print(f" ✓ Field '{field}' found")

    # Check for difficulty levels
    difficulties_found = []
    for diff in DIFFICULTIES:
        if _has_quoted(content, diff):
            difficulties_found.append(diff)
            print(f" ✓ Difficulty level '{diff}' found")
    if len(difficulties_found) < 3:
        return _fail(
            f"Only {len(difficulties_found)} difficulty level(s) found. "
            "Need all 3: easy, medium, hard"
        )

    # Check for grader references (bare-name match anywhere in the file)
    graders_found = []
    for grader in GRADERS:
        if grader in content:
            graders_found.append(grader)
            print(f" ✓ Grader function '{grader}' referenced")
    if len(graders_found) < 3:
        return _fail(f"Only {len(graders_found)} grader(s) found. Need all 3.")

    # Check for test functions
    print("")
    for test_name in REQUIRED_TESTS:
        if f"def {test_name}(" not in content:
            return _fail(f"Test function '{test_name}' not found")
        print(f" ✓ Test '{test_name}' defined")

    print("")
    print("✅ PASS: All validation checks passed!")
    print("")
    print("Task Registry Summary:")
    print(f" • Total tasks with graders: {task_count}")
    print(f" • Tasks: {', '.join(tasks_found)}")
    print(f" • Difficulty levels: {', '.join(difficulties_found)}")
    print(f" • Graders: {', '.join(graders_found)}")
    print(f" • Required fields: {', '.join(REQUIRED_FIELDS)}")
    print(f" • Validator tests: {len(REQUIRED_TESTS)} core tests defined")
    print("")
    print("✓ Validator requirement met: At least 3 tasks with graders")
    return 0


if __name__ == "__main__":
    sys.exit(main())