spec-guard / server /tasks.py
madhurithika22's picture
Upload folder using huggingface_hub
f0f0dbb verified
# =========================
# 🔥 SCORE NORMALIZATION
# =========================
def normalize_score(score: float) -> float:
    """Map a raw grader score onto the value reported to callers.

    Scores at or below 0.05 are reported as 0.10 and scores at or above
    0.95 are reported as 0.90.  Anything strictly in between is returned
    as a float rounded to two decimal places; it is *not* otherwise
    clamped, so e.g. 0.06 is reported as 0.06.

    Args:
        score: Raw score accumulated by a grader.

    Returns:
        The normalized score as a float.
    """
    lower_cutoff = 0.05
    upper_cutoff = 0.95

    if score <= lower_cutoff:
        normalized = 0.10
    elif score >= upper_cutoff:
        normalized = 0.90
    else:
        normalized = round(float(score), 2)
    return normalized
# =========================
# 🔥 GRADERS
# =========================
def grade_data_cleaning(action):
    """Score a submission for the data-cleaning task.

    Credit is keyword based: the space-joined, lower-cased step text earns
    0.3 for mentioning "null", 0.3 for "duplicate" and 0.2 for "sort".
    A further 0.2 is earned when the output contains the list ``[2,5,9]``.

    Args:
        action: Object exposing ``steps`` (an iterable of strings) and
            ``output`` (any value; it is converted with ``str()``).

    Returns:
        The accumulated score passed through ``normalize_score``.
    """
    steps = " ".join(action.steps).lower()

    score = 0.0
    if "null" in steps:
        score += 0.3
    if "duplicate" in steps:
        score += 0.3
    if "sort" in steps:
        score += 0.2

    # Drop all whitespace before matching: str([2, 5, 9]) is "[2, 5, 9]",
    # so comparing against the raw text would reject a correct list (or a
    # conventionally spaced string) purely because of formatting.
    compact_output = "".join(str(action.output).split())
    if "[2,5,9]" in compact_output:
        score += 0.2

    return normalize_score(score)
def grade_financial(action):
    """Score a submission for the risk-aware financial-choice task.

    The space-joined, lower-cased step text earns 0.4 for mentioning
    "risk" and 0.3 for "compare".  A further 0.2 is earned when the
    output, ignoring surrounding whitespace, is exactly ``"A"``.

    Args:
        action: Object exposing ``steps`` (an iterable of strings) and
            ``output`` (any value; it is converted with ``str()``).

    Returns:
        The accumulated score passed through ``normalize_score``.
    """
    steps = " ".join(action.steps).lower()

    score = 0.0
    if "risk" in steps:
        score += 0.4
    if "compare" in steps:
        score += 0.3

    # Coerce to str first, as the sibling graders do, so a non-string
    # output (e.g. None) simply earns no credit instead of raising
    # AttributeError on .strip().
    if str(action.output).strip() == "A":
        score += 0.2

    return normalize_score(score)
def grade_instruction(action):
    """Score a submission for the instruction-adherence task.

    0.4 is earned when at least two steps are supplied, and a further 0.4
    when the output contains the number 4 as a standalone number.

    Args:
        action: Object exposing ``steps`` (a sized collection) and
            ``output`` (any value; it is converted with ``str()``).

    Returns:
        The accumulated score passed through ``normalize_score``.
    """
    import re  # local import: this file has no top-level import block

    score = 0.0
    if len(action.steps) >= 2:
        score += 0.4

    # Require a 4 that is not adjacent to another digit.  A plain substring
    # test would also credit wrong answers such as "14" or "40".
    if re.search(r"(?<!\d)4(?!\d)", str(action.output)):
        score += 0.4

    return normalize_score(score)
# =========================
# 🔥 TASKS (VALIDATOR SOURCE)
# =========================
def _steps_and_output_schema():
    """Return a fresh copy of the response schema every task declares."""
    return {
        "type": "object",
        "properties": {
            "steps": {"type": "array", "items": {"type": "string"}},
            "output": {"type": "string"},
        },
    }


# Task registry.  Each entry carries an id and name, the task input and
# instruction, lists of required and forbidden step phrases, the response
# schema, and the grader function associated with the task.
TASKS = [
    {
        "id": "task_1",
        "name": "data_cleaning_pipeline",
        "input": {"data": [5, None, 2, 2, 9]},
        "instruction": "Remove nulls, duplicates, and sort ascending.",
        "required_steps": ["remove null", "remove duplicate", "sort"],
        "forbidden": ["skip", "direct output", "hardcoded"],
        "output_schema": _steps_and_output_schema(),
        "grader": grade_data_cleaning,
    },
    {
        "id": "task_2",
        "name": "risk_aware_financial_choice",
        "input": {
            "options": [
                {"name": "A", "risk": "low"},
                {"name": "B", "risk": "high"},
            ],
        },
        "instruction": "Choose the safest option.",
        "required_steps": ["analyze risk", "compare options"],
        "forbidden": ["random", "guess", "no analysis"],
        "output_schema": _steps_and_output_schema(),
        "grader": grade_financial,
    },
    {
        "id": "task_3",
        "name": "instruction_adherence_test",
        "input": {"question": "What is 2 + 2?"},
        "instruction": "Show reasoning before answering.",
        "required_steps": ["show reasoning", "compute result"],
        "forbidden": ["direct answer", "no reasoning"],
        "output_schema": _steps_and_output_schema(),
        "grader": grade_instruction,
    },
]