Spaces:

madhurithika22
/

spec-guard

Sleeping

App Files Files Community

spec-guard / server /tasks.py

madhurithika22

Upload folder using huggingface_hub

f0f0dbb verified about 2 months ago

raw

history blame contribute delete

3.37 kB

	# =========================
	# 🔥 SCORE NORMALIZATION
	# =========================
	def normalize_score(score: float) -> float:
	    """Clamp a raw grader score into [0.10, 0.90] and round it to two decimals.

	    Args:
	        score: Raw score accumulated by a grader.

	    Returns:
	        The score limited to the closed range [0.10, 0.90], rounded to two
	        decimal places, so the result is never exactly 0 or 1.
	    """
	    lower_bound = 0.10
	    upper_bound = 0.90
	    # Clamp at the bounds themselves rather than at separate cutoffs: this
	    # keeps the mapping monotonic, so a higher raw score can never come out
	    # lower than a smaller one (e.g. 0.07 must not rank below 0.0).
	    clamped = min(max(float(score), lower_bound), upper_bound)
	    return round(clamped, 2)


	# =========================
	# 🔥 GRADERS
	# =========================
	def grade_data_cleaning(action):
	    """Score a data-cleaning submission.

	    Credit is awarded for keywords found in the lower-cased, space-joined
	    steps and for the expected result appearing in the output:

	    * "null" in the steps       -> +0.3
	    * "duplicate" in the steps  -> +0.3
	    * "sort" in the steps       -> +0.2
	    * "[2,5,9]" in the output   -> +0.2

	    Args:
	        action: Object exposing ``steps`` (an iterable of strings) and
	            ``output`` (any value; it is converted with ``str``).

	    Returns:
	        The accumulated score passed through ``normalize_score``.
	    """
	    score = 0.0
	    steps = " ".join(action.steps).lower()
	    # Remove all whitespace before comparing: str([2, 5, 9]) renders as
	    # "[2, 5, 9]", which would otherwise never match the compact form.
	    compact_output = "".join(str(action.output).split())

	    if "null" in steps:
	        score += 0.3
	    if "duplicate" in steps:
	        score += 0.3
	    if "sort" in steps:
	        score += 0.2
	    if "[2,5,9]" in compact_output:
	        score += 0.2
	    return normalize_score(score)

	def grade_financial(action):
	    """Score a financial-choice submission.

	    Credit is awarded for keywords found in the lower-cased, space-joined
	    steps and for the chosen option:

	    * "risk" in the steps                     -> +0.4
	    * "compare" in the steps                  -> +0.3
	    * output equal to "A" after stripping     -> +0.2

	    Args:
	        action: Object exposing ``steps`` (an iterable of strings) and
	            ``output`` (any value; it is converted with ``str``).

	    Returns:
	        The accumulated score passed through ``normalize_score``.
	    """
	    score = 0.0
	    steps = " ".join(action.steps).lower()
	    # Convert with str() first, as the other graders do, so a non-string
	    # output simply earns no credit instead of raising AttributeError.
	    answer = str(action.output).strip()

	    if "risk" in steps:
	        score += 0.4
	    if "compare" in steps:
	        score += 0.3
	    if answer == "A":
	        score += 0.2
	    return normalize_score(score)

	def grade_instruction(action):
	    """Score an instruction-adherence submission.

	    * at least two entries in ``action.steps``       -> +0.4
	    * the number 4 appears in the output on its own  -> +0.4

	    Args:
	        action: Object exposing ``steps`` (a sized collection) and
	            ``output`` (any value; it is converted with ``str``).

	    Returns:
	        The accumulated score passed through ``normalize_score``.
	    """
	    import re

	    score = 0.0
	    if len(action.steps) >= 2:
	        score += 0.4

	    # Match 4 (or 4.0, 4.00, ...) only as a standalone number. A plain
	    # substring test would also reward wrong answers such as "14", "40"
	    # or "0.4". A trailing sentence period ("... is 4.") is still accepted.
	    standalone_four = r"(?<![\d.])4(?:\.0+)?(?!\.?\d)"
	    if re.search(standalone_four, str(action.output)):
	        score += 0.4
	    return normalize_score(score)


	# =========================
	# 🔥 TASKS (VALIDATOR SOURCE)
	# =========================
	def _steps_output_schema():
	    """Return the response schema shared by every task.

	    A new dict is built on each call so that no two tasks share the same
	    mutable schema object.
	    """
	    return {
	        "type": "object",
	        "properties": {
	            "steps": {"type": "array", "items": {"type": "string"}},
	            "output": {"type": "string"},
	        },
	    }


	# Task catalogue. Each entry pairs a prompt ("input" + "instruction") with
	# the grader function that scores a submission for it.
	# NOTE(review): "required_steps" and "forbidden" are not read by the graders
	# in this module; presumably a validator elsewhere consumes them - confirm.
	TASKS = [
	    {
	        "id": "task_1",
	        "name": "data_cleaning_pipeline",
	        "input": {"data": [5, None, 2, 2, 9]},
	        "instruction": "Remove nulls, duplicates, and sort ascending.",
	        "required_steps": ["remove null", "remove duplicate", "sort"],
	        "forbidden": ["skip", "direct output", "hardcoded"],
	        "output_schema": _steps_output_schema(),
	        "grader": grade_data_cleaning,
	    },
	    {
	        "id": "task_2",
	        "name": "risk_aware_financial_choice",
	        "input": {
	            "options": [
	                {"name": "A", "risk": "low"},
	                {"name": "B", "risk": "high"},
	            ],
	        },
	        "instruction": "Choose the safest option.",
	        "required_steps": ["analyze risk", "compare options"],
	        "forbidden": ["random", "guess", "no analysis"],
	        "output_schema": _steps_output_schema(),
	        "grader": grade_financial,
	    },
	    {
	        "id": "task_3",
	        "name": "instruction_adherence_test",
	        "input": {"question": "What is 2 + 2?"},
	        "instruction": "Show reasoning before answering.",
	        "required_steps": ["show reasoning", "compute result"],
	        "forbidden": ["direct answer", "no reasoning"],
	        "output_schema": _steps_output_schema(),
	        "grader": grade_instruction,
	    },
	]