Spaces:

ayhm23
/

resume-env

Sleeping

resume-env / validate.py

ayhm23

fix: clamp all task scores to (0.001, 0.999) — scores must be strictly in (0,1)

1b95618 about 2 months ago

20.8 kB

	#!/usr/bin/env python3
	"""
	validate.py — ResumeEnv Pre-Submission Validator
	=================================================
	Run this BEFORE submitting your HF Spaces URL.
	Checks every criterion from the hackathon's automated disqualification list.

	Usage:
	# Against local server (start it first):
	python validate.py --base-url http://localhost:8000

	# Against deployed HF Space:
	python validate.py --base-url https://your-username-resume-env.hf.space

	Exit codes:
	0 — all checks passed
	1 — one or more checks failed
	"""

	import argparse
	import asyncio
	import json
	import sys
	import time
	import httpx


	# Status labels used when printing a check outcome.
	PASS = "✅ PASS"
	FAIL = "❌ FAIL"
	# NOTE(review): WARN is not referenced by any function visible in this file.
	WARN = "⚠️ WARN"

	# Accumulates one (name, passed, detail) tuple per check() call; main() reads
	# it to print the final summary and pick the exit code.
	results = []


	def check(name: str, passed: bool, detail: str = ""):
	    """Report a single check outcome and remember it for the summary.

	    Prints the PASS/FAIL label followed by ``name`` (and ``detail`` on a
	    second line when it is non-empty), appends ``(name, passed, detail)`` to
	    the module-level ``results`` list, and returns ``passed`` unchanged.
	    """
	    label = FAIL if not passed else PASS
	    lines = [f" {label} {name}"]
	    if detail:
	        lines.append(f" {detail}")
	    print("\n".join(lines))
	    results.append((name, passed, detail))
	    return passed


	# ─────────────────────────────────────────────────────────────────────────────
	# 1. Health check — space must return 200
	# ─────────────────────────────────────────────────────────────────────────────

	def test_health(base_url: str):
	    """Check that GET ``{base_url}/health`` answers 200 with a 'status' key.

	    Any exception (connection error, non-JSON body, ...) is recorded as a
	    failed "GET /health reachable" check instead of propagating.
	    """
	    print("\n[1] Health Check")
	    try:
	        response = httpx.get(f"{base_url}/health", timeout=15)
	        status = response.status_code
	        check("GET /health returns 200", status == 200, f"status={status}")
	        # The body is parsed even on a non-200 status; a parse failure lands
	        # in the except branch below.
	        body = response.json()
	        check("/health body contains 'status'", "status" in body, str(body))
	    except Exception as exc:
	        check("GET /health reachable", False, str(exc))


	# ─────────────────────────────────────────────────────────────────────────────
	# 2. reset() — must accept task_id and return observation
	# ─────────────────────────────────────────────────────────────────────────────

	def test_reset(base_url: str):
	    """Check that POST ``/reset`` accepts each task id and returns an observation.

	    For every task id the response must be 200; on success the observation
	    (the ``"observation"`` field, or the whole body when that field is
	    absent) must carry a non-empty ``job_description``. Exceptions are
	    recorded as a failed check for that task id.
	    """
	    print("\n[2] reset() API")
	    task_ids = (
	        "task1_keyword_extraction",
	        "task2_bullet_rewrite",
	        "task3_full_application",
	    )
	    for task_id in task_ids:
	        label = f"POST /reset task_id={task_id}"
	        try:
	            response = httpx.post(
	                f"{base_url}/reset",
	                json={"task_id": task_id},
	                timeout=15,
	            )
	            succeeded = response.status_code == 200
	            check(label, succeeded, f"status={response.status_code}")
	            if not succeeded:
	                continue
	            payload = response.json()
	            observation = payload.get("observation", payload)
	            job_description = observation.get("job_description", "")
	            check(" observation.job_description present", bool(job_description))
	        except Exception as exc:
	            check(label, False, str(exc))


	# ─────────────────────────────────────────────────────────────────────────────
	# 3. step() — must accept actions and return reward + done
	# ─────────────────────────────────────────────────────────────────────────────

	def test_step(base_url: str):
	    """Exercise POST ``/step`` for all three tasks and validate the rewards.

	    Each task is reset first, then stepped:

	    * task 1 - one ``extract_keywords`` action; checks status 200, that the
	      reward lies in [0.0, 1.0] and that ``done`` is a bool.
	    * task 2 - one ``rewrite_bullet`` action; checks status 200 and the
	      reward range.
	    * task 3 - four content actions in sequence; the episode check passes
	      only when *every* step returned 200 (not just the last one), and the
	      reward of the last successful step must lie in [0.0, 1.0].

	    A missing or null ``reward`` is treated as 0.0. Exceptions raised while
	    testing a task are recorded as a failed check for that task.
	    """
	    print("\n[3] step() API")

	    # Task 1
	    try:
	        httpx.post(f"{base_url}/reset", json={"task_id": "task1_keyword_extraction"}, timeout=15)
	        r = httpx.post(
	            f"{base_url}/step",
	            json={"action": {
	                "action_type": "extract_keywords",
	                "hard_skills": ["SQL", "Python"],
	                "soft_skills": ["communication"],
	                "experience_years": 3,
	                "rewritten_bullet": "",
	                "content": "",
	            }},
	            timeout=15,
	        )
	        ok = r.status_code == 200
	        check("POST /step task1 returns 200", ok, f"status={r.status_code}")
	        if ok:
	            data = r.json()
	            check(" reward in [0.0, 1.0]", 0.0 <= (data.get("reward") or 0.0) <= 1.0,
	                  f"reward={data.get('reward')}")
	            check(" done is bool", isinstance(data.get("done"), bool))
	    except Exception as e:
	        check("POST /step task1", False, str(e))

	    # Task 2
	    try:
	        httpx.post(f"{base_url}/reset", json={"task_id": "task2_bullet_rewrite"}, timeout=15)
	        r = httpx.post(
	            f"{base_url}/step",
	            json={"action": {
	                "action_type": "rewrite_bullet",
	                "hard_skills": [],
	                "soft_skills": [],
	                "experience_years": 0,
	                "rewritten_bullet": "Developed SQL dashboards tracking 15 KPIs, reducing reporting time by 30%.",
	                "content": "",
	            }},
	            timeout=15,
	        )
	        ok = r.status_code == 200
	        check("POST /step task2 returns 200", ok, f"status={r.status_code}")
	        if ok:
	            data = r.json()
	            check(" reward in [0.0, 1.0]", 0.0 <= (data.get("reward") or 0.0) <= 1.0,
	                  f"reward={data.get('reward')}")
	    except Exception as e:
	        check("POST /step task2", False, str(e))

	    # Task 3 — run all 4 steps
	    try:
	        httpx.post(f"{base_url}/reset", json={"task_id": "task3_full_application"}, timeout=15)
	        steps = [
	            ("rewrite_summary", "Experienced analyst with 5 years driving data-informed decisions."),
	            ("rewrite_experience", "Developed SQL dashboards tracking 15 KPIs, reducing reporting time by 30%."),
	            ("update_skills", "SQL, Python, Tableau, BigQuery, dbt, stakeholder communication"),
	            ("write_cover_letter", "Dear Hiring Manager,\n\nI am excited to apply.\n\nSincerely, Applicant"),
	        ]
	        last_reward = 0.0
	        # Collect every failing step so that an early failure cannot be masked
	        # by a later step that happens to return 200.
	        failed_steps = []
	        for action_type, content in steps:
	            r = httpx.post(
	                f"{base_url}/step",
	                json={"action": {
	                    "action_type": action_type,
	                    "hard_skills": [], "soft_skills": [],
	                    "experience_years": 0,
	                    "rewritten_bullet": "",
	                    "content": content,
	                }},
	                timeout=15,
	            )
	            if r.status_code == 200:
	                data = r.json()
	                last_reward = data.get("reward") or 0.0
	            else:
	                failed_steps.append(f"{action_type} -> status={r.status_code}")
	        detail = f"final reward={last_reward:.4f}"
	        if failed_steps:
	            detail += "; failed steps: " + ", ".join(failed_steps)
	        check("POST /step task3 full 4-step episode", not failed_steps, detail)
	        check(" final reward in [0.0, 1.0]", 0.0 <= last_reward <= 1.0)
	    except Exception as e:
	        check("POST /step task3", False, str(e))


	# ─────────────────────────────────────────────────────────────────────────────
	# 4. state() — must return episode metadata
	# ─────────────────────────────────────────────────────────────────────────────

	def test_state(base_url: str):
	    """Check that GET ``/state`` returns 200 with ``step_count`` and ``episode_id``.

	    The environment is reset to task 1 first. Any exception is recorded as a
	    failed "GET /state" check.
	    """
	    print("\n[4] state() API")
	    try:
	        httpx.post(f"{base_url}/reset", json={"task_id": "task1_keyword_extraction"}, timeout=15)
	        response = httpx.get(f"{base_url}/state", timeout=15)
	        succeeded = response.status_code == 200
	        check("GET /state returns 200", succeeded)
	        if not succeeded:
	            return
	        state = response.json()
	        present_keys = str(list(state.keys()))
	        check(" state has step_count", "step_count" in state, present_keys)
	        check(" state has episode_id", "episode_id" in state)
	    except Exception as exc:
	        check("GET /state", False, str(exc))


	# ─────────────────────────────────────────────────────────────────────────────
	# 5. /tasks — list with 3 tasks and action schemas
	# ─────────────────────────────────────────────────────────────────────────────

	def test_tasks(base_url: str):
	    """Check that GET ``/tasks`` lists the tasks with difficulties and schemas.

	    Requires status 200, at least three entries under ``"tasks"``, the
	    difficulties easy/medium/hard all present, and a truthy
	    ``action_schema`` on every task. Any exception is recorded as a failed
	    "GET /tasks" check.
	    """
	    print("\n[5] /tasks Endpoint")
	    try:
	        response = httpx.get(f"{base_url}/tasks", timeout=15)
	        succeeded = response.status_code == 200
	        check("GET /tasks returns 200", succeeded)
	        if not succeeded:
	            return
	        task_list = response.json().get("tasks", [])
	        count = len(task_list)
	        check(" at least 3 tasks returned", count >= 3, f"got {count}")
	        found = {entry.get("difficulty") for entry in task_list}
	        required = {"easy", "medium", "hard"}
	        check(" easy + medium + hard all present", required.issubset(found),
	              f"found: {found}")
	        for entry in task_list:
	            schema_present = bool(entry.get("action_schema"))
	            check(f" task '{entry.get('id')}' has action_schema", schema_present)
	    except Exception as exc:
	        check("GET /tasks", False, str(exc))


	# ─────────────────────────────────────────────────────────────────────────────
	# 6. /grader — returns score after completed episode
	# ─────────────────────────────────────────────────────────────────────────────

	def test_grader(base_url: str):
	    """Check that GET ``/grader`` reports a final score after a task 1 episode.

	    Runs reset + one ``extract_keywords`` step, then requires ``/grader`` to
	    return 200 with a ``final_score`` (top level, or nested under
	    ``grader_result``) that converts to a float within [0.0, 1.0]. Any
	    exception is recorded as a failed "GET /grader" check.
	    """
	    print("\n[6] /grader Endpoint")
	    try:
	        # Complete an episode first
	        httpx.post(f"{base_url}/reset", json={"task_id": "task1_keyword_extraction"}, timeout=15)
	        episode_action = {
	            "action_type": "extract_keywords",
	            "hard_skills": ["SQL", "Python", "Tableau"],
	            "soft_skills": ["communication"],
	            "experience_years": 5,
	            "rewritten_bullet": "",
	            "content": "",
	        }
	        httpx.post(f"{base_url}/step", json={"action": episode_action}, timeout=15)

	        response = httpx.get(f"{base_url}/grader", timeout=15)
	        succeeded = response.status_code == 200
	        check("GET /grader returns 200", succeeded)
	        if not succeeded:
	            return
	        body = response.json()
	        # The nested lookup is always evaluated, even when the top-level key
	        # exists, so a non-dict "grader_result" still raises here.
	        nested_score = body.get("grader_result", {}).get("final_score")
	        score = body.get("final_score", nested_score)
	        valid = score is not None and 0.0 <= float(score) <= 1.0
	        check(" final_score present and in [0.0, 1.0]", valid, f"final_score={score}")
	    except Exception as exc:
	        check("GET /grader", False, str(exc))


	# ─────────────────────────────────────────────────────────────────────────────
	# 7. /baseline — runs without error, returns 3 task scores
	# ─────────────────────────────────────────────────────────────────────────────

	def test_baseline(base_url: str):
	    """Check that POST ``/baseline`` returns scores for all three tasks.

	    Requires status 200, an entry per task id under ``"results"``, an
	    ``overall_average_score`` within [0.0, 1.0] (a missing value defaults to
	    0.0), and every per-task ``score`` within [0.0, 1.0] (a missing value
	    defaults to -1 and therefore fails). Any exception is recorded as a
	    failed "POST /baseline" check.
	    """
	    print("\n[7] /baseline Endpoint")
	    try:
	        response = httpx.post(f"{base_url}/baseline", timeout=60)
	        succeeded = response.status_code == 200
	        check("POST /baseline returns 200", succeeded, f"status={response.status_code}")
	        if not succeeded:
	            return
	        body = response.json()
	        per_task = body.get("results", {})
	        expected = (
	            (" task1 score present", "task1_keyword_extraction"),
	            (" task2 score present", "task2_bullet_rewrite"),
	            (" task3 score present", "task3_full_application"),
	        )
	        for label, task_key in expected:
	            check(label, task_key in per_task)
	        overall = body.get("overall_average_score", 0.0)
	        check(" overall_average_score in [0.0, 1.0]",
	              0.0 <= overall <= 1.0, f"overall={overall}")
	        for task, entry in per_task.items():
	            score = entry.get("score", -1)
	            check(f" {task} score in [0.0, 1.0]",
	                  0.0 <= score <= 1.0, f"score={score}")
	    except Exception as exc:
	        check("POST /baseline", False, str(exc))


	# ─────────────────────────────────────────────────────────────────────────────
	# 8. Grader scores in range [0.0, 1.0] for all tasks
	# ─────────────────────────────────────────────────────────────────────────────

	def test_grader_ranges(base_url: str):
	    """Check that a single step on task 1 and task 2 yields a reward in [0.0, 1.0].

	    Each task is reset and stepped once with a fixed action. The response
	    status is not inspected; a body without ``reward`` defaults to -1 and
	    fails the range check. Exceptions are recorded as a failed
	    "<task_id> grader" check.
	    """
	    print("\n[8] Grader Score Ranges (all tasks)")
	    keyword_action = {
	        "action_type": "extract_keywords",
	        "hard_skills": ["SQL", "Python", "Tableau", "BigQuery"],
	        "soft_skills": ["stakeholder communication"],
	        "experience_years": 5,
	        "rewritten_bullet": "",
	        "content": "",
	    }
	    rewrite_action = {
	        "action_type": "rewrite_bullet",
	        "hard_skills": [],
	        "soft_skills": [],
	        "experience_years": 0,
	        "rewritten_bullet": "Developed Python dashboards tracking 20 KPIs, reducing manual reporting by 45%.",
	        "content": "",
	    }
	    scenarios = (
	        ("task1_keyword_extraction", keyword_action),
	        ("task2_bullet_rewrite", rewrite_action),
	    )
	    for task_id, action in scenarios:
	        try:
	            httpx.post(f"{base_url}/reset", json={"task_id": task_id}, timeout=15)
	            step_response = httpx.post(f"{base_url}/step", json={"action": action}, timeout=15)
	            reward = step_response.json().get("reward", -1)
	            in_range = 0.0 <= reward <= 1.0
	            check(f" {task_id} reward in [0.0, 1.0]", in_range, f"reward={reward}")
	        except Exception as exc:
	            check(f" {task_id} grader", False, str(exc))


	# ─────────────────────────────────────────────────────────────────────────────
	# 9. openenv.yaml present and valid
	# ─────────────────────────────────────────────────────────────────────────────

	def test_yaml():
	    """Check that ``openenv.yaml`` sits next to this script and has the key fields.

	    The file must exist in the directory of this script and contain the
	    substrings ``name:``, ``version:`` and ``sdk:``. The file is read as
	    UTF-8 so the result does not depend on the platform's default encoding.
	    Any exception is recorded as a failed "openenv.yaml readable" check.
	    """
	    print("\n[9] openenv.yaml")
	    import os
	    try:
	        path = os.path.join(os.path.dirname(__file__), "openenv.yaml")
	        exists = os.path.exists(path)
	        check("openenv.yaml exists at project root", exists)
	        if exists:
	            # Explicit encoding: the locale default (e.g. cp1252 on Windows)
	            # can fail on a UTF-8 file.
	            with open(path, encoding="utf-8") as f:
	                content = f.read()
	            check(" contains 'name'", "name:" in content)
	            check(" contains 'version'", "version:" in content)
	            check(" contains 'sdk'", "sdk:" in content)
	    except Exception as e:
	        check("openenv.yaml readable", False, str(e))


	# ─────────────────────────────────────────────────────────────────────────────
	# 10. Dockerfile present
	# ─────────────────────────────────────────────────────────────────────────────

	def test_dockerfile():
	    """Check that ``server/Dockerfile`` exists and has the expected directives.

	    The file must exist under ``server/`` next to this script and contain
	    ``FROM ``, ``EXPOSE 8000`` and either ``CMD `` or ``ENTRYPOINT ``. It is
	    read as UTF-8; a read or decode error is recorded as a failed
	    "server/Dockerfile readable" check instead of aborting the validator.
	    """
	    print("\n[10] Dockerfile")
	    import os
	    path = os.path.join(os.path.dirname(__file__), "server", "Dockerfile")
	    exists = os.path.exists(path)
	    check("server/Dockerfile exists", exists)
	    if not exists:
	        return
	    try:
	        with open(path, encoding="utf-8") as f:
	            content = f.read()
	    except (OSError, UnicodeError) as e:
	        # Report the problem as a failed check so the remaining checks and
	        # the final summary still run.
	        check("server/Dockerfile readable", False, str(e))
	        return
	    check(" contains FROM (base image)", "FROM " in content)
	    check(" contains EXPOSE 8000", "EXPOSE 8000" in content)
	    check(" contains CMD or ENTRYPOINT", "CMD " in content or "ENTRYPOINT " in content)


	# ─────────────────────────────────────────────────────────────────────────────
	# 11. inference.py is in project root
	# ─────────────────────────────────────────────────────────────────────────────

	def test_inference_location():
	    """Check that ``inference.py`` sits next to this script and names its settings.

	    The file must exist in the directory of this script and mention
	    ``HF_TOKEN`` or ``OPENAI_API_KEY``, plus ``MODEL_NAME`` and
	    ``API_BASE_URL`` (plain substring checks). A read or decode error is
	    recorded as a failed "inference.py readable" check instead of aborting
	    the validator.
	    """
	    print("\n[11] inference.py")
	    import os
	    path = os.path.join(os.path.dirname(__file__), "inference.py")
	    exists = os.path.exists(path)
	    check("inference.py exists at PROJECT ROOT (not a subfolder)", exists)
	    if not exists:
	        return
	    try:
	        with open(path, encoding="utf-8") as f:
	            content = f.read()
	    except (OSError, UnicodeError) as e:
	        # Report the problem as a failed check so the remaining checks and
	        # the final summary still run.
	        check("inference.py readable", False, str(e))
	        return
	    check(" uses HF_TOKEN or OPENAI_API_KEY",
	          "HF_TOKEN" in content or "OPENAI_API_KEY" in content)
	    check(" uses MODEL_NAME", "MODEL_NAME" in content)
	    check(" uses API_BASE_URL", "API_BASE_URL" in content)


	# ─────────────────────────────────────────────────────────────────────────────
	# 12. No heavy ML deps in server/requirements.txt
	# ─────────────────────────────────────────────────────────────────────────────

	def test_deps():
	    """Check that ``server/requirements.txt`` mentions none of the banned packages.

	    The file is read as UTF-8, lower-cased, and searched for each banned
	    name as a plain substring, so a name appearing inside another package
	    name or in a comment also counts. A missing file is recorded as a
	    failed check; a read or decode error is recorded as a failed
	    "server/requirements.txt readable" check instead of aborting the
	    validator.
	    """
	    print("\n[12] Dependency Safety")
	    import os
	    path = os.path.join(os.path.dirname(__file__), "server", "requirements.txt")
	    if not os.path.exists(path):
	        check("server/requirements.txt exists", False)
	        return
	    try:
	        with open(path, encoding="utf-8") as f:
	            content = f.read().lower()
	    except (OSError, UnicodeError) as e:
	        # Report the problem as a failed check so the remaining checks and
	        # the final summary still run.
	        check("server/requirements.txt readable", False, str(e))
	        return
	    BANNED = ["sentence-transformers", "torch", "tensorflow", "transformers",
	              "spacy", "nltk", "gensim", "sklearn", "scikit-learn"]
	    for dep in BANNED:
	        check(f" server/requirements.txt does NOT contain '{dep}'",
	              dep not in content, f"Found: {dep}")


	# ─────────────────────────────────────────────────────────────────────────────
	# MAIN
	# ─────────────────────────────────────────────────────────────────────────────

	def main():
	    """Run every validation check and exit with the overall status.

	    Parses ``--base-url`` (default ``http://localhost:8000``, trailing
	    slashes stripped), runs the static file checks followed by the live
	    server checks, prints a summary, and exits with code 0 when every
	    recorded check passed or 1 otherwise.
	    """
	    parser = argparse.ArgumentParser(description="ResumeEnv Pre-Submission Validator")
	    parser.add_argument(
	        "--base-url",
	        default="http://localhost:8000",
	        help="Base URL of running ResumeEnv server",
	    )
	    base = parser.parse_args().base_url.rstrip("/")

	    banner = "=" * 60
	    print(banner)
	    print(f"ResumeEnv Validator — {base}")
	    print(banner)

	    # Static checks (no server needed)
	    for static_check in (test_yaml, test_dockerfile, test_inference_location, test_deps):
	        static_check()

	    # Live server checks
	    live_checks = (
	        test_health,
	        test_reset,
	        test_step,
	        test_state,
	        test_tasks,
	        test_grader,
	        test_baseline,
	        test_grader_ranges,
	    )
	    for live_check in live_checks:
	        live_check(base)

	    # Summary
	    total = len(results)
	    failures = [(name, detail) for name, ok, detail in results if not ok]
	    passed = total - len(failures)

	    print("\n" + banner)
	    print(f"RESULT: {passed}/{total} checks passed")

	    if not failures:
	        print("\n🎉 All checks passed! Safe to submit your HF Spaces URL.")
	        sys.exit(0)

	    print(f"\n{FAIL} Failed checks:")
	    for name, detail in failures:
	        print(f" • {name}")
	        if detail:
	            print(f" {detail}")
	    print("\n⚠️ Fix all failures before submitting.")
	    sys.exit(1)


	# Run the validator only when this file is executed as a script.
	if __name__ == "__main__":
	    main()