Spaces:
Sleeping
Sleeping
#!/usr/bin/env python3
"""
validate.py — ResumeEnv Pre-Submission Validator
=================================================
Run this BEFORE submitting your HF Spaces URL.
Checks every criterion from the hackathon's automated disqualification list.
Usage:
    # Against local server (start it first):
    python validate.py --base-url http://localhost:8000
    # Against deployed HF Space:
    python validate.py --base-url https://your-username-resume-env.hf.space
Exit codes:
    0 — all checks passed
    1 — one or more checks failed
"""
import argparse
import asyncio
import json
import os
import re
import sys
import time

import httpx
# Status labels prefixed to each line printed by check(). The glyphs were
# mis-encoded in the previous revision and are restored here.
PASS = "✅ PASS"
FAIL = "❌ FAIL"
WARN = "⚠️ WARN"

# One (name, passed, detail) tuple is appended per check() call.
results = []


def check(name: str, passed: bool, detail: str = "") -> bool:
    """Print the outcome of one check, record it, and return ``passed``.

    Args:
        name: Human-readable label of the check.
        passed: Whether the check succeeded.
        detail: Optional extra text printed on its own line under the label.

    Returns:
        The ``passed`` value, unchanged, so callers can branch on it.
    """
    icon = PASS if passed else FAIL
    msg = f" {icon} {name}"
    if detail:
        msg += f"\n {detail}"
    print(msg)
    # Recorded even on success so the caller can compute a pass/total ratio.
    results.append((name, passed, detail))
    return passed
# ─────────────────────────────────────────────────────────────────────────────
# 1. Health check — space must return 200
# ─────────────────────────────────────────────────────────────────────────────
def test_health(base_url: str):
    """Check that ``GET {base_url}/health`` answers 200 with a JSON 'status' key.

    A transport failure is recorded as "GET /health reachable"; a body that is
    not valid JSON (or not a JSON object) is recorded against the body check
    instead of being reported as an unreachable server.

    Args:
        base_url: Server root URL without a trailing slash.
    """
    print("\n[1] Health Check")
    try:
        r = httpx.get(f"{base_url}/health", timeout=15)
    except Exception as e:
        # Validator boundary: any request failure is recorded, never raised.
        check("GET /health reachable", False, str(e))
        return

    check("GET /health returns 200", r.status_code == 200, f"status={r.status_code}")

    try:
        data = r.json()
    except ValueError as e:
        # JSON decode errors are ValueError subclasses.
        check("/health body contains 'status'", False, f"body is not valid JSON: {e}")
        return

    check(
        "/health body contains 'status'",
        isinstance(data, dict) and "status" in data,
        str(data),
    )
# ─────────────────────────────────────────────────────────────────────────────
# 2. reset() — must accept task_id and return observation
# ─────────────────────────────────────────────────────────────────────────────
def test_reset(base_url: str):
    """POST ``/reset`` once per known task id and inspect the observation.

    For each task a 200 status is required; on success the response JSON (or
    its ``observation`` member when present) must carry a non-empty
    ``job_description``. Any exception is recorded as a failed check.

    Args:
        base_url: Server root URL without a trailing slash.
    """
    print("\n[2] reset() API")
    task_ids = (
        "task1_keyword_extraction",
        "task2_bullet_rewrite",
        "task3_full_application",
    )
    for task_id in task_ids:
        label = f"POST /reset task_id={task_id}"
        try:
            response = httpx.post(
                f"{base_url}/reset",
                json={"task_id": task_id},
                timeout=15,
            )
            succeeded = response.status_code == 200
            check(label, succeeded, f"status={response.status_code}")
            if not succeeded:
                continue
            payload = response.json()
            # The observation may be nested or be the payload itself.
            observation = payload.get("observation", payload)
            job_description = observation.get("job_description", "")
            check(" observation.job_description present", bool(job_description))
        except Exception as exc:
            check(label, False, str(exc))
# ─────────────────────────────────────────────────────────────────────────────
# 3. step() — must accept actions and return reward + done
# ─────────────────────────────────────────────────────────────────────────────
def _post_step_action(base_url: str, action_type: str, **overrides):
    """POST one action to ``/step`` and return the raw httpx response.

    Every action carries the same six fields; ``overrides`` replaces the
    neutral defaults for the fields a given task actually uses.
    """
    action = {
        "action_type": action_type,
        "hard_skills": [],
        "soft_skills": [],
        "experience_years": 0,
        "rewritten_bullet": "",
        "content": "",
    }
    action.update(overrides)
    return httpx.post(f"{base_url}/step", json={"action": action}, timeout=15)


def test_step(base_url: str):
    """Exercise ``/step`` for each of the three tasks after a ``/reset``.

    Tasks 1 and 2 send a single action and check status and reward range
    (task 1 also checks that ``done`` is a bool). Task 3 sends four actions
    and passes only if every one of them returned 200. A missing or null
    reward is treated as 0.0 in the range checks.

    Args:
        base_url: Server root URL without a trailing slash.
    """
    print("\n[3] step() API")

    # Task 1
    try:
        httpx.post(f"{base_url}/reset", json={"task_id": "task1_keyword_extraction"}, timeout=15)
        r = _post_step_action(
            base_url,
            "extract_keywords",
            hard_skills=["SQL", "Python"],
            soft_skills=["communication"],
            experience_years=3,
        )
        ok = r.status_code == 200
        check("POST /step task1 returns 200", ok, f"status={r.status_code}")
        if ok:
            data = r.json()
            reward = data.get("reward")
            check(" reward in [0.0, 1.0]", 0.0 <= (reward or 0.0) <= 1.0,
                  f"reward={reward}")
            check(" done is bool", isinstance(data.get("done"), bool))
    except Exception as e:
        check("POST /step task1", False, str(e))

    # Task 2
    try:
        httpx.post(f"{base_url}/reset", json={"task_id": "task2_bullet_rewrite"}, timeout=15)
        r = _post_step_action(
            base_url,
            "rewrite_bullet",
            rewritten_bullet="Developed SQL dashboards tracking 15 KPIs, reducing reporting time by 30%.",
        )
        ok = r.status_code == 200
        check("POST /step task2 returns 200", ok, f"status={r.status_code}")
        if ok:
            reward = r.json().get("reward")
            check(" reward in [0.0, 1.0]", 0.0 <= (reward or 0.0) <= 1.0,
                  f"reward={reward}")
    except Exception as e:
        check("POST /step task2", False, str(e))

    # Task 3 — run all 4 steps
    try:
        httpx.post(f"{base_url}/reset", json={"task_id": "task3_full_application"}, timeout=15)
        steps = [
            ("rewrite_summary", "Experienced analyst with 5 years driving data-informed decisions."),
            ("rewrite_experience", "Developed SQL dashboards tracking 15 KPIs, reducing reporting time by 30%."),
            ("update_skills", "SQL, Python, Tableau, BigQuery, dbt, stakeholder communication"),
            ("write_cover_letter", "Dear Hiring Manager,\n\nI am excited to apply.\n\nSincerely, Applicant"),
        ]
        last_reward = 0.0
        failed_steps = []
        for action_type, content in steps:
            r = _post_step_action(base_url, action_type, content=content)
            if r.status_code == 200:
                last_reward = r.json().get("reward") or 0.0
            else:
                # Remember every failing step; previously only the final
                # step's status decided the outcome.
                failed_steps.append(f"{action_type} -> {r.status_code}")
        detail = f"final reward={last_reward:.4f}"
        if failed_steps:
            detail += f"; failed steps: {', '.join(failed_steps)}"
        check("POST /step task3 full 4-step episode", not failed_steps, detail)
        check(" final reward in [0.0, 1.0]", 0.0 <= last_reward <= 1.0)
    except Exception as e:
        check("POST /step task3", False, str(e))
# ─────────────────────────────────────────────────────────────────────────────
# 4. state() — must return episode metadata
# ─────────────────────────────────────────────────────────────────────────────
def test_state(base_url: str):
    """Reset task 1, then require ``GET /state`` to expose episode metadata.

    The response must be 200 and its JSON must contain ``step_count`` and
    ``episode_id``. Any exception is recorded as a failed "GET /state" check.

    Args:
        base_url: Server root URL without a trailing slash.
    """
    print("\n[4] state() API")
    try:
        httpx.post(f"{base_url}/reset", json={"task_id": "task1_keyword_extraction"}, timeout=15)
        response = httpx.get(f"{base_url}/state", timeout=15)
        reachable = response.status_code == 200
        check("GET /state returns 200", reachable)
        if not reachable:
            return
        state = response.json()
        check(" state has step_count", "step_count" in state, str(list(state.keys())))
        check(" state has episode_id", "episode_id" in state)
    except Exception as exc:
        check("GET /state", False, str(exc))
# ─────────────────────────────────────────────────────────────────────────────
# 5. /tasks — list with 3 tasks and action schemas
# ─────────────────────────────────────────────────────────────────────────────
def test_tasks(base_url: str):
    """Validate the ``GET /tasks`` listing.

    Requires a 200 response whose ``tasks`` list has at least three entries,
    covers the difficulties easy, medium and hard, and gives every task a
    truthy ``action_schema``. Any exception is recorded as a failed check.

    Args:
        base_url: Server root URL without a trailing slash.
    """
    print("\n[5] /tasks Endpoint")
    try:
        response = httpx.get(f"{base_url}/tasks", timeout=15)
        listed = response.status_code == 200
        check("GET /tasks returns 200", listed)
        if not listed:
            return
        tasks = response.json().get("tasks", [])
        task_count = len(tasks)
        check(" at least 3 tasks returned", task_count >= 3, f"got {task_count}")
        diffs = {task.get("difficulty") for task in tasks}
        required = {"easy", "medium", "hard"}
        check(" easy + medium + hard all present", required.issubset(diffs),
              f"found: {diffs}")
        for task in tasks:
            schema_present = bool(task.get("action_schema"))
            check(f" task '{task.get('id')}' has action_schema", schema_present)
    except Exception as exc:
        check("GET /tasks", False, str(exc))
# ─────────────────────────────────────────────────────────────────────────────
# 6. /grader — returns score after completed episode
# ─────────────────────────────────────────────────────────────────────────────
def test_grader(base_url: str):
    """Run one task-1 step, then require ``GET /grader`` to return a score.

    The score is read from the top-level ``final_score`` key, falling back to
    ``grader_result.final_score``, and must lie in [0.0, 1.0]. Any exception
    is recorded as a failed "GET /grader" check.

    Args:
        base_url: Server root URL without a trailing slash.
    """
    print("\n[6] /grader Endpoint")
    try:
        # Complete an episode first
        httpx.post(f"{base_url}/reset", json={"task_id": "task1_keyword_extraction"}, timeout=15)
        keyword_action = {
            "action_type": "extract_keywords",
            "hard_skills": ["SQL", "Python", "Tableau"],
            "soft_skills": ["communication"],
            "experience_years": 5,
            "rewritten_bullet": "",
            "content": "",
        }
        httpx.post(f"{base_url}/step", json={"action": keyword_action}, timeout=15)

        response = httpx.get(f"{base_url}/grader", timeout=15)
        graded = response.status_code == 200
        check("GET /grader returns 200", graded)
        if not graded:
            return
        body = response.json()
        # The nested lookup always runs, even when the top-level key exists.
        nested_score = body.get("grader_result", {}).get("final_score")
        score = body.get("final_score", nested_score)
        in_range = score is not None and 0.0 <= float(score) <= 1.0
        check(" final_score present and in [0.0, 1.0]", in_range, f"final_score={score}")
    except Exception as exc:
        check("GET /grader", False, str(exc))
# ─────────────────────────────────────────────────────────────────────────────
# 7. /baseline — runs without error, returns 3 task scores
# ─────────────────────────────────────────────────────────────────────────────
def _is_unit_score(value) -> bool:
    """Return True when ``value`` is a real number within [0.0, 1.0]."""
    if isinstance(value, bool) or not isinstance(value, (int, float)):
        return False
    return 0.0 <= value <= 1.0


def test_baseline(base_url: str):
    """POST ``/baseline`` and validate the per-task and overall scores.

    Requires a 200 response whose ``results`` mapping contains the three task
    ids, an ``overall_average_score`` in [0.0, 1.0], and a ``score`` in
    [0.0, 1.0] for every entry of ``results``. A missing or non-numeric score
    fails its own check instead of passing by default or aborting the rest.

    Args:
        base_url: Server root URL without a trailing slash.
    """
    print("\n[7] /baseline Endpoint")
    try:
        r = httpx.post(f"{base_url}/baseline", timeout=60)
        ok = r.status_code == 200
        check("POST /baseline returns 200", ok, f"status={r.status_code}")
        if not ok:
            return
        data = r.json()
        results_data = data.get("results", {})
        check(" task1 score present", "task1_keyword_extraction" in results_data)
        check(" task2 score present", "task2_bullet_rewrite" in results_data)
        check(" task3 score present", "task3_full_application" in results_data)

        # No default here: an absent overall score must fail, not count as 0.0.
        overall = data.get("overall_average_score")
        check(" overall_average_score in [0.0, 1.0]",
              _is_unit_score(overall), f"overall={overall}")

        for task, r_data in results_data.items():
            score = r_data.get("score") if isinstance(r_data, dict) else None
            check(f" {task} score in [0.0, 1.0]",
                  _is_unit_score(score), f"score={score}")
    except Exception as e:
        check("POST /baseline", False, str(e))
# ─────────────────────────────────────────────────────────────────────────────
# 8. Grader scores in range [0.0, 1.0] for all tasks
# ─────────────────────────────────────────────────────────────────────────────
def test_grader_ranges(base_url: str):
    """Reset and step tasks 1 and 2, requiring each reward to be in [0.0, 1.0].

    A response without a ``reward`` key is read as -1 and therefore fails.
    Any exception for a task is recorded as a failed "<task_id> grader" check.

    Args:
        base_url: Server root URL without a trailing slash.
    """
    print("\n[8] Grader Score Ranges (all tasks)")
    keyword_action = {
        "action_type": "extract_keywords",
        "hard_skills": ["SQL", "Python", "Tableau", "BigQuery"],
        "soft_skills": ["stakeholder communication"],
        "experience_years": 5,
        "rewritten_bullet": "",
        "content": "",
    }
    bullet_action = {
        "action_type": "rewrite_bullet",
        "hard_skills": [],
        "soft_skills": [],
        "experience_years": 0,
        "rewritten_bullet": "Developed Python dashboards tracking 20 KPIs, reducing manual reporting by 45%.",
        "content": "",
    }
    scenarios = [
        ("task1_keyword_extraction", keyword_action),
        ("task2_bullet_rewrite", bullet_action),
    ]
    for task_id, action in scenarios:
        try:
            httpx.post(f"{base_url}/reset", json={"task_id": task_id}, timeout=15)
            step_response = httpx.post(f"{base_url}/step", json={"action": action}, timeout=15)
            reward = step_response.json().get("reward", -1)
            within_bounds = 0.0 <= reward <= 1.0
            check(f" {task_id} reward in [0.0, 1.0]", within_bounds, f"reward={reward}")
        except Exception as exc:
            check(f" {task_id} grader", False, str(exc))
# ─────────────────────────────────────────────────────────────────────────────
# 9. openenv.yaml present and valid
# ─────────────────────────────────────────────────────────────────────────────
def test_yaml():
    """Check that ``openenv.yaml`` sits next to this script and names key fields.

    The file is searched for the substrings ``name:``, ``version:`` and
    ``sdk:``; it is not parsed as YAML. It is read as UTF-8 so the result does
    not depend on the platform's default encoding. A read or decode failure is
    recorded as a failed check.
    """
    print("\n[9] openenv.yaml")
    path = os.path.join(os.path.dirname(__file__), "openenv.yaml")
    exists = os.path.exists(path)
    check("openenv.yaml exists at project root", exists)
    if not exists:
        return
    try:
        with open(path, encoding="utf-8") as f:
            content = f.read()
    except (OSError, UnicodeError) as e:
        check("openenv.yaml readable", False, str(e))
        return
    check(" contains 'name'", "name:" in content)
    check(" contains 'version'", "version:" in content)
    check(" contains 'sdk'", "sdk:" in content)
# ─────────────────────────────────────────────────────────────────────────────
# 10. Dockerfile present
# ─────────────────────────────────────────────────────────────────────────────
def test_dockerfile():
    """Check that ``server/Dockerfile`` exists and has the expected directives.

    The file is searched for ``FROM ``, ``EXPOSE 8000`` and either ``CMD `` or
    ``ENTRYPOINT ``. It is read as UTF-8, and a read or decode failure is
    recorded as a failed check so the validator still reaches its summary.
    """
    print("\n[10] Dockerfile")
    path = os.path.join(os.path.dirname(__file__), "server", "Dockerfile")
    exists = os.path.exists(path)
    check("server/Dockerfile exists", exists)
    if not exists:
        return
    try:
        with open(path, encoding="utf-8") as f:
            content = f.read()
    except (OSError, UnicodeError) as e:
        check("server/Dockerfile readable", False, str(e))
        return
    check(" contains FROM (base image)", "FROM " in content)
    check(" contains EXPOSE 8000", "EXPOSE 8000" in content)
    check(" contains CMD or ENTRYPOINT", "CMD " in content or "ENTRYPOINT " in content)
# ─────────────────────────────────────────────────────────────────────────────
# 11. inference.py is in project root
# ─────────────────────────────────────────────────────────────────────────────
def test_inference_location():
    """Check that ``inference.py`` sits next to this script and names its env vars.

    The file text must mention ``HF_TOKEN`` or ``OPENAI_API_KEY``, plus
    ``MODEL_NAME`` and ``API_BASE_URL``. A read or decode failure is recorded
    as a failed check so the validator still reaches its summary.
    """
    print("\n[11] inference.py")
    path = os.path.join(os.path.dirname(__file__), "inference.py")
    exists = os.path.exists(path)
    check("inference.py exists at PROJECT ROOT (not a subfolder)", exists)
    if not exists:
        return
    try:
        with open(path, encoding="utf-8") as f:
            content = f.read()
    except (OSError, UnicodeError) as e:
        check("inference.py readable", False, str(e))
        return
    check(" uses HF_TOKEN or OPENAI_API_KEY",
          "HF_TOKEN" in content or "OPENAI_API_KEY" in content)
    check(" uses MODEL_NAME", "MODEL_NAME" in content)
    check(" uses API_BASE_URL", "API_BASE_URL" in content)
# ─────────────────────────────────────────────────────────────────────────────
# 12. No heavy ML deps in server/requirements.txt
# ─────────────────────────────────────────────────────────────────────────────
def test_deps():
    """Check that ``server/requirements.txt`` lists none of the banned packages.

    Matching is a case-insensitive substring search over the file with
    ``#`` comments removed, so a remark such as "# no torch" does not trigger
    a failure. The "Found: ..." detail is attached only to failing checks.
    A missing or unreadable file is recorded as a failed check.
    """
    print("\n[12] Dependency Safety")
    path = os.path.join(os.path.dirname(__file__), "server", "requirements.txt")
    if not os.path.exists(path):
        check("server/requirements.txt exists", False)
        return
    try:
        with open(path, encoding="utf-8") as f:
            raw = f.read().lower()
    except (OSError, UnicodeError) as e:
        check("server/requirements.txt readable", False, str(e))
        return

    # A comment starts at a '#' that opens the line or follows whitespace;
    # a '#' inside a URL fragment (e.g. '#egg=name') is left alone.
    content = "\n".join(
        re.sub(r"(^|\s)#.*$", "", line) for line in raw.splitlines()
    )

    banned = ["sentence-transformers", "torch", "tensorflow", "transformers",
              "spacy", "nltk", "gensim", "sklearn", "scikit-learn"]
    for dep in banned:
        found = dep in content
        check(f" server/requirements.txt does NOT contain '{dep}'",
              not found, f"Found: {dep}" if found else "")
# ─────────────────────────────────────────────────────────────────────────────
# MAIN
# ─────────────────────────────────────────────────────────────────────────────
def main():
    """Parse ``--base-url``, run every check, print a summary, and exit.

    Static file checks run first, then the live-server checks against the
    given base URL (trailing slashes stripped). The process exits with 0 when
    every recorded check passed and 1 otherwise.
    """
    parser = argparse.ArgumentParser(description="ResumeEnv Pre-Submission Validator")
    parser.add_argument("--base-url", default="http://localhost:8000",
                        help="Base URL of running ResumeEnv server")
    args = parser.parse_args()
    base = args.base_url.rstrip("/")

    print("=" * 60)
    print(f"ResumeEnv Validator — {base}")
    print("=" * 60)

    # Static checks (no server needed)
    test_yaml()
    test_dockerfile()
    test_inference_location()
    test_deps()

    # Live server checks
    test_health(base)
    test_reset(base)
    test_step(base)
    test_state(base)
    test_tasks(base)
    test_grader(base)
    test_baseline(base)
    test_grader_ranges(base)

    # Summary
    passed = sum(1 for _, p, _ in results if p)
    total = len(results)
    failed = [(n, d) for n, p, d in results if not p]
    print("\n" + "=" * 60)
    print(f"RESULT: {passed}/{total} checks passed")
    if failed:
        print(f"\n{FAIL} Failed checks:")
        for name, detail in failed:
            print(f" • {name}")
            if detail:
                print(f" {detail}")
        print("\n⚠️ Fix all failures before submitting.")
        sys.exit(1)
    else:
        print("\n🎉 All checks passed! Safe to submit your HF Spaces URL.")
        sys.exit(0)


if __name__ == "__main__":
    main()