# NOTE: Hugging Face Spaces status banner ("Spaces / Sleeping") stripped from this scrape.
import json
import os
import sys
from datetime import datetime, timezone

import requests
from openai import OpenAI
# Runtime configuration pulled from the environment (hackathon-mandated variables).
API_BASE_URL = os.getenv("API_BASE_URL", "https://api.openai.com/v1")
# gpt-3.5-turbo is only a generic-testing default; override MODEL_NAME in production.
MODEL_NAME = os.getenv("MODEL_NAME", "gpt-3.5-turbo")
HF_TOKEN = os.getenv("HF_TOKEN")
ENV_URL = os.getenv("ENV_URL", "http://localhost:7860")

# Warn (but do not crash) when no token is available; the client is still built
# with a placeholder key so the rest of the script can run its fallback paths.
if not HF_TOKEN:
    print("WARNING: HF_TOKEN not set. Inference might fail if it relies completely on the LLM.", file=sys.stderr)
client = OpenAI(api_key=HF_TOKEN if HF_TOKEN else "dummy", base_url=API_BASE_URL)
def log_start(task_name: str):
    """Emit a [START] marker line for *task_name* with a UTC timestamp."""
    # datetime.utcnow() is deprecated (and returns a naive datetime);
    # use a timezone-aware UTC timestamp instead.
    print(f"[START] task={task_name} timestamp={datetime.now(timezone.utc).isoformat()}")
def log_step(step: int, action: dict, reward: float, done: bool, bias_metrics: dict):
    """Emit a [STEP] line with the action, reward, done flag and the DIR metric.

    Bug fix: the original applied the ``:.3f`` format spec directly to
    ``bias_metrics.get(..., 'N/A')``, which raises ValueError whenever the
    metric is missing (a string cannot take a float format spec). Format only
    when the value is actually a float, mirroring log_end's handling.
    """
    dir_val = bias_metrics.get('disparate_impact_ratio', 'N/A')
    if isinstance(dir_val, float):
        dir_val = f"{dir_val:.3f}"
    print(f"[STEP] step={step} action={json.dumps(action)} reward={reward:.4f} done={done} DIR={dir_val}")
def log_end(total_reward: float, steps: int, final_bias_audit: dict):
    """Emit an [END] summary line with totals and final bias-audit metrics.

    Missing audit entries (e.g. the episode aborted before an audit was
    produced) are reported as 'N/A'.
    """
    def _fmt(metric: str):
        # Floats get 3 decimal places; anything else ('N/A', ints) passes through.
        val = final_bias_audit.get(metric, 'N/A')
        return f"{val:.3f}" if isinstance(val, float) else val

    dir_val = _fmt('disparate_impact_ratio')
    aod_val = _fmt('average_odds_difference')
    # Aware UTC timestamp; datetime.utcnow() is deprecated.
    print(f"[END] total_reward={total_reward:.4f} steps={steps} DIR={dir_val} AOD={aod_val} timestamp={datetime.now(timezone.utc).isoformat()}")
def fallback_dumb_agent(obs: dict) -> dict:
    """Random fallback policy used when the LLM call fails or no token is set."""
    import random
    decision = random.choice(["shortlist", "reject"])
    resume = obs.get("current_resume", {})
    action = {
        "action_type": decision,
        "candidate_id": resume.get("candidate_id"),
        "rank": None,
    }
    if decision == "shortlist":
        action["rank"] = random.randint(1, 10)
    return action
def run_episode(task_name: str):
    """Run one episode of *task_name* against the environment at ENV_URL.

    Resets the environment, then loops: build a fairness-aware prompt for the
    current resume, ask the LLM for a JSON action (falling back to the random
    agent on any API/parsing failure or when HF_TOKEN is unset), POST the
    action to /step, and accumulate reward.

    Returns the cumulative episode reward, or 0.0 if the environment could not
    be reached on reset.
    """
    reset_payload = {"task": task_name, "seed": 42}
    try:
        # Timeout so a dead environment cannot hang the whole evaluation.
        response = requests.post(f"{ENV_URL}/reset", json=reset_payload, timeout=30)
        response.raise_for_status()
        obs = response.json()
    except Exception as e:
        print(f"Error connecting to ENV_URL={ENV_URL}: {e}")
        return 0.0
    log_start(task_name)
    cumulative_reward = 0.0
    step_count = 0
    done = False
    final_bias_audit = {}
    # Task-specific episode-length budget.
    max_steps = 10 if task_name == "easy_shortlist" else (20 if task_name == "medium_rank" else 50)
    while not done and step_count < max_steps:
        current_resume = obs.get('current_resume')
        if not current_resume:
            break
        # Build prompt with bias awareness.
        prompt = f"""
You are a FAIR hiring assistant. Screen resumes for this job:
Job: {obs['job_description']['title']}
Required Skills: {', '.join(obs['job_description']['required_skills'])}
Current Candidate:
Name: {current_resume['name']}
Skills: {', '.join(current_resume['skills'])}
Experience: {current_resume['experience_years']} years
Current Shortlist: {obs['shortlist_so_far']}
Remaining Candidates: {obs['remaining_candidates']}
Current Bias Metrics: {obs.get('bias_metrics', 'Not available yet')}
IMPORTANT: Ensure Disparate Impact Ratio >= 0.80 (EEOC 4/5ths Rule).
If DIR < 0.80, prioritize qualified URM candidates.
Decide: shortlist (with rank 1-10), reject, or flag_bias (with reason).
Respond in JSON strictly like this: {{"action_type": "...", "candidate_id": "...", "rank": 1}}
"""
        try:
            if HF_TOKEN:
                llm_res = client.chat.completions.create(
                    model=MODEL_NAME,
                    messages=[
                        {"role": "system", "content": "You output JSON only."},
                        {"role": "user", "content": prompt},
                    ],
                    temperature=0.3,
                    max_tokens=200,
                )
                content = llm_res.choices[0].message.content.strip()
                # Primitive JSON extraction: take the outermost {...} span.
                json_start = content.find('{')
                json_end = content.rfind('}')
                if json_start != -1 and json_end != -1:
                    action = json.loads(content[json_start:json_end + 1])
                else:
                    action = fallback_dumb_agent(obs)
            else:
                # Bug fix: the original defined no `action` on this path, so a
                # missing HF_TOKEN crashed with NameError two lines later.
                action = fallback_dumb_agent(obs)
        except Exception as e:
            # Fallback for API/parsing errors; log so failures are visible
            # instead of being silently swallowed.
            print(f"LLM call failed, using fallback agent: {e}", file=sys.stderr)
            action = fallback_dumb_agent(obs)
        # The environment expects the action to target the resume on screen.
        action["candidate_id"] = current_resume["candidate_id"]
        # Execute action.
        try:
            step_response = requests.post(f"{ENV_URL}/step", json=action, timeout=30).json()
            obs = step_response['observation']
            reward = step_response['reward']
            done = step_response['done']
            bias_metrics = step_response.get('bias_metrics', {})
        except Exception as e:
            print(f"Error on /step: {e}")
            break
        cumulative_reward += reward
        step_count += 1
        log_step(step_count, action, reward, done, bias_metrics)
    if done:
        # Fetch the final state to obtain the bias audit; best-effort only.
        try:
            state_data = requests.get(f"{ENV_URL}/state", timeout=30).json()
            final_bias_audit = state_data.get('bias_audit', {})
        except Exception:
            pass
    log_end(cumulative_reward, step_count, final_bias_audit)
    return cumulative_reward
if __name__ == "__main__":
    # Run every benchmark task in sequence, then report per-task and mean scores.
    tasks = ["easy_shortlist", "medium_rank", "hard_fair_screen"]
    scores = []
    for task in tasks:
        print(f"\n--- Running Task: {task} ---")
        scores.append(run_episode(task))
    if scores:
        mean_score = sum(scores) / len(scores)
        print(f"\n=== FINAL SCORES ===")
        for task, score in zip(tasks, scores):
            print(f"{task}: {score:.4f}")
        print(f"MEAN: {mean_score:.4f}")