"""Multi-step smoke test for an AI resume-screening environment.

Resets a Gradio Space, then for a fixed number of steps asks a local LLM
(served through an OpenAI-compatible endpoint) for a screening decision on
the current observation, submits that decision to the Space, and appends
the outcome of each step to a plain-text log file.
"""

import os
import json
import time
from datetime import datetime

from openai import OpenAI
from gradio_client import Client

# --- CONFIGURATION ---
SPACE_URL = "https://arun-misra-my-env.hf.space/web"
MODEL_NAME = "llama3:latest"  # User specified
OLLAMA_URL = "http://127.0.0.1:11434/v1"
LOG_FILE = "screening_log.txt"
STEPS_TO_RUN = 10

# --- SETUP ---
client_llm = OpenAI(base_url=OLLAMA_URL, api_key="ollama")
client_hf = Client(SPACE_URL)


def log_result(step_num, obs, choice, reward, status):
    """Append one step's outcome to LOG_FILE and echo a short confirmation.

    Args:
        step_num: 1-based index of the step being logged.
        obs: Observation dict the decision was made on; ``candidate_id`` and
            ``job_title`` are read from it.
        choice: Parsed LLM reply; ``decision`` and ``reasoning`` are read
            from it.
        reward: Reward value reported by the environment for this step.
        status: Status string taken from the environment's response.
    """
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    log_entry = f"""
[{timestamp}] --- STEP {step_num} ---
CANDIDATE ID: {obs.get('candidate_id')}
JOB TITLE: {obs.get('job_title')}
AI DECISION: {choice.get('decision')}
AI REASONING: {choice.get('reasoning')}
REWARD RECEIVED: {reward}
STATUS: {status}
------------------------------------------
"""
    with open(LOG_FILE, "a", encoding="utf-8") as f:
        f.write(log_entry)
    print(f"✅ Step {step_num} Logged (Reward: {reward})")


def _parse_criteria(raw):
    """Return weighted criteria as a dict, or ``{}`` when unusable.

    Accepts either an already-decoded dict or a JSON string. Anything that
    fails to decode, or decodes to a non-dict, yields an empty dict so the
    caller can fall back to the "Not specified" rubric line.
    """
    if isinstance(raw, dict):
        return raw
    try:
        parsed = json.loads(raw)
    except (TypeError, ValueError):
        return {}
    return parsed if isinstance(parsed, dict) else {}


def _format_rubric(criteria):
    """Render a criteria dict as one bullet per criterion with its weight."""
    if not criteria:
        return " - Not specified"
    return "\n".join(f" - {k} (weight: {v}%)" for k, v in criteria.items())


def _build_prompt(obs):
    """Build the screening prompt for the observation dict ``obs``."""
    # Each criteria field is parsed on its own so that one malformed field
    # does not discard the other.
    macro_rubric = _format_rubric(_parse_criteria(obs.get('macro_criteria', '{}')))
    micro_rubric = _format_rubric(_parse_criteria(obs.get('micro_criteria', '{}')))

    return f"""You are a Senior HR Expert conducting structured resume screening.
You must evaluate this candidate using the EXACT same methodology as a professional GPT-4-powered ATS system.

=== PRIMARY RULE: JUNK DETECTION ===
If the resume contains nonsensical text, gibberish (e.g., "blorph snizzle"), or is clearly not a professional resume, immediately stop and choose "reject" with reasoning "Junk/Spam submission detected."

=== JOB DESCRIPTION ===
{obs.get('job_description')}

=== SCORING RUBRIC ===
High-Level Criteria (weights must sum to 100):
{macro_rubric}

Detailed skill criteria (weights must sum to 100):
{micro_rubric}

=== CANDIDATE RESUME ===
{obs.get('resume_text')}

=== YOUR ANALYTICAL TASK ===
Follow these steps EXACTLY:
1. DETECT JUNK: Is this a real resume? If not, REJECT.
2. INDIVIDUAL SCORING: For each high-level criteria, assign a score of 0-10 based on the resume vs JD.
3. WEIGHTED MATH: Multiply each score by its weight to get the "Weighted Macro Average".
4. FINAL DECISION:
   - "shortlist" → Weighted macro average > 6.5/10 (Strong Match)
   - "flag_for_review" → Weighted macro average 4.0–6.5/10 (Borderline Case)
   - "reject" → Weighted macro average < 4.0/10 (Unqualified)

Respond ONLY with valid JSON:
{{
  "analysis": {{
    "junk_detected": false,
    "scores": {{ "criteria": score, ... }},
    "weighted_average": total
  }},
  "decision": "shortlist/reject/flag_for_review",
  "reasoning": "Brief, data-backed justification."
}}"""


def run_multi_step_test():
    """Run up to STEPS_TO_RUN screening steps and log each one.

    The log file is truncated at the start of every run. The loop ends early
    when the LLM call fails, when the environment returns data that cannot be
    parsed, or when a step response carries no further observation. The
    completion message is printed in all of those cases; only a failed reset
    returns before it.
    """
    print(f"🚀 Starting {STEPS_TO_RUN}-step AI Screening Test...")
    print(f"Model: {MODEL_NAME} | Space: {SPACE_URL}")

    # Initialize Log
    with open(LOG_FILE, "w", encoding="utf-8") as f:
        f.write(f"--- AI SCREENING LOG ({datetime.now()}) ---\n")

    # 1. Reset Environment
    reset_data = client_hf.predict(api_name="/reset_env")
    try:
        # The JSON payload is read from the second element of the result.
        obs = json.loads(reset_data[1])["observation"]
        if not isinstance(obs, dict):
            raise TypeError("observation is not a JSON object")
    except (TypeError, ValueError, KeyError, IndexError):
        print(f"❌ Server crashed during reset! Raw HuggingFace Output:\n{reset_data}")
        return

    for i in range(1, STEPS_TO_RUN + 1):
        # 2. Prompt LLM
        prompt = _build_prompt(obs)
        try:
            response = client_llm.chat.completions.create(
                model=MODEL_NAME,
                messages=[{"role": "user", "content": prompt}],
                response_format={"type": "json_object"},
            )
            ai_choice = json.loads(response.choices[0].message.content)
            if not isinstance(ai_choice, dict):
                # Valid JSON that is not an object cannot carry a decision.
                raise ValueError(
                    f"expected a JSON object, got {type(ai_choice).__name__}"
                )
        except Exception as e:
            # Broad on purpose: client, transport and parsing failures all
            # end the run the same way, and the cause is reported.
            print(f"❌ AI Error at Step {i}: {e}")
            break

        # 3. Step Environment
        step_data = client_hf.predict(
            param_0=ai_choice.get('decision', 'flag_for_review'),
            param_1=ai_choice.get('reasoning', 'No reasoning.'),
            param_2=f"STEP_{i}",
            api_name="/step_form",
        )

        # 4. Parse Results
        try:
            raw_json = json.loads(step_data[1])
            reward = raw_json.get("reward", 0.0)
            next_obs = raw_json.get("observation")
            feedback = (next_obs or {}).get("status", "N/A")
        except (TypeError, ValueError, IndexError, AttributeError):
            print(f"❌ Server returned invalid data (likely empty queue or crash). Raw output: {step_data}")
            break

        # 5. Log it
        log_result(i, obs, ai_choice, reward, feedback)

        # Prepare for next observation; without one there is nothing left
        # to screen, so stop instead of failing on the next iteration.
        if not isinstance(next_obs, dict):
            print(f"❌ No further observation returned after Step {i}; stopping early.")
            break
        obs = next_obs
        time.sleep(1)  # Small delay for stability

    print(f"\n🏁 Test Complete. Results saved to: {LOG_FILE}")


if __name__ == "__main__":
    run_multi_step_test()