# Hugging Face Spaces page header captured with this file (Space status: Sleeping)
| import os | |
| import json | |
| import time | |
| from openai import OpenAI | |
| from gradio_client import Client | |
| from datetime import datetime | |
# --- CONFIGURATION ---
# Each setting can be overridden through an environment variable so the script
# can target another Space, model, Ollama host, or log file without editing
# the source. With no variables set, the values equal the original defaults.
SPACE_URL = os.environ.get(
    "SCREENING_SPACE_URL", "https://arun-misra-my-env.hf.space/web"
)
MODEL_NAME = os.environ.get("SCREENING_MODEL_NAME", "llama3:latest")  # User specified
OLLAMA_URL = os.environ.get("SCREENING_OLLAMA_URL", "http://127.0.0.1:11434/v1")
LOG_FILE = os.environ.get("SCREENING_LOG_FILE", "screening_log.txt")
# Number of environment steps attempted by run_multi_step_test().
STEPS_TO_RUN = int(os.environ.get("SCREENING_STEPS_TO_RUN", "10"))
# --- SETUP ---
# Chat-completions client aimed at the endpoint in OLLAMA_URL. The "ollama"
# key is presumably a placeholder the local server does not validate — confirm.
client_llm = OpenAI(api_key="ollama", base_url=OLLAMA_URL)
# Gradio client bound to the Space at SPACE_URL; used for the reset/step calls.
client_hf = Client(SPACE_URL)
def log_result(step_num, obs, choice, reward, status, log_file=None):
    """Append one screening step to the log file and print a summary line.

    Args:
        step_num: Index of the step being logged.
        obs: Observation mapping for the step; ``candidate_id`` and
            ``job_title`` are read from it.
        choice: Parsed LLM answer; ``decision`` and ``reasoning`` are read
            from it.
        reward: Reward value reported for the step.
        status: Status/feedback value reported for the step.
        log_file: Path of the file to append to. When ``None`` (the default)
            the module-level ``LOG_FILE`` is used.
    """
    # A non-mapping observation or answer (e.g. the model replied with a JSON
    # list) is logged with empty fields instead of raising AttributeError.
    obs = obs if isinstance(obs, dict) else {}
    choice = choice if isinstance(choice, dict) else {}
    target = LOG_FILE if log_file is None else log_file

    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    # Leading and trailing empty items keep the entry separated from its
    # neighbours by a newline on both sides.
    log_entry = "\n".join(
        [
            "",
            f"[{timestamp}] --- STEP {step_num} ---",
            f"CANDIDATE ID: {obs.get('candidate_id')}",
            f"JOB TITLE: {obs.get('job_title')}",
            f"AI DECISION: {choice.get('decision')}",
            f"AI REASONING: {choice.get('reasoning')}",
            f"REWARD RECEIVED: {reward}",
            f"STATUS: {status}",
            "------------------------------------------",
            "",
        ]
    )
    with open(target, "a", encoding="utf-8") as f:
        f.write(log_entry)
    print(f"✅ Step {step_num} Logged (Reward: {reward})")
def _parse_criteria(raw):
    """Return a criteria mapping from a JSON string or an already-decoded dict.

    Anything that cannot be turned into a dict yields ``{}``.
    """
    if isinstance(raw, dict):
        return raw
    try:
        parsed = json.loads(raw)
    except (TypeError, ValueError):
        return {}
    return parsed if isinstance(parsed, dict) else {}


def _format_rubric(criteria):
    """Render a criteria mapping as one ``- name (weight: N%)`` line per entry."""
    if not criteria:
        return " - Not specified"
    return "\n".join(
        f" - {name} (weight: {weight}%)" for name, weight in criteria.items()
    )


def _build_prompt(obs):
    """Build the screening prompt sent to the LLM for one observation dict."""
    # Each rubric is parsed on its own so that one malformed field does not
    # discard the other.
    macro_rubric = _format_rubric(_parse_criteria(obs.get("macro_criteria", "{}")))
    micro_rubric = _format_rubric(_parse_criteria(obs.get("micro_criteria", "{}")))
    return "\n".join(
        [
            "You are a Senior HR Expert conducting structured resume screening. "
            "You must evaluate this candidate using the EXACT same methodology "
            "as a professional GPT-4-powered ATS system.",
            "=== PRIMARY RULE: JUNK DETECTION ===",
            'If the resume contains nonsensical text, gibberish (e.g., "blorph snizzle"), '
            "or is clearly not a professional resume, immediately stop and choose "
            '"reject" with reasoning "Junk/Spam submission detected."',
            "=== JOB DESCRIPTION ===",
            f"{obs.get('job_description')}",
            "=== SCORING RUBRIC ===",
            "High-Level Criteria (weights must sum to 100):",
            macro_rubric,
            "Detailed skill criteria (weights must sum to 100):",
            micro_rubric,
            "=== CANDIDATE RESUME ===",
            f"{obs.get('resume_text')}",
            "=== YOUR ANALYTICAL TASK ===",
            "Follow these steps EXACTLY:",
            "1. DETECT JUNK: Is this a real resume? If not, REJECT.",
            "2. INDIVIDUAL SCORING: For each high-level criteria, assign a score "
            "of 0-10 based on the resume vs JD.",
            '3. WEIGHTED MATH: Multiply each score by its weight to get the '
            '"Weighted Macro Average".',
            "4. FINAL DECISION:",
            '- "shortlist" → Weighted macro average > 6.5/10 (Strong Match)',
            '- "flag_for_review" → Weighted macro average 4.0–6.5/10 (Borderline Case)',
            '- "reject" → Weighted macro average < 4.0/10 (Unqualified)',
            "Respond ONLY with valid JSON:",
            "{",
            '"analysis": {',
            '"junk_detected": false,',
            '"scores": { "criteria": score, ... },',
            '"weighted_average": total',
            "},",
            '"decision": "shortlist/reject/flag_for_review",',
            '"reasoning": "Brief, data-backed justification."',
            "}",
        ]
    )


def run_multi_step_test():
    """Run up to ``STEPS_TO_RUN`` screening steps against the Space and log them.

    The log file is truncated and given a header, the environment is reset
    through the ``/reset_env`` endpoint, and then for each step the current
    observation is turned into a prompt, the LLM's JSON answer is submitted
    through ``/step_form``, and the outcome is appended with ``log_result``.
    The run stops early (without raising) on the first LLM or server failure.

    Returns:
        None.
    """
    print(f"🚀 Starting {STEPS_TO_RUN}-step AI Screening Test...")
    print(f"Model: {MODEL_NAME} | Space: {SPACE_URL}")

    # Initialize Log
    with open(LOG_FILE, "w", encoding="utf-8") as f:
        f.write(f"--- AI SCREENING LOG ({datetime.now()}) ---\n")

    # 1. Reset Environment
    reset_data = None
    try:
        reset_data = client_hf.predict(api_name="/reset_env")
        # Element 1 of the endpoint's return value is parsed as the JSON payload.
        obs = json.loads(reset_data[1])["observation"]
        if not isinstance(obs, dict):
            raise TypeError(f"observation is {type(obs).__name__}, expected dict")
    except Exception as e:  # boundary with remote service: report and stop
        print(f"❌ Server crashed during reset! Raw HuggingFace Output:\n{reset_data}")
        print(f"Cause: {e!r}")
        return

    completed = 0
    for i in range(1, STEPS_TO_RUN + 1):
        # 2. Prompt LLM
        prompt = _build_prompt(obs)
        try:
            response = client_llm.chat.completions.create(
                model=MODEL_NAME,
                messages=[{"role": "user", "content": prompt}],
                response_format={"type": "json_object"},
            )
            ai_choice = json.loads(response.choices[0].message.content)
        except Exception as e:  # boundary with LLM service: report and stop
            print(f"❌ AI Error at Step {i}: {e}")
            break
        if not isinstance(ai_choice, dict):
            # Valid JSON that is not an object has no decision/reasoning keys.
            print(
                f"❌ AI Error at Step {i}: expected a JSON object, "
                f"got {type(ai_choice).__name__}"
            )
            break

        # 3. Step Environment and 4. Parse Results
        step_data = None
        try:
            step_data = client_hf.predict(
                param_0=ai_choice.get("decision", "flag_for_review"),
                param_1=ai_choice.get("reasoning", "No reasoning."),
                param_2=f"STEP_{i}",
                api_name="/step_form",
            )
            raw_json = json.loads(step_data[1])
            reward = raw_json.get("reward", 0.0)
            next_obs = raw_json.get("observation")
            status_source = next_obs if isinstance(next_obs, dict) else {}
            feedback = status_source.get("status", "N/A")
        except Exception as e:  # boundary with remote service: report and stop
            print(
                "❌ Server returned invalid data (likely empty queue or crash). "
                f"Raw output: {step_data}"
            )
            print(f"Cause: {e!r}")
            break

        # 5. Log it
        log_result(i, obs, ai_choice, reward, feedback)
        completed += 1

        # Prepare for next observation; without one there is nothing to prompt on.
        if not isinstance(next_obs, dict):
            print(f"❌ No observation returned after Step {i}; stopping.")
            break
        obs = next_obs
        time.sleep(1)  # Small delay for stability

    if completed < STEPS_TO_RUN:
        print(f"Stopped early after {completed} of {STEPS_TO_RUN} steps.")
    print(f"\n🏁 Test Complete. Results saved to: {LOG_FILE}")
| if __name__ == "__main__": | |
| run_multi_step_test() | |