# File: my_env/multi_step_agent.py
# Hugging Face page header (not part of the program): arun-misra's picture ·
# "Upload folder using huggingface_hub" · commit b60c6a4 (verified)
import os
import json
import time
from openai import OpenAI
from gradio_client import Client
from datetime import datetime
# --- CONFIGURATION ---
# URL handed to the gradio_client Client that drives the screening environment.
SPACE_URL = "https://arun-misra-my-env.hf.space/web"
# Model identifier sent with every chat-completion request.
MODEL_NAME = "llama3:latest" # User specified
# Base URL of the OpenAI-compatible endpoint the LLM client talks to
# (a loopback address, so the model server is expected on this machine).
OLLAMA_URL = "http://127.0.0.1:11434/v1"
# Log file: truncated at the start of run_multi_step_test(), appended to by
# log_result().
LOG_FILE = "screening_log.txt"
# Upper bound on the number of decision steps performed in one run.
STEPS_TO_RUN = 10
# --- SETUP ---
# Both clients are constructed at import time, as module-level globals used by
# run_multi_step_test().
# NOTE(review): api_key="ollama" looks like a placeholder required by the
# OpenAI client rather than a real credential - confirm.
client_llm = OpenAI(base_url=OLLAMA_URL, api_key="ollama")
# NOTE(review): Client(...) may contact the Space while being constructed, which
# would make importing this module depend on network access - confirm.
client_hf = Client(SPACE_URL)
def log_result(step_num, obs, choice, reward, status, log_file=None):
    """Append one screening step to the log file and print a confirmation.

    Args:
        step_num: Number of the step being logged (shown in the entry header).
        obs: Observation the decision was made on. ``candidate_id`` and
            ``job_title`` are read from it; a non-dict value is treated as
            an empty observation.
        choice: Decision returned by the LLM. ``decision`` and ``reasoning``
            are read from it; a non-dict value is treated as empty.
        reward: Reward reported by the environment for this step.
        status: Status text reported by the environment for this step.
        log_file: Path of the file to append to. Defaults to the module-level
            ``LOG_FILE`` when omitted.
    """
    # The model or the server can hand back a list, string or None instead of
    # an object; logging must not be the thing that crashes the run.
    obs = obs if isinstance(obs, dict) else {}
    choice = choice if isinstance(choice, dict) else {}
    target = LOG_FILE if log_file is None else log_file

    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    log_entry = f"""
[{timestamp}] --- STEP {step_num} ---
CANDIDATE ID: {obs.get('candidate_id')}
JOB TITLE: {obs.get('job_title')}
AI DECISION: {choice.get('decision')}
AI REASONING: {choice.get('reasoning')}
REWARD RECEIVED: {reward}
STATUS: {status}
------------------------------------------
"""
    with open(target, "a", encoding="utf-8") as f:
        f.write(log_entry)
    # The check mark was mis-decoded ("βœ…") in the original literal.
    print(f"✅ Step {step_num} Logged (Reward: {reward})")
def _parse_criteria(raw):
    """Return the weighted-criteria mapping encoded in ``raw``, or ``{}``."""
    if isinstance(raw, dict):
        # Already decoded by the server; nothing to parse.
        return raw
    try:
        parsed = json.loads(raw)
    except (TypeError, ValueError):
        # Missing value (None) or malformed JSON.
        return {}
    # A JSON list/number/string has no .items(); treat it as "not specified".
    return parsed if isinstance(parsed, dict) else {}


def _format_rubric(criteria):
    """Render a criteria mapping as one ``- name (weight: N%)`` line per entry."""
    if not criteria:
        return " - Not specified"
    return "\n".join(
        f" - {name} (weight: {weight}%)" for name, weight in criteria.items()
    )


def _build_prompt(obs):
    """Build the screening prompt for one observation dict."""
    # Each criteria set is parsed independently so that one bad value does not
    # discard the other.
    macro_rubric = _format_rubric(_parse_criteria(obs.get('macro_criteria', '{}')))
    micro_rubric = _format_rubric(_parse_criteria(obs.get('micro_criteria', '{}')))
    return f"""You are a Senior HR Expert conducting structured resume screening. You must evaluate this candidate using the EXACT same methodology as a professional GPT-4-powered ATS system.
=== PRIMARY RULE: JUNK DETECTION ===
If the resume contains nonsensical text, gibberish (e.g., "blorph snizzle"), or is clearly not a professional resume, immediately stop and choose "reject" with reasoning "Junk/Spam submission detected."
=== JOB DESCRIPTION ===
{obs.get('job_description')}
=== SCORING RUBRIC ===
High-Level Criteria (weights must sum to 100):
{macro_rubric}
Detailed skill criteria (weights must sum to 100):
{micro_rubric}
=== CANDIDATE RESUME ===
{obs.get('resume_text')}
=== YOUR ANALYTICAL TASK ===
Follow these steps EXACTLY:
1. DETECT JUNK: Is this a real resume? If not, REJECT.
2. INDIVIDUAL SCORING: For each high-level criteria, assign a score of 0-10 based on the resume vs JD.
3. WEIGHTED MATH: Multiply each score by its weight to get the "Weighted Macro Average".
4. FINAL DECISION:
- "shortlist" → Weighted macro average > 6.5/10 (Strong Match)
- "flag_for_review" → Weighted macro average 4.0–6.5/10 (Borderline Case)
- "reject" → Weighted macro average < 4.0/10 (Unqualified)
Respond ONLY with valid JSON:
{{
"analysis": {{
"junk_detected": false,
"scores": {{ "criteria": score, ... }},
"weighted_average": total
}},
"decision": "shortlist/reject/flag_for_review",
"reasoning": "Brief, data-backed justification."
}}"""


def _read_payload(result):
    """Decode the JSON object stored at index 1 of a Space result, or None."""
    try:
        payload = json.loads(result[1])
    except (TypeError, ValueError, IndexError, KeyError):
        return None
    return payload if isinstance(payload, dict) else None


def run_multi_step_test():
    """Run up to ``STEPS_TO_RUN`` screening steps against the environment.

    Resets the environment through ``client_hf``, then for each step asks the
    LLM (``client_llm``) for a decision on the current observation, submits
    that decision to the environment, and records the outcome with
    ``log_result``. ``LOG_FILE`` is truncated at the start of every run.

    The run stops early, with a console message instead of an exception, when
    the LLM call fails, the environment cannot be reached, the environment
    returns data that cannot be decoded, or no further observation is
    provided.

    Returns:
        None. Results are written to ``LOG_FILE`` and progress is printed.
    """
    print(f"🚀 Starting {STEPS_TO_RUN}-step AI Screening Test...")
    print(f"Model: {MODEL_NAME} | Space: {SPACE_URL}")

    # Initialize Log: "w" truncates, so each run starts from an empty file.
    with open(LOG_FILE, "w", encoding="utf-8") as f:
        f.write(f"--- AI SCREENING LOG ({datetime.now()}) ---\n")

    # 1. Reset Environment
    try:
        reset_data = client_hf.predict(api_name="/reset_env")
    except Exception as exc:  # boundary with a remote service
        print(f"❌ Could not reset the environment: {exc}")
        return
    reset_payload = _read_payload(reset_data)
    obs = reset_payload.get("observation") if reset_payload else None
    if not isinstance(obs, dict):
        print(f"❌ Server crashed during reset! Raw HuggingFace Output:\n{reset_data}")
        return

    for i in range(1, STEPS_TO_RUN + 1):
        # 2. Prompt LLM
        prompt = _build_prompt(obs)
        try:
            response = client_llm.chat.completions.create(
                model=MODEL_NAME,
                messages=[{"role": "user", "content": prompt}],
                response_format={"type": "json_object"},
            )
            ai_choice = json.loads(response.choices[0].message.content)
        except Exception as e:  # boundary with the LLM client / bad JSON
            print(f"❌ AI Error at Step {i}: {e}")
            break
        if not isinstance(ai_choice, dict):
            # Valid JSON but not an object: there is nothing to read a
            # decision from, so treat it like any other AI failure.
            print(
                f"❌ AI Error at Step {i}: expected a JSON object, "
                f"got {type(ai_choice).__name__}"
            )
            break

        # 3. Step Environment
        try:
            step_data = client_hf.predict(
                param_0=ai_choice.get('decision', 'flag_for_review'),
                param_1=ai_choice.get('reasoning', 'No reasoning.'),
                param_2=f"STEP_{i}",
                api_name="/step_form",
            )
        except Exception as exc:  # boundary with a remote service
            print(f"❌ Environment call failed at Step {i}: {exc}")
            break

        # 4. Parse Results
        raw_json = _read_payload(step_data)
        if raw_json is None:
            print(f"❌ Server returned invalid data (likely empty queue or crash). Raw output: {step_data}")
            break
        reward = raw_json.get("reward", 0.0)
        next_obs = raw_json.get("observation")
        has_next = isinstance(next_obs, dict)
        feedback = next_obs.get("status", "N/A") if has_next else "N/A"

        # 5. Log it (against the observation the decision was made on).
        log_result(i, obs, ai_choice, reward, feedback)

        # Prepare for next observation; without one there is nothing left
        # to screen, so stop instead of failing on the next iteration.
        if not has_next:
            print(f"No further observation after Step {i}; stopping early.")
            break
        obs = next_obs
        time.sleep(1)  # Small delay for stability

    print(f"\n🏁 Test Complete. Results saved to: {LOG_FILE}")
# Script entry point: run the screening test only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    run_multi_step_test()