Spaces:

arun-misra
/

my_env

Sleeping

App Files Files Community

my_env / multi_step_agent.py

arun-misra

Upload folder using huggingface_hub

b60c6a4 verified about 2 months ago

raw

history blame contribute delete

5.25 kB

	import os
	import json
	import time
	from openai import OpenAI
	from gradio_client import Client
	from datetime import datetime

	# --- CONFIGURATION ---
	# URL handed to the gradio_client Client below; it identifies the remote
	# Space whose "/reset_env" and "/step_form" endpoints this script calls.
	SPACE_URL = "https://arun-misra-my-env.hf.space/web"
	# Model identifier sent with every chat-completion request.
	MODEL_NAME = "llama3:latest" # User specified
	# Base URL of the OpenAI-compatible chat endpoint (loopback address, port
	# 11434). Presumably a locally running Ollama server, going by the name --
	# confirm before pointing this at anything else.
	OLLAMA_URL = "http://127.0.0.1:11434/v1"
	# Plain-text log: truncated at the start of a run, then appended to once
	# per completed step.
	LOG_FILE = "screening_log.txt"
	# Maximum number of environment steps attempted in one run.
	STEPS_TO_RUN = 10

	# --- SETUP ---
	# NOTE: both clients are constructed at import time, so importing this
	# module already requires the client libraries to be installed.
	# NOTE(review): api_key="ollama" looks like a placeholder value rather than
	# a real credential -- confirm the endpoint ignores it.
	client_llm = OpenAI(base_url=OLLAMA_URL, api_key="ollama")
	# NOTE(review): Client(...) presumably contacts the Space on construction,
	# which would make importing this module a network operation -- confirm.
	client_hf = Client(SPACE_URL)

	def log_result(step_num, obs, choice, reward, status, log_file=None):
	    """Append one step's outcome to the screening log and echo it to stdout.

	    Args:
	        step_num: Number of the step being recorded.
	        obs: Observation the decision was made on. The entry reads its
	            ``candidate_id`` and ``job_title`` keys; anything that is not a
	            dict is treated as an empty observation.
	        choice: Parsed model reply. The entry reads its ``decision`` and
	            ``reasoning`` keys; anything that is not a dict is treated as
	            an empty reply.
	        reward: Reward reported by the environment for this step.
	        status: Status text reported by the environment for this step.
	        log_file: Path of the file to append to. Defaults to the
	            module-level ``LOG_FILE`` when omitted.
	    """
	    # Logging must never be the reason a run aborts: a null observation or
	    # a non-object model reply is recorded with "None" fields instead of
	    # raising AttributeError on .get().
	    obs = obs if isinstance(obs, dict) else {}
	    choice = choice if isinstance(choice, dict) else {}
	    if log_file is None:
	        # Resolved at call time so the default follows the module constant.
	        log_file = LOG_FILE

	    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
	    # The template lines deliberately start at the file's left margin so
	    # that no code indentation leaks into the log file.
	    log_entry = f"""
	[{timestamp}] --- STEP {step_num} ---
	CANDIDATE ID: {obs.get('candidate_id')}
	JOB TITLE: {obs.get('job_title')}
	AI DECISION: {choice.get('decision')}
	AI REASONING: {choice.get('reasoning')}
	REWARD RECEIVED: {reward}
	STATUS: {status}
	------------------------------------------
	"""
	    with open(log_file, "a", encoding="utf-8") as f:
	        f.write(log_entry)
	    print(f"✅ Step {step_num} Logged (Reward: {reward})")

	# Decision labels offered to the model by the prompt below.
	_VALID_DECISIONS = ("shortlist", "reject", "flag_for_review")


	def _load_criteria(raw):
	    """Return *raw* as a criterion -> weight dict, or {} if it cannot be read."""
	    if isinstance(raw, dict):
	        # Already decoded; json.loads() would raise TypeError on a dict.
	        return raw
	    try:
	        parsed = json.loads(raw) if raw else {}
	    except (TypeError, ValueError):
	        return {}
	    return parsed if isinstance(parsed, dict) else {}


	def _format_rubric(criteria):
	    """Render a criteria dict as one bullet line per weighted criterion."""
	    if not criteria:
	        return " - Not specified"
	    return "\n".join(f" - {k} (weight: {v}%)" for k, v in criteria.items())


	def _build_prompt(obs):
	    """Build the screening prompt for a single observation dict."""
	    # Each rubric is parsed on its own so one malformed field does not
	    # discard the other.
	    macro_rubric = _format_rubric(_load_criteria(obs.get('macro_criteria')))
	    micro_rubric = _format_rubric(_load_criteria(obs.get('micro_criteria')))

	    # NOTE(review): weights are rendered as percentages, so step 3 as worded
	    # ("multiply each score by its weight") only lands on the 0-10 scale used
	    # by the thresholds if the product is divided by 100. The prompt text is
	    # left unchanged here; confirm with the prompt owner before rewording.
	    return f"""You are a Senior HR Expert conducting structured resume screening. You must evaluate this candidate using the EXACT same methodology as a professional GPT-4-powered ATS system.

	=== PRIMARY RULE: JUNK DETECTION ===
	If the resume contains nonsensical text, gibberish (e.g., "blorph snizzle"), or is clearly not a professional resume, immediately stop and choose "reject" with reasoning "Junk/Spam submission detected."

	=== JOB DESCRIPTION ===
	{obs.get('job_description')}

	=== SCORING RUBRIC ===
	High-Level Criteria (weights must sum to 100):
	{macro_rubric}

	Detailed skill criteria (weights must sum to 100):
	{micro_rubric}

	=== CANDIDATE RESUME ===
	{obs.get('resume_text')}

	=== YOUR ANALYTICAL TASK ===
	Follow these steps EXACTLY:
	1. DETECT JUNK: Is this a real resume? If not, REJECT.
	2. INDIVIDUAL SCORING: For each high-level criteria, assign a score of 0-10 based on the resume vs JD.
	3. WEIGHTED MATH: Multiply each score by its weight to get the "Weighted Macro Average".
	4. FINAL DECISION:
	- "shortlist" → Weighted macro average > 6.5/10 (Strong Match)
	- "flag_for_review" → Weighted macro average 4.0–6.5/10 (Borderline Case)
	- "reject" → Weighted macro average < 4.0/10 (Unqualified)

	Respond ONLY with valid JSON:
	{{
	"analysis": {{
	"junk_detected": false,
	"scores": {{ "criteria": score, ... }},
	"weighted_average": total
	}},
	"decision": "shortlist/reject/flag_for_review",
	"reasoning": "Brief, data-backed justification."
	}}"""


	def run_multi_step_test():
	    """Run up to STEPS_TO_RUN screening steps against the remote environment.

	    Each step builds a prompt from the current observation, asks the LLM for
	    a JSON decision, submits that decision to the environment's
	    "/step_form" endpoint and logs the outcome through log_result(). The log
	    file is truncated at the start of the run.

	    The run ends early, with a message on stdout, when the reset call fails,
	    the LLM call fails or does not return a JSON object, the environment
	    returns data that cannot be parsed, or the environment returns no
	    further observation. Nothing is returned and no exception is propagated
	    from the remote calls.
	    """
	    print(f"🚀 Starting {STEPS_TO_RUN}-step AI Screening Test...")
	    # A plain "|" separator: the former "\|" was an invalid escape sequence
	    # that printed a stray backslash.
	    print(f"Model: {MODEL_NAME} | Space: {SPACE_URL}")

	    # Initialize Log
	    with open(LOG_FILE, "w", encoding="utf-8") as f:
	        f.write(f"--- AI SCREENING LOG ({datetime.now()}) ---\n")

	    # 1. Reset Environment
	    # The remote call sits inside the try so that a connection failure takes
	    # the same reporting path as an unparseable reply. The broad except is
	    # deliberate: this is the script's top-level boundary and the client
	    # libraries' exception types are not known here.
	    reset_data = None
	    try:
	        reset_data = client_hf.predict(api_name="/reset_env")
	        obs = json.loads(reset_data[1])["observation"]
	        if not isinstance(obs, dict):
	            raise ValueError("reset reply carries no observation object")
	    except Exception as e:
	        print(f"❌ Server crashed during reset! Raw HuggingFace Output:\n{reset_data}")
	        print(f"   Error: {e!r}")
	        return

	    for i in range(1, STEPS_TO_RUN + 1):
	        # 2. Prompt LLM
	        prompt = _build_prompt(obs)
	        try:
	            response = client_llm.chat.completions.create(
	                model=MODEL_NAME,
	                messages=[{"role": "user", "content": prompt}],
	                response_format={"type": "json_object"},
	            )
	            ai_choice = json.loads(response.choices[0].message.content)
	            if not isinstance(ai_choice, dict):
	                raise ValueError(
	                    f"expected a JSON object, got {type(ai_choice).__name__}"
	                )
	        except Exception as e:
	            print(f"❌ AI Error at Step {i}: {e}")
	            break

	        # Only the labels offered in the prompt are submitted; a missing or
	        # unrecognised label falls back to the neutral "flag_for_review".
	        decision = str(ai_choice.get('decision') or '').strip().lower()
	        if decision not in _VALID_DECISIONS:
	            decision = 'flag_for_review'
	        # Keep the log in step with what was actually submitted.
	        ai_choice['decision'] = decision
	        reasoning = str(ai_choice.get('reasoning') or 'No reasoning.')

	        # 3. Step Environment / 4. Parse Results
	        step_data = None
	        try:
	            step_data = client_hf.predict(
	                param_0=decision,
	                param_1=reasoning,
	                param_2=f"STEP_{i}",
	                api_name="/step_form",
	            )
	            raw_json = json.loads(step_data[1])
	            reward = raw_json.get("reward", 0.0)
	            next_obs = raw_json.get("observation")
	            if isinstance(next_obs, dict):
	                feedback = next_obs.get("status", "N/A")
	            else:
	                feedback = "N/A"
	        except Exception as e:
	            print(f"❌ Server returned invalid data (likely empty queue or crash). Raw output: {step_data}")
	            print(f"   Error: {e!r}")
	            break

	        # 5. Log it
	        log_result(i, obs, ai_choice, reward, feedback)

	        # Prepare for next observation. A reply without one is a normal end
	        # of the run, so the step above is still logged before stopping.
	        if not isinstance(next_obs, dict):
	            print(f"ℹ️ No further observation after step {i}; stopping early.")
	            break
	        obs = next_obs
	        if i < STEPS_TO_RUN:
	            time.sleep(1)  # Small delay for stability

	    print(f"\n🏁 Test Complete. Results saved to: {LOG_FILE}")

	# Script entry point: start the screening run only when this file is
	# executed directly, not when it is imported as a module.
	if __name__ == "__main__":
	    run_multi_step_test()