Spaces:

bathientran
/

recruitopenenv

Runtime error

App Files Files Community

recruitopenenv / baseline_random.py

bathientran

Upload folder using huggingface_hub

be37527 verified 6 days ago

raw

history blame contribute delete

3.05 kB

	"""Random agent baseline — establishes the floor for reward."""

	import random
	from recruitopenenv import RecruitopenenvEnv, RecruitopenenvAction

	TOOLS_ACTIONS = {
	"crm": ["read_candidate", "update_stage", "update_field", "add_note"],
	"messaging": ["send_message", "read_reply"],
	"approval": ["request_approval", "check_approval"],
	"workflow": ["wait"],
	}

	TOPICS = [
	"greeting", "call", "experience", "home_time", "pay", "equipment",
	"route", "deal_breakers", "availability", "violations", "medical_card",
	"references", "pitch", "offer", "negotiate_pay", "negotiate_home_time",
	"signing_bonus", "address_concern",
	]

	STAGES = ["contacted", "interested", "approval_pending", "offer_sent", "hired", "lost"]

	NUM_EPISODES = 100


	def random_action():
	tool = random.choice(list(TOOLS_ACTIONS.keys()))
	action = random.choice(TOOLS_ACTIONS[tool])

	topic = ""
	job_id = -1
	stage = ""
	field = ""
	value = ""

	if tool == "messaging" and action == "send_message":
	topic = random.choice(TOPICS)
	if topic in ("pitch", "offer"):
	job_id = random.randint(0, 5)
	elif tool == "crm" and action == "update_stage":
	stage = random.choice(STAGES)
	elif tool == "crm" and action == "update_field":
	field = random.choice(["cdl_class", "years_exp", "home_time_pref"])
	value = "A"
	elif tool == "approval" and action == "request_approval":
	job_id = random.randint(0, 5)

	return RecruitopenenvAction(
	tool=tool, action=action, topic=topic,
	job_id=job_id, stage=stage, field=field, value=value,
	)


	def run_baseline():
	rewards = []
	successes = 0
	total_steps = 0

	with RecruitopenenvEnv(base_url="http://localhost:8000").sync() as env:
	for ep in range(NUM_EPISODES):
	result = env.reset()
	ep_reward = 0.0
	steps = 0

	while not result.done and steps < 100:
	action = random_action()
	result = env.step(action)
	ep_reward += result.reward
	steps += 1

	rewards.append(ep_reward)
	total_steps += steps

	if result.observation.stage == "hired":
	successes += 1

	if (ep + 1) % 10 == 0:
	avg_so_far = sum(rewards) / len(rewards)
	print(f" Episode {ep+1}: reward={ep_reward:.1f}, running avg={avg_so_far:.2f}")

	avg_reward = sum(rewards) / len(rewards)
	avg_steps = total_steps / NUM_EPISODES

	print("\n========== RANDOM BASELINE ==========")
	print(f"Episodes: {NUM_EPISODES}")
	print(f"Avg reward: {avg_reward:.2f}")
	print(f"Min reward: {min(rewards):.2f}")
	print(f"Max reward: {max(rewards):.2f}")
	print(f"Hire rate: {successes}/{NUM_EPISODES} ({100*successes/NUM_EPISODES:.1f}%)")
	print(f"Avg steps/episode: {avg_steps:.1f}")
	print("======================================")


	if __name__ == "__main__":
	run_baseline()