recruitopenenv / baseline_random.py
bathientran's picture
Upload folder using huggingface_hub
be37527 verified
"""Random agent baseline — establishes the floor for reward."""
import random
from recruitopenenv import RecruitopenenvEnv, RecruitopenenvAction
TOOLS_ACTIONS = {
"crm": ["read_candidate", "update_stage", "update_field", "add_note"],
"messaging": ["send_message", "read_reply"],
"approval": ["request_approval", "check_approval"],
"workflow": ["wait"],
}
TOPICS = [
"greeting", "call", "experience", "home_time", "pay", "equipment",
"route", "deal_breakers", "availability", "violations", "medical_card",
"references", "pitch", "offer", "negotiate_pay", "negotiate_home_time",
"signing_bonus", "address_concern",
]
STAGES = ["contacted", "interested", "approval_pending", "offer_sent", "hired", "lost"]
NUM_EPISODES = 100
def random_action():
tool = random.choice(list(TOOLS_ACTIONS.keys()))
action = random.choice(TOOLS_ACTIONS[tool])
topic = ""
job_id = -1
stage = ""
field = ""
value = ""
if tool == "messaging" and action == "send_message":
topic = random.choice(TOPICS)
if topic in ("pitch", "offer"):
job_id = random.randint(0, 5)
elif tool == "crm" and action == "update_stage":
stage = random.choice(STAGES)
elif tool == "crm" and action == "update_field":
field = random.choice(["cdl_class", "years_exp", "home_time_pref"])
value = "A"
elif tool == "approval" and action == "request_approval":
job_id = random.randint(0, 5)
return RecruitopenenvAction(
tool=tool, action=action, topic=topic,
job_id=job_id, stage=stage, field=field, value=value,
)
def run_baseline():
rewards = []
successes = 0
total_steps = 0
with RecruitopenenvEnv(base_url="http://localhost:8000").sync() as env:
for ep in range(NUM_EPISODES):
result = env.reset()
ep_reward = 0.0
steps = 0
while not result.done and steps < 100:
action = random_action()
result = env.step(action)
ep_reward += result.reward
steps += 1
rewards.append(ep_reward)
total_steps += steps
if result.observation.stage == "hired":
successes += 1
if (ep + 1) % 10 == 0:
avg_so_far = sum(rewards) / len(rewards)
print(f" Episode {ep+1}: reward={ep_reward:.1f}, running avg={avg_so_far:.2f}")
avg_reward = sum(rewards) / len(rewards)
avg_steps = total_steps / NUM_EPISODES
print("\n========== RANDOM BASELINE ==========")
print(f"Episodes: {NUM_EPISODES}")
print(f"Avg reward: {avg_reward:.2f}")
print(f"Min reward: {min(rewards):.2f}")
print(f"Max reward: {max(rewards):.2f}")
print(f"Hire rate: {successes}/{NUM_EPISODES} ({100*successes/NUM_EPISODES:.1f}%)")
print(f"Avg steps/episode: {avg_steps:.1f}")
print("======================================")
if __name__ == "__main__":
run_baseline()