File size: 3,052 Bytes
be37527
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
"""Random agent baseline — establishes the floor for reward."""

import random
from recruitopenenv import RecruitopenenvEnv, RecruitopenenvAction

# Tool name -> actions of that tool. random_action() first samples a key of
# this mapping, then one entry of the selected list.
TOOLS_ACTIONS = {
    "crm": [
        "read_candidate",
        "update_stage",
        "update_field",
        "add_note",
    ],
    "messaging": [
        "send_message",
        "read_reply",
    ],
    "approval": [
        "request_approval",
        "check_approval",
    ],
    "workflow": [
        "wait",
    ],
}

# Candidate values for the ``topic`` argument of messaging/send_message.
TOPICS = [
    "greeting",
    "call",
    "experience",
    "home_time",
    "pay",
    "equipment",
    "route",
    "deal_breakers",
    "availability",
    "violations",
    "medical_card",
    "references",
    "pitch",
    "offer",
    "negotiate_pay",
    "negotiate_home_time",
    "signing_bonus",
    "address_concern",
]

# Candidate values for the ``stage`` argument of crm/update_stage.
STAGES = [
    "contacted",
    "interested",
    "approval_pending",
    "offer_sent",
    "hired",
    "lost",
]

# Number of episodes played by run_baseline().
NUM_EPISODES = 100


def random_action():
    """Build a random ``RecruitopenenvAction``.

    A tool is drawn from ``TOOLS_ACTIONS``, then one of that tool's actions.
    Only the arguments relevant to the drawn pair are randomized; all others
    keep their placeholder (empty string, or ``-1`` for ``job_id``).
    """
    chosen_tool = random.choice(list(TOOLS_ACTIONS))
    chosen_action = random.choice(TOOLS_ACTIONS[chosen_tool])

    # Placeholder arguments, overridden below for the pairs that use them.
    params = {"topic": "", "job_id": -1, "stage": "", "field": "", "value": ""}

    pair = (chosen_tool, chosen_action)
    if pair == ("messaging", "send_message"):
        params["topic"] = random.choice(TOPICS)
        # job_id is randomized only for the pitch/offer topics.
        if params["topic"] in ("pitch", "offer"):
            params["job_id"] = random.randint(0, 5)
    elif pair == ("crm", "update_stage"):
        params["stage"] = random.choice(STAGES)
    elif pair == ("crm", "update_field"):
        params["field"] = random.choice(["cdl_class", "years_exp", "home_time_pref"])
        # The value is fixed regardless of which field was drawn.
        params["value"] = "A"
    elif pair == ("approval", "request_approval"):
        params["job_id"] = random.randint(0, 5)

    return RecruitopenenvAction(tool=chosen_tool, action=chosen_action, **params)


def _run_episode(env, max_steps):
    """Play one episode with random actions.

    Returns a ``(episode_reward, steps, last_result)`` tuple, where
    ``last_result`` is whatever the final ``env.reset()``/``env.step()``
    call returned.
    """
    result = env.reset()
    ep_reward = 0.0
    steps = 0

    while not result.done and steps < max_steps:
        result = env.step(random_action())
        # NOTE(review): assumes an OpenEnv-style step result whose reward may
        # be None; a missing reward is counted as 0 instead of raising.
        ep_reward += result.reward or 0.0
        steps += 1

    return ep_reward, steps, result


def run_baseline(num_episodes=None, base_url="http://localhost:8000", max_steps=100):
    """Run the random agent and print aggregate statistics.

    Args:
        num_episodes: Number of episodes to play. ``None`` (the default)
            falls back to the module-level ``NUM_EPISODES``.
        base_url: URL passed to ``RecruitopenenvEnv``.
        max_steps: Per-episode cap on the number of actions taken.

    Prints a progress line every 10 episodes and a summary at the end.
    An episode counts as a success when the final observation's ``stage``
    is ``"hired"``.
    """
    if num_episodes is None:
        num_episodes = NUM_EPISODES

    # Nothing to average over: skip the connection instead of dividing by zero.
    if num_episodes <= 0:
        print("No episodes requested; nothing to report.")
        return

    rewards = []
    successes = 0
    total_steps = 0

    with RecruitopenenvEnv(base_url=base_url).sync() as env:
        for ep in range(num_episodes):
            ep_reward, steps, result = _run_episode(env, max_steps)

            rewards.append(ep_reward)
            total_steps += steps

            if result.observation.stage == "hired":
                successes += 1

            if (ep + 1) % 10 == 0:
                avg_so_far = sum(rewards) / len(rewards)
                print(f"  Episode {ep+1}: reward={ep_reward:.1f}, running avg={avg_so_far:.2f}")

    avg_reward = sum(rewards) / len(rewards)
    avg_steps = total_steps / num_episodes

    print("\n========== RANDOM BASELINE ==========")
    print(f"Episodes:           {num_episodes}")
    print(f"Avg reward:         {avg_reward:.2f}")
    print(f"Min reward:         {min(rewards):.2f}")
    print(f"Max reward:         {max(rewards):.2f}")
    print(f"Hire rate:          {successes}/{num_episodes} ({100*successes/num_episodes:.1f}%)")
    print(f"Avg steps/episode:  {avg_steps:.1f}")
    print("======================================")


# Run the baseline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    run_baseline()