File size: 4,534 Bytes
0c51b93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import re
from typing import Any, Dict, List, Tuple

from openai import OpenAI

from sotopia_rl.prompter.one_pass_instructions import ATTRIBUTION_INSTRUCTIONS_DICT

# Template for the only reply format accepted from the LLM. ``{agent}`` is a
# ``str.format`` placeholder filled in with the evaluated agent's name; the
# turn number may be a single digit or a two-digit number without a leading
# zero (i.e. 0-99).
REGEX = "^Utterance (?:[0-9]|[1-9][0-9]) by {agent}$"

def check_regex_formatting(target: str, agent: str, regex: str = REGEX) -> bool:
    """Return True when ``target`` matches ``regex`` instantiated for ``agent``.

    Args:
        target: Text to validate (typically the raw LLM reply).
        agent: Agent name substituted for the ``{agent}`` placeholder.
        regex: Pattern template containing an ``{agent}`` placeholder.

    Returns:
        True if the instantiated pattern matches at the start of ``target``,
        False otherwise.
    """
    # The agent name is literal text, not a pattern: escape it so characters
    # such as "." or "(" in a name cannot alter (or break) the regex.
    pattern = regex.format(agent=re.escape(agent))
    return re.match(pattern, target) is not None

def extract_turn_number(text: str) -> int:
    """Return the turn number from the first "Utterance <n> by" in ``text``.

    Returns -1 when ``text`` contains no such fragment.
    """
    found = re.search(r"Utterance ([0-9]+) by", text)
    return int(found.group(1)) if found else -1

def openai_call(prompt: str, model: str = "gpt-3.5-turbo") -> str | None:
    """Send ``prompt`` as a single user message and return the reply text.

    A new ``OpenAI`` client is created on every call. The returned value is
    the message content of the first choice, which the annotation allows to
    be ``None``.
    """
    completion = OpenAI().chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

def get_attribution_formatting_instructions(agent: str) -> str:
    """Return the prompt suffix telling the LLM which reply format to use.

    The suffix embeds ``REGEX`` with its ``{agent}`` placeholder replaced by
    ``agent``, surrounded by newlines.
    """
    pattern = REGEX.format(agent=agent)
    return (
        "\nYour format should strictly follow the regex pattern below:\n"
        + pattern
        + "\n"
    )

def get_single_attribution_prompt(
    conversation: List[Tuple[str, str]],
    goal: str,
    score: float,
    agent: str,
    attribution_instruction: str
) -> str:
    """Build the single attribution prompt for one agent in one conversation.

    The prompt consists of the attribution instruction, the evaluated agent's
    name and goal, the full transcript (one "Utterance <turn> by <speaker>"
    line per utterance), and the reply-format instructions.

    Args:
        conversation: Ordered ``(speaker, utterance)`` pairs.
        goal: The evaluated agent's goal, inserted verbatim.
        score: Accepted for signature compatibility; it is not included in
            the generated prompt.
        agent: Name of the agent being evaluated.
        attribution_instruction: Instruction text placed at the top.

    Returns:
        The complete prompt as a single string.
    """
    header = (
        f"{attribution_instruction}\n\n"
        f"Chosen agent for Evaluation: {agent}\n\n"
        f"Agent's Goal: {goal}\n\n"
        "Conversation:\n"
    )
    # Two consecutive utterances share one turn number, hence ``i // 2``.
    transcript = "".join(
        f"Utterance {i // 2} by {speaker}: {utterance}\n"
        for i, (speaker, utterance) in enumerate(conversation)
    )
    return header + transcript + "\n" + get_attribution_formatting_instructions(agent)

def assign_attributions_for_conversation(
    prompt: str, conversation: list, agent: str, llm_name: str = "gpt-3.5-turbo"
) -> Dict[str, int] | Any:
    """Ask the LLM which single utterance by ``agent`` gets the attribution.

    The LLM is queried up to five times. The first reply that is well formed
    (matches the expected regex, names a plausible turn number and refers to
    an utterance that exists in ``conversation``) is marked with ``1``.

    Args:
        prompt: Full attribution prompt sent to the LLM.
        conversation: Ordered ``(speaker, utterance)`` pairs.
        agent: Name of the agent being evaluated.
        llm_name: Model name forwarded to ``openai_call``.

    Returns:
        A dict keyed by "Utterance <turn> by <speaker>" for every utterance
        in the conversation, with value ``1`` for the chosen utterance and
        ``0`` elsewhere. If no valid reply is obtained after five attempts
        the all-zero dict is returned. If the LLM returns no content an
        empty dict is returned.
    """
    max_attempts = 5

    # The conversation is fixed across retries, so build the table once.
    uttr_attr_dict = {}
    uttr_count = 0
    for j, (speaker, _) in enumerate(conversation):
        if speaker == agent:
            uttr_count += 1
        uttr_attr_dict[f"Utterance {j//2} by {speaker}"] = 0

    for attempt in range(max_attempts):
        response = openai_call(prompt, llm_name)
        # Check for a missing reply *before* touching it as a string.
        if response is None:
            print("Failed to get response from OpenAI; returning empty dictionary")
            return {}
        response = response.strip()

        try:
            is_valid = (
                check_regex_formatting(response, agent)
                and -1 < extract_turn_number(response) < uttr_count
                and response in uttr_attr_dict
            )
        except re.error:
            # An agent name with regex metacharacters can yield an invalid
            # pattern; treat that as a failed validation rather than crash.
            is_valid = False

        if is_valid:
            # Stop at the first valid reply instead of re-querying the LLM.
            uttr_attr_dict[response] = 1
            return uttr_attr_dict

        if attempt < max_attempts - 1:
            print("Response does not match the regex expression; retrying")
        else:
            print("Response length does not match the number of agent utterances after 5 attempts; returning original dictionary")

    # No valid reply: hand back the untouched all-zero table.
    return uttr_attr_dict

def calc_reward(utter_attrib: float, goal_score: float) -> float:
    """Scale the goal score by an utterance's attribution weight."""
    weighted_score = utter_attrib * goal_score
    return weighted_score

def calc_attributed_reward(attributed_data: Dict[str, float | int], attribution_instruction_name: str, goal_score: float | int) -> Dict[str, Dict[str, Any]]:
    """Turn per-utterance attributions into per-utterance rewards.

    Args:
        attributed_data: Mapping from utterance key to its attribution value.
        attribution_instruction_name: Accepted for signature compatibility;
            not used in the computation.
        goal_score: The agent's goal score that the attributions scale.

    Returns:
        A mapping from each utterance key to
        ``{"reward": <attribution * goal_score>, "attribution": <attribution>}``.
        An empty input yields an empty mapping.
    """
    return {
        utterance_key: {
            "reward": calc_reward(attribution, goal_score),
            "attribution": attribution,
        }
        for utterance_key, attribution in attributed_data.items()
    }


def fill_in_attribution_scores(
    conversation: List[Tuple[str, str]],
    raw_attribution_scores: Dict[str, Any],
    agent: str,
) -> Dict[str, Any]:
    """Return attribution scores for every utterance spoken by ``agent``.

    Keys have the form "Utterance <turn> by <speaker>"; utterances missing
    from ``raw_attribution_scores`` default to 0. Other speakers are omitted.
    """
    agent_keys = (
        f"Utterance {position//2} by {speaker}"
        for position, (speaker, _) in enumerate(conversation)
        if speaker == agent
    )
    return {key: raw_attribution_scores.get(key, 0) for key in agent_keys}

# unified function
def get_attribution_single_conv(conversation, agent, goals, episode, llm_name, attribution_instruction_name):
    """Run the full attribution pipeline for one agent in one conversation.

    Looks up the instruction text by name, builds the prompt, asks the LLM
    for an attribution, and converts the result into per-utterance rewards
    using the agent's score from ``episode["scores"]``.
    """
    instruction_text = ATTRIBUTION_INSTRUCTIONS_DICT[attribution_instruction_name]
    agent_goal = goals[agent]
    agent_score = episode["scores"][agent]

    attribution_prompt = get_single_attribution_prompt(
        conversation,
        agent_goal,
        agent_score,
        agent,
        attribution_instruction=instruction_text,
    )
    per_utterance_attribution = assign_attributions_for_conversation(
        attribution_prompt, conversation, agent, llm_name=llm_name
    )
    return calc_attributed_reward(
        per_utterance_attribution, attribution_instruction_name, episode["scores"][agent]
    )