File size: 1,387 Bytes
0c51b93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
from typing import Any, Dict, List, Tuple


def assign_attributions_for_conversation(
    agent: str,
    conversation: List[Tuple[str, str]],
    discounting_factor: float,
) -> Dict[str, float]:
    """Assign a discounted attribution weight to each utterance of ``agent``.

    The agent's final utterance receives ``discounting_factor ** 0`` and each
    earlier utterance is discounted once more per later utterance by the same
    agent, so the exponent is never negative.

    Args:
        agent: Speaker name whose utterances receive weights.
        conversation: Ordered ``(speaker, utterance)`` pairs.
        discounting_factor: Base of the per-utterance discount.

    Returns:
        A dict mapping ``"Utterance {index // 2} by {speaker}"`` to the weight,
        where ``index`` is the utterance's position in ``conversation``.
        Empty when ``agent`` never speaks.
    """
    # Positions (and speaker labels) of the agent's turns, in order.
    agent_turns = [
        (index, speaker)
        for index, (speaker, _) in enumerate(conversation)
        if speaker == agent
    ]
    last_rank = len(agent_turns) - 1

    # The exponent is based on the rank among the agent's own utterances, not
    # on ``index // 2``: the two only coincide when speakers strictly
    # alternate, and otherwise the exponent could become negative.
    # NOTE(review): the key keeps ``index // 2`` as the utterance number,
    # which presumably assumes two alternating speakers; two turns sharing the
    # same ``index // 2`` map to one key and the later one overwrites.
    return {
        f"Utterance {index // 2} by {speaker}": discounting_factor ** (last_rank - rank)
        for rank, (index, speaker) in enumerate(agent_turns)
    }

def calc_attributed_reward(attributed_data: List[Dict[str, float | int]], attribution_instruction_name: str, goal_score: float | int) -> List[Dict[str, Any]]:
    utterance_reward_map = {}
    for k, v in attributed_data.items():
        utterance_reward_map[k] = {"reward": v * goal_score, "attribution_instruction_name": attribution_instruction_name}
    return utterance_reward_map

# unified function
def get_attribution_single_conv(conversation, agent, goals, episode, llm_name, attribution_instruction_name):
    """Compute per-utterance attributed rewards for ``agent`` in one conversation.

    Weights come from ``assign_attributions_for_conversation`` with a
    discounting factor of 1.0 and are scaled by ``episode["scores"][agent]``
    via ``calc_attributed_reward``. ``goals`` and ``llm_name`` are accepted but
    not used by this implementation.
    """
    utterance_weights = assign_attributions_for_conversation(
        agent,
        conversation,
        discounting_factor=1.0,
    )
    agent_score = episode["scores"][agent]
    return calc_attributed_reward(
        utterance_weights,
        attribution_instruction_name,
        agent_score,
    )