File size: 2,074 Bytes
0c51b93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import json
import os
from collections import OrderedDict
from typing import Any, Dict, List, Tuple


def parse_conversation(
    episode: Dict[str, Any]
) -> Tuple[List[Tuple[str, str]], Dict[str, Dict[str, Any]]]:
    """Extract and parse conversation and goals from the episode.

    The text under ``episode["social_interactions"]`` is split on blank
    lines (``"\\n\\n"``). Each chunk that starts with one of the two agent
    names (the keys of ``episode["social_goals"]``) becomes a
    ``(speaker, text)`` pair; chunks that start with neither name are
    dropped.

    Args:
        episode: Mapping with a ``"social_interactions"`` string and a
            ``"social_goals"`` mapping that has exactly two keys.

    Returns:
        A tuple ``(parsed_conversation, goals)`` where ``parsed_conversation``
        is the list of ``(speaker, text)`` pairs in original order and
        ``goals`` is ``episode["social_goals"]`` unchanged.

    Raises:
        KeyError: If either expected key is missing from ``episode``.
        ValueError: If ``social_goals`` does not have exactly two keys.
    """
    conversation = episode["social_interactions"].split("\n\n")
    goals = episode["social_goals"]
    agent1, agent2 = list(goals.keys())
    # Try the longer name first: if one name is a prefix of the other
    # (e.g. "Sam" and "Samantha"), a plain in-order startswith check would
    # credit the longer-named agent's utterances to the shorter name.
    candidates = sorted((agent1, agent2), key=len, reverse=True)
    parsed_conversation = []
    for utterance in conversation:
        speaker = next(
            (name for name in candidates if utterance.startswith(name)), None
        )
        if speaker is None:
            continue  # Skip any unparsable utterances
        # Drop the speaker name plus the single character that follows it.
        parsed_conversation.append(
            (speaker, utterance[len(speaker) + 1 :].strip())
        )
    return parsed_conversation, goals


def extract_goal_scores(data: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Return copies of the episodes with a per-agent ``"scores"`` entry.

    For each episode, the first two keys of ``episode["agents_background"]``
    are paired by position with ``episode["rewards"][0]["goal"]`` and
    ``episode["rewards"][1]["goal"]``. The input episodes are not modified;
    each result is a shallow copy with ``"scores"`` added.
    """
    enriched = []
    for record in data:
        goal_by_agent = {}
        for position in (0, 1):
            names = list(record["agents_background"].keys())
            goal_by_agent[names[position]] = record["rewards"][position]["goal"]
        enriched.append({**record, "scores": goal_by_agent})
    return enriched


def add_score(
    data_dir: str,
    input_file: str = "example_episodes.jsonl",
    output_file: str = "example_episodes_with_scores.jsonl",
) -> None:
    """Read a JSONL file of episodes and write a copy with goal scores added.

    Each non-blank line of ``data_dir/input_file`` is parsed as one JSON
    episode. For the first two keys of ``episode["agents_background"]``, the
    score is read from ``episode["rewards"][i][1]["goal"]`` (the element at
    index 1 of each agent's reward entry) and stored under a new
    ``"scores"`` mapping keyed by agent name. The augmented episodes are
    written one per line to ``data_dir/output_file``.

    Args:
        data_dir: Directory containing the input file and receiving the
            output file.
        input_file: Name of the JSONL file to read.
        output_file: Name of the JSONL file to write.
    """
    # JSON text is UTF-8; be explicit so the platform default is not used.
    with open(os.path.join(data_dir, input_file), "r", encoding="utf-8") as f:
        # Blank lines (e.g. a trailing empty line) are not JSON; skip them.
        data = [
            json.loads(line, object_pairs_hook=OrderedDict)
            for line in f
            if line.strip()
        ]

    new_data = []
    for episode in data:
        agents = list(episode["agents_background"].keys())
        scores = {
            agents[i]: episode["rewards"][i][1]["goal"] for i in range(2)
        }
        new_data.append({**episode, "scores": scores})

    with open(os.path.join(data_dir, output_file), "w", encoding="utf-8") as f:
        for episode in new_data:
            f.write(json.dumps(episode) + "\n")