youqiwong's picture
Upload folder using huggingface_hub
0c51b93 verified
import json
import os
from collections import OrderedDict
from typing import Any, Dict, List, Tuple
def parse_conversation(
    episode: Dict[str, Any]
) -> Tuple[List[Tuple[str, str]], Dict[str, Dict[str, Any]]]:
    """Extract the speaker-attributed utterances and the goals from an episode.

    The ``"social_interactions"`` text is split on blank lines (two consecutive
    newlines). Each chunk that starts with one of the two agent names becomes a
    ``(speaker, text)`` pair; chunks that start with neither name are dropped.

    Args:
        episode: Mapping holding a ``"social_interactions"`` string and a
            ``"social_goals"`` mapping whose keys are the two agent names.

    Returns:
        A ``(parsed_conversation, goals)`` tuple, where ``parsed_conversation``
        is the list of ``(speaker, text)`` pairs in their original order and
        ``goals`` is the ``"social_goals"`` object itself (not a copy).

    Raises:
        KeyError: If either expected key is missing from ``episode``.
        ValueError: If ``"social_goals"`` does not have exactly two keys.
    """
    conversation = episode["social_interactions"].split("\n\n")
    goals = episode["social_goals"]
    agent1, agent2 = list(goals.keys())

    # Check the longer name first: if one name is a prefix of the other
    # (e.g. "Sam" and "Samantha"), testing the shorter one first would
    # attribute the other agent's lines to the wrong speaker.
    candidates = sorted((agent1, agent2), key=len, reverse=True)

    parsed_conversation = []
    for utterance in conversation:
        speaker = next(
            (name for name in candidates if utterance.startswith(name)), None
        )
        if speaker is None:
            continue  # Skip any unparsable utterances
        # Drop the speaker name plus the single character that follows it,
        # then trim surrounding whitespace from what remains.
        parsed_conversation.append(
            (speaker, utterance[len(speaker) + 1 :].strip())
        )
    return parsed_conversation, goals
def extract_goal_scores(data: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Return shallow copies of the episodes with a ``"scores"`` entry added.

    For each episode, the first two keys of ``"agents_background"`` are paired
    by position with ``episode["rewards"][0]["goal"]`` and
    ``episode["rewards"][1]["goal"]``. The resulting mapping is stored under
    ``"scores"``, replacing any existing ``"scores"`` key in the copy. The
    input episodes are not modified.

    Args:
        data: Episodes, each with ``"agents_background"`` and ``"rewards"``.

    Returns:
        A new list with one augmented episode per input episode.
    """

    def _with_scores(record: Dict[str, Any]) -> Dict[str, Any]:
        # Agent names come from the key order of "agents_background".
        names = list(record["agents_background"].keys())
        goal_by_agent = {}
        for position in (0, 1):
            goal_by_agent[names[position]] = record["rewards"][position]["goal"]
        return {**record, "scores": goal_by_agent}

    return [_with_scores(record) for record in data]
def add_score(
    data_dir: str,
    input_file: str = "example_episodes.jsonl",
    output_file: str = "example_episodes_with_scores.jsonl",
) -> None:
    """Read a JSONL file of episodes and write a copy with ``"scores"`` added.

    Each non-blank line of ``input_file`` is parsed as one JSON episode. For
    every episode, the first two keys of ``"agents_background"`` are paired by
    position with ``episode["rewards"][i][1]["goal"]`` and stored under a new
    ``"scores"`` key. The augmented episodes are written to ``output_file``,
    one JSON document per line.

    Args:
        data_dir: Directory containing the input file; the output file is
            written there as well.
        input_file: Name of the JSONL file to read, relative to ``data_dir``.
        output_file: Name of the JSONL file to write, relative to ``data_dir``.

    Raises:
        OSError: If the input cannot be read or the output cannot be written.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If an episode lacks ``"agents_background"``, ``"rewards"``
            or the ``"goal"`` entry.
        IndexError: If an episode has fewer than two agents or reward entries.
    """
    input_path = os.path.join(data_dir, input_file)
    output_path = os.path.join(data_dir, output_file)

    # Decode explicitly as UTF-8 rather than relying on the platform default,
    # and skip whitespace-only lines (e.g. a trailing blank line), which are
    # not valid JSON documents.
    with open(input_path, "r", encoding="utf-8") as f:
        data = [
            json.loads(line, object_pairs_hook=OrderedDict)
            for line in f
            if line.strip()
        ]

    new_data = []
    for episode in data:
        agents = list(episode["agents_background"].keys())
        # Each reward entry is indexed with [1] to reach the mapping that
        # holds the "goal" value.
        scores = {
            agents[i]: episode["rewards"][i][1]["goal"] for i in range(2)
        }
        new_data.append({**episode, "scores": scores})

    with open(output_path, "w", encoding="utf-8") as f:
        for episode in new_data:
            f.write(json.dumps(episode) + "\n")