basketball_code / sotopia_rl /utils /preprocess.py

Upload folder using huggingface_hub

0c51b93 verified about 2 months ago

2.07 kB

	import json
	import os
	from collections import OrderedDict
	from typing import Any, Dict, List, Tuple


	def parse_conversation(
	    episode: Dict[str, Any]
	) -> Tuple[List[Tuple[str, str]], Dict[str, Dict[str, Any]]]:
	    """Extract and parse conversation and goals from the episode.

	    The text under ``episode["social_interactions"]`` is split on blank
	    lines (``"\\n\\n"``). An utterance is kept only if it starts with one of
	    the keys of ``episode["social_goals"]``; that key is taken as the
	    speaker. Utterances that start with none of them are skipped.

	    Args:
	        episode: Mapping with a ``"social_interactions"`` string and a
	            ``"social_goals"`` mapping keyed by agent name.

	    Returns:
	        A ``(parsed_conversation, goals)`` tuple. ``parsed_conversation`` is
	        a list of ``(speaker, text)`` pairs in conversation order, where
	        ``text`` is the utterance with the speaker name and the single
	        character following it removed, then whitespace-stripped. ``goals``
	        is ``episode["social_goals"]`` returned unchanged.

	    Raises:
	        KeyError: If either required key is missing from ``episode``.
	    """
	    conversation = episode["social_interactions"].split("\n\n")
	    goals = episode["social_goals"]
	    # Try longer names first: if one agent's name is a prefix of another's
	    # (e.g. "Sam" and "Samantha"), testing the short name first would
	    # attribute the other agent's lines to the wrong speaker.
	    speakers = sorted(goals, key=len, reverse=True)
	    parsed_conversation = []
	    for utterance in conversation:
	        speaker = next(
	            (name for name in speakers if utterance.startswith(name)), None
	        )
	        if speaker is None:
	            continue  # Skip any unparsable utterances
	        # Drop the speaker name plus the one separator character after it.
	        parsed_conversation.append(
	            (speaker, utterance[len(speaker) + 1 :].strip())
	        )
	    return parsed_conversation, goals


	def extract_goal_scores(data: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
	    """Return copies of the episodes in ``data`` with a ``"scores"`` entry.

	    For each episode, the first two keys of ``episode["agents_background"]``
	    are paired by position with ``episode["rewards"][0]["goal"]`` and
	    ``episode["rewards"][1]["goal"]``. The resulting ``{agent: goal}``
	    mapping is stored under ``"scores"`` in a shallow copy of the episode;
	    the input episodes are not modified.

	    Args:
	        data: Episodes, each providing ``"agents_background"`` (a mapping
	            with at least two keys) and ``"rewards"`` (a sequence whose
	            first two items can be indexed by ``"goal"``).

	    Returns:
	        A new list with one enriched episode per input episode, in order.
	    """

	    def _with_scores(episode: Dict[str, Any]) -> Dict[str, Any]:
	        # Build the agent-name list once; it is indexed by position below.
	        names = list(episode["agents_background"])
	        goal_by_agent = {}
	        for position in (0, 1):
	            reward = episode["rewards"][position]
	            goal_by_agent[names[position]] = reward["goal"]
	        enriched = dict(episode)
	        enriched["scores"] = goal_by_agent
	        return enriched

	    return [_with_scores(episode) for episode in data]


	def add_score(
	    data_dir: str,
	    input_file: str = "example_episodes.jsonl",
	    output_file: str = "example_episodes_with_scores.jsonl",
	) -> None:
	    """Read a JSONL file of episodes, add goal scores, and write a new JSONL.

	    Each non-blank line of ``input_file`` is parsed as one JSON episode.
	    For every episode, the first two keys of ``episode["agents_background"]``
	    are paired by position with ``episode["rewards"][i][1]["goal"]`` and the
	    resulting ``{agent: goal}`` mapping is stored under ``"scores"``. The
	    enriched episodes are written to ``output_file``, one JSON object per
	    line, in input order.

	    Args:
	        data_dir: Directory that contains ``input_file`` and receives
	            ``output_file``.
	        input_file: Name of the JSONL file to read, relative to ``data_dir``.
	        output_file: Name of the JSONL file to write, relative to
	            ``data_dir``.

	    Raises:
	        OSError: If a file cannot be opened.
	        json.JSONDecodeError: If a non-blank input line is not valid JSON.
	        KeyError, IndexError: If an episode lacks the expected fields or has
	            fewer than two agents/rewards.
	    """
	    input_path = os.path.join(data_dir, input_file)
	    output_path = os.path.join(data_dir, output_file)

	    # Read as UTF-8 explicitly so parsing does not depend on the platform's
	    # locale encoding. Blank lines are skipped because json.loads would
	    # raise on them.
	    with open(input_path, "r", encoding="utf-8") as f:
	        data = [
	            json.loads(line, object_pairs_hook=OrderedDict)
	            for line in f
	            if line.strip()
	        ]

	    new_data = []
	    for episode in data:
	        agents = list(episode["agents_background"].keys())
	        scores = {}
	        for i in range(2):
	            # Each reward entry is indexed as entry[1]["goal"].
	            # NOTE(review): presumably an (overall, per-dimension) pair;
	            # confirm against the data producer.
	            scores[agents[i]] = episode["rewards"][i][1]["goal"]
	        new_data.append({**episode, "scores": scores})

	    # The output is opened only after every episode has been processed, so
	    # a malformed episode cannot leave a truncated output file behind.
	    with open(output_path, "w", encoding="utf-8") as f:
	        for episode in new_data:
	            f.write(json.dumps(episode) + "\n")