| import json | |
| import os | |
| from typing import Any, Dict | |
| def select_qualifying_episodes( | |
| episodes: list[Dict[str, Any]] | |
| ) -> list[Dict[str, Any]]: | |
| num_turns = [] | |
| len_episodes = [] | |
| for episode in episodes: | |
| num_turns.append(episode["social_interactions"].count("said:")) | |
| len_episodes.append(len(episode["social_interactions"])) | |
| qualifying_episodes = [] | |
| for episode in episodes: | |
| if ( | |
| ( | |
| episode["rewards"][0]["goal"] >= 8 | |
| or episode["rewards"][0]["goal"] <= 2 | |
| ) | |
| and ( | |
| episode["rewards"][1]["goal"] >= 8 | |
| or episode["rewards"][1]["goal"] <= 2 | |
| ) | |
| and (episode["social_interactions"].count("said:") > 2) | |
| and ( | |
| episode["experiment_model_name_pairs"][1] == "gpt-4" | |
| or episode["experiment_model_name_pairs"][1] == "gpt-3.5-turbo" | |
| ) | |
| and ( | |
| episode["experiment_model_name_pairs"][2] == "gpt-4" | |
| or episode["experiment_model_name_pairs"][2] == "gpt-3.5-turbo" | |
| ) | |
| ): | |
| qualifying_episodes.append(episode) | |
| return qualifying_episodes | |
| def create_non_repeating_sample_episodes( | |
| qualifying_episodes: list[Dict[str, Any]], num_episodes: int | |
| ) -> list[Dict[str, Any]]: | |
| if num_episodes == -1: | |
| return qualifying_episodes | |
| example_episodes = [] | |
| visited_codename = set() | |
| for episode in qualifying_episodes: | |
| if episode["codename"] in visited_codename: | |
| continue | |
| example_episodes.append(episode) | |
| visited_codename.add(episode["codename"]) | |
| if len(example_episodes) == num_episodes: | |
| break | |
| return example_episodes | |
| def sample_episodes(data_dir: str, num_episodes: int = 30) -> None: | |
| with open(os.path.join(data_dir, "sotopia_episodes_v1.jsonl"), "r") as f: | |
| episodes = [json.loads(line) for line in f] | |
| qualifying_episodes = select_qualifying_episodes(episodes) | |
| example_episodes = create_non_repeating_sample_episodes( | |
| qualifying_episodes, num_episodes=num_episodes | |
| ) | |
| with open(os.path.join(data_dir, "example_episodes.jsonl"), "w") as f: | |
| for episode in example_episodes: | |
| f.write(json.dumps(episode) + "\n") | |