|
|
import json
import os

import click
import jsonlines
from rich import print
|
|
|
|
|
|
|
|
@click.command()
@click.option("--data_dir", type=str, required=True, help="Directory containing data files.")
@click.option("--input_file", type=str, required=True, help="Path to the raw JSON file.")
@click.option("--output_file", type=str, required=True, help="Path to the processed JSON file.")
def main(data_dir: str, input_file: str, output_file: str) -> None:
    """
    Process the JSON file containing Sotopia episodes.

    The input is read as one JSON document per line. Episodes whose first
    two "experiment_model_name_pairs" entries are both "gpt-4" are kept and
    written to the output file, one JSON document per line.

    Both input_file and output_file are joined onto data_dir.

    An episode without "experiment_model_name_pairs", or with fewer than two
    entries in it, raises KeyError / IndexError.
    """
    input_path = os.path.join(data_dir, input_file)
    output_path = os.path.join(data_dir, output_file)

    # JSON text is UTF-8; do not depend on the platform's locale encoding.
    with open(input_path, "r", encoding="utf-8") as f:
        # Skip blank / whitespace-only lines: json.loads() would raise
        # JSONDecodeError on them and abort the whole run.
        episodes = [json.loads(line) for line in f if line.strip()]
    print("[bold green]Successfully loaded episodes:[/bold green]")

    # Only the first two model names are compared; any further entries in
    # the list are ignored by this filter.
    behavior_cloning_episodes = [
        episode
        for episode in episodes
        if episode["experiment_model_name_pairs"][0] == "gpt-4"
        and episode["experiment_model_name_pairs"][1] == "gpt-4"
    ]
    print(f"[bold green]Successfully filtered {len(behavior_cloning_episodes)} episodes [/bold green]")

    with open(output_path, "w", encoding="utf-8") as f:
        jsonlines.Writer(f).write_all(behavior_cloning_episodes)
|
|
|
|
|
# Run the click command only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|
|
|