File size: 1,207 Bytes
0c51b93 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 |
import json
import os
import click
import jsonlines
from rich import print
@click.command()
@click.option("--data_dir", type=str, required=True, help="Directory containing data files.")
@click.option("--input_file", type=str, required=True, help="Path to the raw JSON file.")
@click.option("--output_file", type=str, required=True, help="Path to the processed JSON file.")
def main(data_dir: str, input_file: str, output_file: str) -> None:
"""
Process the JSON file containing Sotopia episodes.
"""
with open(os.path.join(data_dir, input_file), "r") as f:
episodes = [json.loads(line) for line in f]
print("[bold green]Successfully loaded episodes:[/bold green]")
behavior_cloning_episodes = []
for episode in episodes:
if episode["experiment_model_name_pairs"][0] == "gpt-4" and episode["experiment_model_name_pairs"][1] == "gpt-4":
behavior_cloning_episodes.append(episode)
print(f"[bold green]Successfully filtered {len(behavior_cloning_episodes)} episodes [/bold green]")
with open(os.path.join(data_dir, output_file), "w") as f:
jsonlines.Writer(f).write_all(behavior_cloning_episodes)
if __name__ == "__main__":
main()
|