"""Build per-dataset SFT training files from agent task definitions.

For every sub-directory of ``dataset/agent_tasks`` that contains both a
``dataset.json`` and a ``tasks.json`` file, one JSON Lines file named
``train/sft_train_<directory name>.jsonl`` is written.  Each line pairs a
task (``"input"``) with a fixed subset of the owning agent's attributes
(``"output"``).
"""

import json
from pathlib import Path
from typing import Any, Dict, Iterable, Iterator


def _iter_records(
    agent_map: Dict[Any, Dict[str, Any]],
    agent_entries: Iterable[Dict[str, Any]],
    fields: Iterable[str],
) -> Iterator[Dict[str, Any]]:
    """Yield one ``{"input": task, "output": agent_subset}`` dict per task.

    Args:
        agent_map: Agent definitions keyed by their ``"agent_id"``.
        agent_entries: Entries from ``tasks.json``; each must carry an
            ``"agent_id"`` and may carry a ``"tasks"`` list.
        fields: Keys copied from the agent definition into ``"output"``.

    Raises:
        KeyError: If an entry has no ``"agent_id"`` or a matched agent lacks
            one of ``fields``.
    """
    fields = list(fields)
    for agent_entry in agent_entries:
        agent = agent_map.get(agent_entry["agent_id"])
        if not agent:
            # Unknown (or empty) agent definition: nothing to emit.
            continue
        agent_output = {key: agent[key] for key in fields}
        # ``or []`` also covers an explicit ``"tasks": null`` in the JSON,
        # which ``.get("tasks", [])`` would hand back as ``None``.
        for task in agent_entry.get("tasks") or []:
            yield {"input": task, "output": agent_output}


def main() -> None:
    """Convert every dataset under ``dataset/agent_tasks`` to JSONL files.

    Directories are processed in sorted order.  Entries that are not
    directories, or that lack ``dataset.json`` or ``tasks.json``, are skipped.
    Paths are relative to the current working directory.

    Raises:
        FileNotFoundError: If ``dataset/agent_tasks`` does not exist.
        KeyError: If a JSON document lacks a required key (``"agents"``,
            ``"agent_id"`` or one of the exported agent fields).
        json.JSONDecodeError: If an input file is not valid JSON.
    """
    base_dir = Path("dataset/agent_tasks")
    output_dir = Path("train")
    output_dir.mkdir(parents=True, exist_ok=True)

    fields = ["agent_id", "display_name", "persona", "description", "tools"]

    for dataset_dir in sorted(base_dir.iterdir()):
        if not dataset_dir.is_dir():
            continue

        dataset_path = dataset_dir / "dataset.json"
        tasks_path = dataset_dir / "tasks.json"
        if not dataset_path.exists() or not tasks_path.exists():
            continue

        dataset_data = json.loads(dataset_path.read_text(encoding="utf-8"))
        tasks_data = json.loads(tasks_path.read_text(encoding="utf-8"))

        agent_map = {agent["agent_id"]: agent for agent in dataset_data["agents"]}

        output_path = output_dir / f"sft_train_{dataset_dir.name}.jsonl"
        with output_path.open("w", encoding="utf-8") as output_file:
            # ensure_ascii=False keeps non-ASCII text readable in the output.
            output_file.writelines(
                json.dumps(record, ensure_ascii=False) + "\n"
                for record in _iter_records(agent_map, tasks_data["agents"], fields)
            )


if __name__ == "__main__":
    main()