# Spaces: Build error
import json
# Input JSONL file: each line is parsed as a JSON object whose "text" field
# holds a prompt and a reply separated by a "### Response:" marker.
input_file = "rl_trajectories.jsonl"
# Output JSONL file: one {"user": ..., "assistant": ...} object per line.
output_file = "fixed_dataset.jsonl"
def extract_parts(text):
    """Split a sample at its first "### Response:" marker.

    Args:
        text: The full sample text. Everything before the first marker is
            treated as the user part, everything after it as the assistant
            part.

    Returns:
        A ``(user_part, assistant_part)`` tuple of whitespace-stripped
        strings. ``(None, None)`` is returned when *text* is not a string or
        does not contain the marker. Either part may be an empty string when
        the marker sits at the very start or end of *text*.
    """
    marker = "### Response:"
    # Non-string input (e.g. a JSON null) cannot be split; report "no parts"
    # explicitly instead of relying on a catch-all exception handler.
    if not isinstance(text, str):
        return None, None
    # partition() cuts at the first marker only, so a reply that itself
    # contains the marker text is kept whole rather than truncated.
    user_part, found, assistant_part = text.partition(marker)
    if not found:
        return None, None
    return user_part.strip(), assistant_part.strip()
# Convert the input JSONL into user/assistant pairs, one JSON object per line.
# The encoding is pinned to UTF-8 so the result does not depend on the
# platform's default locale.
with open(input_file, "r", encoding="utf-8") as f_in, \
        open(output_file, "w", encoding="utf-8") as f_out:
    for line in f_in:
        # Blank lines (such as a trailing newline at end of file) are not
        # JSON documents; skip them instead of crashing in json.loads.
        if not line.strip():
            continue
        data = json.loads(line)
        text = data.get("text", "")
        user, assistant = extract_parts(text)
        # Drop records where the marker was missing or either side is empty.
        if not (user and assistant):
            continue
        new_entry = {
            "user": user,
            "assistant": assistant,
        }
        f_out.write(json.dumps(new_entry) + "\n")

print("Done. Fixed dataset saved.")