Spaces:
Build error
Build error
File size: 777 Bytes
import json
# Path of the source file; the script reads it line by line and parses each line as JSON.
input_file = "rl_trajectories.jsonl"
# Path of the destination file; the script writes one JSON object per line to it.
output_file = "fixed_dataset.jsonl"
def extract_parts(text):
    """Split a combined prompt/response string on the ``### Response:`` marker.

    Args:
        text: The combined string. Anything before the first
            ``### Response:`` marker is treated as the user part, the text
            following it as the assistant part.

    Returns:
        A ``(user_part, assistant_part)`` tuple of whitespace-stripped
        strings, or ``(None, None)`` when ``text`` is not a string or does
        not contain the marker. Either part may be an empty string.
    """
    marker = "### Response:"

    # Non-string values (e.g. a JSON null or number) cannot be split; report
    # them as "no parts" explicitly instead of relying on a catch-all except.
    if not isinstance(text, str):
        return None, None

    pieces = text.split(marker)
    if len(pieces) < 2:
        # Marker absent: nothing to extract.
        return None, None

    # Only the text between the first and a possible second marker is used
    # as the assistant part; anything after a second marker is discarded.
    return pieces[0].strip(), pieces[1].strip()
# Convert each usable record of the input JSONL file into a
# {"user": ..., "assistant": ...} record in the output JSONL file.
# The encoding is given explicitly so the result does not depend on the
# platform's default locale encoding.
with open(input_file, "r", encoding="utf-8") as f_in, \
        open(output_file, "w", encoding="utf-8") as f_out:
    for line in f_in:
        # Blank lines (such as a trailing newline at the end of the file)
        # are not valid JSON and would make json.loads raise; skip them.
        if not line.strip():
            continue

        data = json.loads(line)
        text = data.get("text", "")
        user, assistant = extract_parts(text)

        # Records without a marker, or with an empty user or assistant
        # part, are dropped from the output.
        if user and assistant:
            new_entry = {
                "user": user,
                "assistant": assistant,
            }
            f_out.write(json.dumps(new_entry) + "\n")

print("Done. Fixed dataset saved.")