"""Convert JSONL datasets into fastText supervised-training text files.

Each input line is a JSON object with a ``label`` and a ``content`` field.
Each output line has the form ``__label__<label> <text>``.
"""

import json

# Maps each input JSONL path to the fastText text file written for it.
FILES = {
    "dataset/train.jsonl": "fasttext_train.txt",
    "dataset/validation.jsonl": "fasttext_validation.txt",
    "dataset/test.jsonl": "fasttext_test.txt",
}


def format_fasttext_line(row: dict) -> str:
    """Return one fastText training line (newline-terminated) for *row*.

    Args:
        row: Mapping with a ``"label"`` and a ``"content"`` entry; both are
            converted with ``str()`` before formatting.

    Raises:
        KeyError: If ``"label"`` or ``"content"`` is missing from *row*.
    """
    # fastText splits on whitespace, so a label with inner whitespace would be
    # cut short and its tail read as text; join the pieces with "_" instead.
    # Labels without inner whitespace are only stripped.
    label = "_".join(str(row["label"]).split())

    # Defuse any literal label prefix inside the text so it cannot be read
    # as an extra label.
    text = str(row["content"]).replace("__label__", "__lbl__")
    # Collapse every whitespace run (including newlines) to a single space so
    # that each sample occupies exactly one output line.
    text = " ".join(text.split())

    return f"__label__{label} {text}\n"


def convert_file(input_file: str, output_file: str) -> int:
    """Convert the JSONL file *input_file* into fastText format.

    Whitespace-only lines in the input are skipped.

    Args:
        input_file: Path of the UTF-8 JSONL file to read.
        output_file: Path of the UTF-8 text file to (over)write.

    Returns:
        The number of samples written.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a row lacks ``"label"`` or ``"content"``.
    """
    count = 0
    with open(input_file, "r", encoding="utf-8") as fin, \
            open(output_file, "w", encoding="utf-8") as fout:
        for line in fin:
            # A blank line (e.g. a trailing empty line) is not a record and
            # would make json.loads raise; skip it.
            if not line.strip():
                continue
            fout.write(format_fasttext_line(json.loads(line)))
            count += 1
    return count


def main() -> None:
    """Convert every dataset listed in ``FILES`` and report progress."""
    for input_file, output_file in FILES.items():
        print(f"Converting {input_file} -> {output_file}")
        count = convert_file(input_file, output_file)
        print(f"Saved {count:,} samples")
    print("\nDone.")


if __name__ == "__main__":
    main()