File size: 446 Bytes
1026698 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 | import json
import os
DUMP_PATH = "/media/daniel/HDD/output"

# Number of leading lines inspected per file (line indices 0..5).
PREVIEW_LINES = 6

# File-name suffixes that are treated as line-delimited JSON dumps.
JSON_SUFFIXES = (".json", ".jsonl")


def preview_file(path, limit=PREVIEW_LINES):
    """Print a one-line summary for each of the first *limit* lines of *path*.

    Every line is parsed as a standalone JSON document. For JSON objects the
    line index, the value of the "title" key and the length of the "text"
    value are printed. Blank lines are skipped silently; lines that are not
    valid JSON, or that do not decode to an object, are reported and skipped
    so that one bad line does not abort the whole preview.

    Args:
        path: Path of the file to read (opened as UTF-8 text).
        limit: Maximum number of leading lines to inspect.
    """
    with open(path, "r", encoding="utf-8") as f:
        for i, line in enumerate(f):
            if i >= limit:
                break
            if not line.strip():
                # Blank lines (e.g. a trailing newline) carry no record.
                continue
            try:
                data = json.loads(line)
            except json.JSONDecodeError as err:
                print(i, "Skipping invalid JSON line:", err)
                continue
            if not isinstance(data, dict):
                # Only objects support the key lookups below.
                print(i, "Skipping non-object record of type", type(data).__name__)
                continue
            # "text" may be absent or explicitly null; both count as empty.
            text = data.get("text") or ""
            print(i, data.get("title"), "Text length:", len(text))


def main():
    """Preview every .json / .jsonl regular file directly inside DUMP_PATH."""
    for name in os.listdir(DUMP_PATH):
        path = os.path.join(DUMP_PATH, name)
        # Skip directories that merely happen to carry a JSON-like suffix.
        if name.endswith(JSON_SUFFIXES) and os.path.isfile(path):
            preview_file(path)


if __name__ == "__main__":
    main()
|