English
File size: 446 Bytes
1026698
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
import json
import os

# Directory whose .json / .jsonl files are previewed below.
DUMP_PATH = "/media/daniel/HDD/output"

# For every .json / .jsonl file directly inside DUMP_PATH, print the line
# index, the "title" field and the length of the "text" field for the
# lines up to and including index 5.
for file in os.listdir(DUMP_PATH):
    # str.endswith accepts a tuple, so one call covers both extensions.
    if not file.endswith((".json", ".jsonl")):
        continue
    with open(os.path.join(DUMP_PATH, file), "r", encoding="utf-8") as f:
        # Every line is parsed as one standalone JSON document.
        for i, line in enumerate(f):
            # A blank line would make json.loads raise; skip parsing it
            # but still let it count toward the preview cutoff below.
            if line.strip():
                data = json.loads(line)
                # `or ""` also covers an explicit null "text" value, for
                # which data.get("text", "") would hand None to len().
                text = data.get("text") or ""
                print(i, data.get("title"), "Text length:", len(text))
            # Stop once line index 5 has been handled.
            if i >= 5:
                break