File size: 597 Bytes
import json
def merge_short_lines(file_path, min_length=32):
    """Merge consecutive short JSON Lines records into longer text entries.

    Each non-blank line of ``file_path`` is parsed as a JSON object and its
    ``"text"`` value is stripped and queued. Queued fragments are joined with
    single spaces and emitted as one entry once the pending length reaches
    ``min_length``. Fragments still queued at end of file are emitted as a
    final entry if they contain any non-whitespace text.

    Args:
        file_path: Path to a UTF-8 file with one JSON object per line.
        min_length: Pending length at which the queued fragments are
            flushed. The pending length counts every fragment plus one
            separator character per fragment.

    Returns:
        A list of ``{"text": merged_string}`` dicts in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a parsed object has no ``"text"`` key.
    """
    merged = []
    parts = []
    pending_len = 0
    with open(file_path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                # Blank lines carry no record; json.loads("") would raise.
                continue
            text = json.loads(line)["text"].strip()
            parts.append(text)
            # One separator is counted per fragment (including the first) so
            # the flush threshold behaves the same as the space-prefixed
            # string buffer this implementation replaces.
            pending_len += len(text) + 1
            if pending_len >= min_length:
                merged.append({"text": " ".join(parts).strip()})
                parts = []
                pending_len = 0
    # Flush whatever never reached the threshold, as a string (not a method).
    leftover = " ".join(parts).strip()
    if leftover:
        merged.append({"text": leftover})
    print(f"Merged {len(merged)} lines")
    return merged