import json


def merge_short_lines(file_path, min_length: int = 32) -> list:
    """Merge consecutive short JSON Lines records into longer text chunks.

    Each non-blank line of the file is parsed as a JSON object and its
    ``"text"`` value is read. Texts are stripped and joined with single
    spaces until the joined text is at least ``min_length`` characters long,
    at which point the chunk is emitted and accumulation starts over. Any
    text still pending at end of file is emitted as a final (possibly
    shorter) chunk.

    Args:
        file_path: Path of a UTF-8 encoded file with one JSON object per line.
        min_length: Minimum length, in characters, of each emitted chunk
            (the trailing leftover chunk may be shorter).

    Returns:
        A list of ``{"text": <merged string>}`` dicts, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record has no ``"text"`` key.
    """
    merged = []
    parts = []
    length = 0  # length of " ".join(parts), maintained incrementally

    with open(file_path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                # Blank lines (e.g. a trailing newline at EOF) are not
                # records; json.loads("") would raise on them.
                continue

            text = json.loads(line)["text"].strip()
            if not text:
                # An empty text contributes nothing and would otherwise
                # introduce doubled separator spaces.
                continue

            # Account for the separator only between parts, so the threshold
            # is checked against the text that is actually emitted.
            length += len(text) + (1 if parts else 0)
            parts.append(text)

            if length >= min_length:
                merged.append({"text": " ".join(parts)})
                parts = []
                length = 0

    if parts:
        # Flush whatever is left even though it is below the threshold.
        merged.append({"text": " ".join(parts)})

    print(f"Merged {len(merged)} lines")
    return merged