CreatedNull committed on
Commit
2387be3
·
verified ·
1 Parent(s): 2a48b42

Delete mergelines.py

Browse files
Files changed (1) hide show
  1. mergelines.py +0 -20
mergelines.py DELETED
@@ -1,20 +0,0 @@
1
- import json
2
def merge_short_lines(file_path, min_length=32):
    """Merge consecutive short records of a JSON Lines file into longer ones.

    Each non-blank line of the file is parsed as a JSON object and its
    ``"text"`` value is appended (space-separated) to a pending buffer.
    Once the pending buffer, counting one separator character per record,
    reaches ``min_length`` characters, it is emitted as a single merged
    record and the buffer is reset. Any non-empty remainder left at the end
    of the file is emitted as a final record.

    Args:
        file_path: Path to a UTF-8 encoded file with one JSON object per line.
        min_length: Pending length at which a merged record is emitted.

    Returns:
        A list of ``{"text": <merged string>}`` dictionaries.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a parsed object has no ``"text"`` key.
    """
    # Imported locally so the function does not depend on module-level state.
    import json

    merged = []
    pieces = []
    # Tracks the length the pending text would have if every piece were
    # prefixed with a single space; this is what the threshold is compared to.
    pending_len = 0

    with open(file_path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                # Blank lines (e.g. a trailing newline) carry no record.
                continue
            text = json.loads(line)["text"].strip()
            pieces.append(text)
            pending_len += len(text) + 1
            if pending_len >= min_length:
                merged.append({"text": " ".join(pieces).strip()})
                pieces = []
                pending_len = 0

    # Flush whatever is left; call strip() so a string (not a method) is stored.
    leftover = " ".join(pieces).strip()
    if leftover:
        merged.append({"text": leftover})

    print(f"Merged {len(merged)} lines")
    return merged