File size: 733 Bytes
2b45a96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
import json

with open(r'D:\MyCode\Python\Model\paddleocr\total_text\test\train.txt', 'r', encoding='utf-8') as f, open(r'D:\MyCode\Python\Model\paddleocr\total_text\train\train_rec.txt', 'w', encoding='utf-8') as out_f:
    for line in f:
        parts = line.strip().split('\t')
        if len(parts) != 2:
            continue  # bỏ qua dòng lỗi

        img_path, annotations = parts
        try:
            ann_list = json.loads(annotations)
            for ann in ann_list:
                text = ann.get("transcription", "").strip()
                if text:
                    out_f.write(f"{img_path}\t{text}\n")
        except json.JSONDecodeError:
            print(f"Lỗi JSON ở dòng: {line}")