|
|
import json
|
|
|
|
|
|
# Hard-coded dataset locations used when this file is run as a script.
# NOTE(review): the input lives under `test\` while the output goes to
# `train\` -- confirm this pairing is intentional.
SRC_LABEL_PATH = r'D:\MyCode\Python\Model\paddleocr\total_text\test\train.txt'
DST_LABEL_PATH = r'D:\MyCode\Python\Model\paddleocr\total_text\train\train_rec.txt'


def extract_rec_lines(line):
    """Convert one input label line into zero or more output lines.

    An input line is expected to hold exactly two tab-separated fields:
    an image path and a JSON array of annotation objects. For every
    annotation whose ``"transcription"`` value is a non-blank string, one
    ``"<img_path>\\t<text>\\n"`` line is produced (text is whitespace-trimmed).

    Args:
        line: A raw line read from the input label file.

    Returns:
        A list of output lines (possibly empty). Lines that do not have
        exactly two fields yield an empty list. Lines whose second field is
        not valid JSON, or is valid JSON but not a list, are reported on
        stdout and yield an empty list.
    """
    fields = line.strip().split('\t')
    if len(fields) != 2:
        return []

    img_path, annotations = fields

    # Keep the try body to the only call expected to raise a decode error.
    try:
        ann_list = json.loads(annotations)
    except json.JSONDecodeError:
        print(f"Lỗi JSON ở dòng: {line}")
        return []

    # A well-formed but wrongly shaped payload (object, number, string...)
    # is reported the same way instead of crashing the whole conversion.
    if not isinstance(ann_list, list):
        print(f"Lỗi JSON ở dòng: {line}")
        return []

    rec_lines = []
    for ann in ann_list:
        # Skip entries that are not objects; they cannot carry a transcription.
        if not isinstance(ann, dict):
            continue
        text = ann.get("transcription")
        # The key may be absent, null, or a non-string value; calling
        # .strip() on those would raise AttributeError and abort the run.
        if not isinstance(text, str):
            continue
        text = text.strip()
        if text:
            rec_lines.append(f"{img_path}\t{text}\n")
    return rec_lines


def main():
    """Read the source label file and write the converted label file."""
    with open(SRC_LABEL_PATH, 'r', encoding='utf-8') as f, \
            open(DST_LABEL_PATH, 'w', encoding='utf-8') as out_f:
        for line in f:
            out_f.writelines(extract_rec_lines(line))


if __name__ == "__main__":
    main()
|
|
|
|