"""Flatten a detection-style label file into per-text recognition labels.

Each input line is expected to look like ``<image path>\\t<JSON list>``, where
every element of the JSON list is an object that may carry a
``"transcription"`` string.  For every non-empty transcription one output
line ``<image path>\\t<transcription>`` is written.
"""

import json

# NOTE(review): the input lives under ``test\`` but is named ``train.txt`` and
# the output goes under ``train\`` -- confirm this is the intended source file.
SRC_LABEL_PATH = r'D:\MyCode\Python\Model\paddleocr\total_text\test\train.txt'
DST_REC_PATH = r'D:\MyCode\Python\Model\paddleocr\total_text\train\train_rec.txt'


def extract_rec_lines(line: str) -> list:
    """Return the output lines produced by one input label line.

    Args:
        line: One raw line of the input file (trailing newline allowed).

    Returns:
        A list of ``"<image path>\\t<text>\\n"`` strings, one per annotation
        whose ``"transcription"`` is a non-blank string.  The list is empty
        when the line does not have exactly two tab-separated fields, when the
        second field is not valid JSON, or when it does not decode to a list.
    """
    parts = line.strip().split('\t')
    if len(parts) != 2:
        return []  # skip malformed lines
    img_path, annotations = parts

    # Keep the try body minimal: only the decode call can raise this error.
    try:
        ann_list = json.loads(annotations)
    except json.JSONDecodeError:
        print(f"Lỗi JSON ở dòng: {line}")
        return []

    # Valid JSON of the wrong shape (object, number, null, ...) is reported
    # like a decode failure instead of crashing halfway through the file.
    if not isinstance(ann_list, list):
        print(f"Lỗi JSON ở dòng: {line}")
        return []

    rec_lines = []
    for ann in ann_list:
        if not isinstance(ann, dict):
            continue  # not an annotation object; nothing to extract
        text = ann.get("transcription", "")
        if not isinstance(text, str):
            continue  # e.g. JSON null or a number: no usable text
        text = text.strip()
        if text:
            rec_lines.append(f"{img_path}\t{text}\n")
    return rec_lines


def convert_labels(src_path: str, dst_path: str) -> int:
    """Convert the label file at *src_path* and write the result to *dst_path*.

    Both files are opened as UTF-8 text; *dst_path* is overwritten.

    Args:
        src_path: Path of the tab-separated input label file.
        dst_path: Path of the output file to create.

    Returns:
        The number of lines written to *dst_path*.
    """
    written = 0
    with open(src_path, 'r', encoding='utf-8') as f, \
            open(dst_path, 'w', encoding='utf-8') as out_f:
        for line in f:
            rec_lines = extract_rec_lines(line)
            out_f.writelines(rec_lines)
            written += len(rec_lines)
    return written


def main() -> None:
    """Run the conversion on the hard-coded source and destination paths."""
    convert_labels(SRC_LABEL_PATH, DST_REC_PATH)


if __name__ == "__main__":
    main()