File size: 48,804 Bytes
a4853dc
50d1f57
4fef2dd
24fd13a
 
 
 
 
4fef2dd
a4853dc
 
 
 
 
f69131e
50d1f57
a4853dc
f407757
a4853dc
4fef2dd
50d1f57
f69131e
50d1f57
f69131e
50d1f57
f69131e
 
 
50d1f57
 
cb8c63c
 
d640e48
 
cb8c63c
 
 
 
d640e48
cb8c63c
 
 
 
d640e48
cb8c63c
 
d640e48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cb8c63c
 
 
 
 
 
 
 
 
 
 
 
 
50d1f57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f69131e
 
 
 
50d1f57
f69131e
4fef2dd
 
f69131e
a4853dc
4fef2dd
 
50d1f57
d2abad3
 
 
50d1f57
d2abad3
 
 
 
 
 
 
 
 
 
d80899e
50d1f57
d80899e
d2abad3
50d1f57
d80899e
 
 
 
50d1f57
d80899e
 
 
 
f8fef9f
50d1f57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f69131e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50d1f57
 
 
 
 
 
f69131e
 
 
1813932
f69131e
 
 
 
 
 
 
 
1813932
 
f69131e
 
 
1813932
f69131e
 
 
 
1813932
 
f69131e
50d1f57
f69131e
50d1f57
d640e48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f69131e
a4853dc
f69131e
 
 
 
1813932
f69131e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50d1f57
f8fef9f
50d1f57
 
f8fef9f
f69131e
50d1f57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f69131e
 
 
 
 
 
 
 
 
 
 
 
 
50d1f57
f69131e
50d1f57
f69131e
50d1f57
f69131e
 
 
 
50d1f57
f69131e
 
 
50d1f57
 
 
 
 
 
 
 
 
 
 
 
f69131e
 
50d1f57
f69131e
 
50d1f57
f69131e
 
50d1f57
 
 
 
 
 
 
 
 
 
 
 
f69131e
 
50d1f57
f69131e
 
 
50d1f57
 
f69131e
 
d640e48
 
 
50d1f57
f69131e
50d1f57
f69131e
 
50d1f57
f69131e
50d1f57
 
 
f69131e
 
50d1f57
f69131e
50d1f57
f69131e
50d1f57
f69131e
 
50d1f57
 
 
 
 
 
 
 
d640e48
 
 
 
 
 
 
f69131e
d640e48
50d1f57
f69131e
 
 
50d1f57
f69131e
50d1f57
 
 
 
 
f69131e
50d1f57
 
 
 
 
 
f69131e
50d1f57
f69131e
50d1f57
 
 
f69131e
50d1f57
 
 
 
 
 
 
d80899e
50d1f57
 
 
 
 
 
 
 
d80899e
50d1f57
 
 
 
 
 
 
 
 
 
f69131e
 
50d1f57
f69131e
50d1f57
 
f69131e
50d1f57
 
f69131e
50d1f57
 
 
f69131e
50d1f57
 
 
f69131e
50d1f57
f69131e
50d1f57
 
 
f69131e
50d1f57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f69131e
50d1f57
f69131e
 
50d1f57
 
 
 
 
f69131e
50d1f57
 
f69131e
50d1f57
 
f69131e
50d1f57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f69131e
 
 
50d1f57
f69131e
 
50d1f57
 
 
cb8c63c
 
50d1f57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d640e48
 
 
 
 
 
 
 
 
 
50d1f57
 
 
 
 
 
 
 
 
 
 
 
 
 
f69131e
 
50d1f57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1813932
 
f69131e
 
 
 
 
50d1f57
 
 
 
 
 
 
 
d80899e
 
 
 
 
 
 
 
 
 
 
f69131e
50d1f57
 
d80899e
 
 
 
 
50d1f57
 
 
 
 
 
 
 
d80899e
50d1f57
d80899e
50d1f57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1813932
f69131e
 
 
 
 
 
 
 
 
d80899e
f69131e
 
 
 
 
 
 
 
 
 
d80899e
f69131e
 
50d1f57
f69131e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50d1f57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f69131e
 
 
d80899e
 
 
 
 
 
 
 
 
50d1f57
f69131e
50d1f57
f69131e
50d1f57
 
 
 
f69131e
 
 
 
50d1f57
 
 
 
f69131e
 
1813932
f69131e
 
50d1f57
f69131e
50d1f57
f69131e
c87ac5f
 
f69131e
 
 
c87ac5f
f69131e
 
 
c87ac5f
 
f69131e
 
 
 
 
 
c87ac5f
f69131e
 
 
 
 
 
 
f8fef9f
 
50d1f57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f69131e
1813932
f69131e
50d1f57
f69131e
 
 
a4853dc
50d1f57
 
 
a4853dc
50d1f57
 
f69131e
 
50d1f57
a4853dc
 
f69131e
50d1f57
4fef2dd
 
1813932
50d1f57
4fef2dd
50d1f57
c87ac5f
50d1f57
f69131e
 
 
a4853dc
f69131e
50d1f57
a4853dc
 
f69131e
 
 
a4853dc
f69131e
 
 
c87ac5f
50d1f57
4fef2dd
f69131e
 
 
 
a4853dc
4fef2dd
50d1f57
c87ac5f
1813932
4fef2dd
f69131e
 
 
1813932
50d1f57
1813932
a4853dc
50d1f57
 
1813932
 
a4853dc
50d1f57
f69131e
a4853dc
4fef2dd
1813932
4fef2dd
a4853dc
 
d6af665
50d1f57
a4853dc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
# src/inference.py

import torch
# Monkeypatch ``torch.load`` so that ``weights_only`` defaults to False unless
# the caller passes it explicitly (presumably so that checkpoints containing
# pickled Python objects keep loading under newer PyTorch defaults -- confirm).
# NOTE(security): with weights_only=False, torch.load performs full pickle
# deserialisation; only load checkpoint files from trusted sources.
_orig_torch_load = torch.load


def _patched_load(*args, **kwargs):
    """Forward to the original ``torch.load`` with ``weights_only=False`` as default."""
    if "weights_only" not in kwargs:
        kwargs["weights_only"] = False
    return _orig_torch_load(*args, **kwargs)


torch.load = _patched_load

import cv2
import json
import numpy as np
from pathlib import Path
from ultralytics import RTDETR
import re
from difflib import SequenceMatcher

# Compute device shared by every model in this module: CUDA when available,
# otherwise CPU. The choice is printed once at import time.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(f"[INFO] Device: {DEVICE}")

# Detection class identifiers, their display labels, and a colour per class.
# NOTE(review): colour tuples are presumably BGR (OpenCV order) -- confirm
# against the drawing code.
CLASS_NAMES = ["note", "part-drawing", "table"]
CLASS_DISPLAY = {"note": "Note", "part-drawing": "PartDrawing", "table": "Table"}
COLORS = {"note": (0,165,255), "part-drawing": (0,200,0), "table": (0,0,220)}

# Lazily-initialised singletons; each stays None until its get_* loader
# creates the corresponding model/reader.
_det_model = None
_ocr_paddle = None
_ocr_paddle_en = None
_ocr_easyocr = None
_ocr_vietocr = None


# Optional dependency probe: REALESRGAN_AVAILABLE becomes True only when both
# realesrgan and basicsr import successfully.
REALESRGAN_AVAILABLE = False
_esrgan_upsampler = None  # Module-level cache for the Real-ESRGAN upsampler

try:
    from realesrgan import RealESRGANer
    from basicsr.archs.rrdbnet_arch import RRDBNet
    REALESRGAN_AVAILABLE = True
    print("[INFO] Real-ESRGAN is available")
except ImportError:
    print("[WARN] Real-ESRGAN not installed. Install: pip install realesrgan basicsr")

def get_esrgan_upsampler():
    """Return the shared Real-ESRGAN upsampler, building it on first use.

    Returns:
        The cached ``RealESRGANer`` instance, or ``None`` when Real-ESRGAN is
        not installed or the model could not be constructed. A failed load is
        not cached, so the next call attempts the load again.
    """
    global _esrgan_upsampler
    if not REALESRGAN_AVAILABLE:
        return None
    if _esrgan_upsampler is not None:
        return _esrgan_upsampler

    try:
        print("[INFO] Loading Real-ESRGAN model...")
        backbone = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=6, num_grow_ch=32, scale=4)
        _esrgan_upsampler = RealESRGANer(
            scale=4,
            model_path='weights/RealESRGAN_x4plus_anime_6B.pth',
            model=backbone,
            device=DEVICE
        )
    except Exception as e:
        # Best effort: report the failure and let callers fall back to the
        # original image.
        print(f"[WARN] Failed to load Real-ESRGAN: {e}")
        return None

    return _esrgan_upsampler

def upscale_if_needed(img_bgr, min_dim=300):
    """Upscale a small image with Real-ESRGAN.

    The image is enhanced (``outscale=2``) when its height OR its width is
    below ``min_dim``. The input is returned unchanged when it is already
    large enough, when no upsampler is available, or when enhancement fails.

    Args:
        img_bgr: Image array; only ``shape[:2]`` (height, width) is inspected.
        min_dim: Size threshold in pixels applied to each dimension.

    Returns:
        The enhanced image, or ``img_bgr`` itself.
    """
    height, width = img_bgr.shape[:2]
    if not (height < min_dim or width < min_dim):
        return img_bgr

    upsampler = get_esrgan_upsampler()
    if upsampler is None:
        return img_bgr

    try:
        enhanced, _ = upsampler.enhance(img_bgr, outscale=2)
    except Exception as e:
        print(f"[WARN] ESRGAN upscale failed: {e}")
        return img_bgr
    return enhanced
# ============================================================
# DOMAIN DICTIONARY — Vietnamese technical-drawing vocabulary
# ============================================================

# Common terms found in the parts lists of technical drawings
TECH_DICTIONARY = {
    # Part names
    "bọc táp": "Bọc táp",
    "boc tap": "Bọc táp",
    "bạc táp": "Bọc táp",
    "bọc tốp": "Bọc táp",
    "bọc tot": "Bọc táp",
    "vòng đệm": "Vòng đệm",
    "vong dem": "Vòng đệm",
    "vòng dệm": "Vòng đệm",
    "vong đệm": "Vòng đệm",
    "chốt trụ": "Chốt trụ",
    "chot tru": "Chốt trụ",
    "chốt trự": "Chốt trụ",
    "chot trụ": "Chốt trụ",
    "vít": "Vít",
    "vit": "Vít",
    "bu lông": "Bu lông",
    "bu long": "Bu lông",
    "bulong": "Bu lông",
    "bu-lông": "Bu lông",
    "bulông": "Bu lông",
    "vòng đệm vênh": "Vòng đệm vênh",
    "vong dem venh": "Vòng đệm vênh",
    "vòng dệm vênh": "Vòng đệm vênh",
    "then bằng": "Then bằng",
    "then bang": "Then bằng",
    "then bảng": "Then bằng",
    "ống dẫn": "Ống dẫn",
    "ong dan": "Ống dẫn",
    "ống chẫn": "Ống dẫn",
    "ống dần": "Ống dẫn",
    "ông dẫn": "Ống dẫn",
    "ông chẫn": "Ống dẫn",
    "ống chến": "Ống dẫn",
    "ông chến": "Ống dẫn",
    "chốt chặn": "Chốt chặn",
    "chot chan": "Chốt chặn",
    "chốt chắn": "Chốt chặn",
    "cnốt chến": "Chốt chặn",
    "chốt chén": "Chốt chặn",
    "bạc lót": "Bạc lót",
    "bac lot": "Bạc lót",
    "bọc lót": "Bạc lót",
    "bạc lốt": "Bạc lót",
    "bọc lết": "Bạc lót",
    "bọc lết": "Bạc lót",
    "giá đỡ": "Giá đỡ",
    "gia do": "Giá đỡ",
    "giá dở": "Giá đỡ",
    "giá đở": "Giá đỡ",
    "bánh răng": "Bánh răng",
    "banh rang": "Bánh răng",
    "bành răng": "Bánh răng",
    "bánh rằng": "Bánh răng",
    "bảnh răng": "Bánh răng",
    "bdnh răng": "Bánh răng",
    "bdình răng": "Bánh răng",
    "hộp bánh răng": "Hộp bánh răng",
    "hop banh rang": "Hộp bánh răng",
    "hộp bành răng": "Hộp bánh răng",
    "mộp bành răng": "Hộp bánh răng",
    "mộp bánh răng": "Hộp bánh răng",
    "nắp": "Nắp",
    "nap": "Nắp",
    "năp": "Nắp",
    "nốp": "Nắp",
    
    # Materials
    "đồng nhôm": "Đồng nhôm",
    "dong nhom": "Đồng nhôm",
    "đồng thanh": "Đồng nhôm",
    "đồng thann": "Đồng nhôm",
    "đổng nhôm": "Đồng nhôm",
    "đống nhôm": "Đồng nhôm",
    "đống thanh": "Đồng nhôm",
    "thép ct3": "Thép CT3",
    "thep ct3": "Thép CT3",
    "thếp ct3": "Thép CT3",
    "tnép ct3": "Thép CT3",
    "thếp cts": "Thép CT3",
    "tnếp ct3": "Thép CT3",
    "thếp ctj": "Thép CT3",
    "tnép ctj": "Thép CT3",
    "tnếp ctj": "Thép CT3",
    "thep ctj": "Thép CT3",
    "thép 65": "Thép 65",
    "thep 65": "Thép 65",
    "thếp 65": "Thép 65",
    "tnếp 65": "Thép 65",
    "thếp 65f": "Thép 65",
    "tnép 65f": "Thép 65",
    "thép 6sf": "Thép 65",
    "thep 6sf": "Thép 65",
    "thếp 6sf": "Thép 65",
    "tnép 6sf": "Thép 65",
    "thép 45": "Thép 45",
    "thep 45": "Thép 45",
    "thếp 45": "Thép 45",
    "tnếp 45": "Thép 45",
    "sắt tây": "Sắt tây",
    "sat tay": "Sắt tây",
    "sắt tay": "Sắt tây",
    "sdi tay": "Sắt tây",
    "sdi day": "Sắt tây",
    "sdi đay": "Sắt tây",
    "gang 15-32": "Gang 15-32",
    "gang15-32": "Gang 15-32",
    "gong 15-32": "Gang 15-32",
    "gong15-32": "Gang 15-32",
    "gang 15.32": "Gang 15-32",
    "gang 15 32": "Gang 15-32",
    "gong 15.32": "Gang 15-32",
    "gang1532": "Gang 15-32",
    
    # Header
    "vị trí": "Vị trí",
    "vi tri": "Vị trí",
    "v.trí": "Vị trí",
    "tên chi tiết": "Tên chi tiết",
    "ten chi tiet": "Tên chi tiết",
    "tên chi tiết máy": "Tên chi tiết máy",
    "ten chi tiet may": "Tên chi tiết máy",
    "số lg": "Số lg",
    "so lg": "Số lg",
    "số lượng": "Số lg",
    "so luong": "Số lg",
    "s.lg": "Số lg",
    "số lý": "Số lg",
    "vật liệu": "Vật liệu",
    "vat lieu": "Vật liệu",
    "vat liéu": "Vật liệu",
    "ghi chú": "Ghi chú",
    "ghi chu": "Ghi chú",
    
    # Title block
    "bản vẽ số": "Bản vẽ số",
    "ban ve so": "Bản vẽ số",
    "bản gối": "Bản vẽ số",
    "bơm bánh răng": "BƠM BÁNH RĂNG",
    "bom banh rang": "BƠM BÁNH RĂNG",
    "bớm bánh răng": "BƠM BÁNH RĂNG",
    "bản vẽ lắp số": "Bản vẽ lắp số",
    "ban ve lap so": "Bản vẽ lắp số",
    "bản vể lắp số": "Bản vẽ lắp số",
    "bán vẽ lắp số": "Bản vẽ lắp số",
    "bán vể lắp số": "Bản vẽ lắp số",
    "tỷ lệ": "Tỷ lệ",
    "ty le": "Tỷ lệ",
    "tý lệ": "Tỷ lệ",
    "bộ môn hình hoạ": "Bộ môn Hình hoạ",
    "bộ môn hình họa": "Bộ môn Hình hoạ",
    "bo mon hinh hoa": "Bộ môn Hình hoạ",
    "bộ mốn hình hoạ": "Bộ môn Hình hoạ",
    "đại học bách khoa hà nội": "Đại học Bách khoa Hà Nội",
    "dai hoc bach khoa ha noi": "Đại học Bách khoa Hà Nội",
    "đại học bách khoa": "Đại học Bách khoa Hà Nội",
    "bại hoc bách khoa": "Đại học Bách khoa Hà Nội",
    "bại học bách khoa hà nội": "Đại học Bách khoa Hà Nội",
}

# Canonical part names for fuzzy matching
# (tried for the "name" and "auto" column types in correct_technical_text).
CANONICAL_PARTS = [
    "Bọc táp", "Vòng đệm", "Chốt trụ", "Vít", "Bu lông",
    "Vòng đệm vênh", "Then bằng", "Ống dẫn", "Chốt chặn",
    "Bạc lót", "Giá đỡ", "Bánh răng", "Hộp bánh răng", "Nắp",
]

# Canonical material names (tried for the "material" and "auto" column types).
CANONICAL_MATERIALS = [
    "Đồng nhôm", "Thép CT3", "Thép 65", "Thép 45",
    "Sắt tây", "Gang 15-32",
]

# Canonical table header labels (tried for the "name" and "auto" column types).
CANONICAL_HEADERS = [
    "Vị trí", "Tên chi tiết", "Tên chi tiết máy", "Số lg", 
    "Vật liệu", "Ghi chú",
]


def fuzzy_match(text, candidates, threshold=0.55):
    """Map OCR text onto the closest canonical candidate.

    The lower-cased, stripped text is first looked up in ``TECH_DICTIONARY``;
    a hit is returned immediately regardless of ``candidates``. Otherwise each
    candidate is scored with ``SequenceMatcher.ratio()`` and the first
    candidate with the highest score is returned when that score reaches
    ``threshold``.

    Args:
        text: OCR text to correct.
        candidates: Canonical strings to compare against.
        threshold: Minimum similarity ratio for a match to be accepted.

    Returns:
        The dictionary value or best candidate, or ``text`` unchanged when
        the input is empty, there are no candidates, or nothing is similar
        enough.
    """
    if not text or not candidates:
        return text

    needle = text.lower().strip()

    # An exact dictionary entry wins over any similarity scoring.
    try:
        return TECH_DICTIONARY[needle]
    except KeyError:
        pass

    pool = list(candidates)
    ratios = [SequenceMatcher(None, needle, option.lower()).ratio() for option in pool]
    top_score = max(ratios, default=0)
    # A candidate is only selected when it scored strictly above zero;
    # index() returns the first one, so ties keep the earliest candidate.
    top_option = pool[ratios.index(top_score)] if top_score > 0 else None

    if top_score >= threshold:
        return top_option
    return text


def correct_technical_text(text, column_type="auto"):
    """
    Fix OCR errors in one table cell using domain knowledge.

    Processing order:
      1. Exact lookup of the lower-cased text in ``TECH_DICTIONARY``.
      2. Column-specific handling: numeric columns are reduced to their
         digits; name/material/auto columns are fuzzy-matched against the
         canonical lists.
      3. Generic regex clean-up of thread and dimension specs.

    Args:
        text: Raw OCR text of the cell.
        column_type: "position", "name", "quantity", "material", "note" or
            "auto". With "auto", text that is all digits (ignoring '.' and
            ',') or is at most two characters long and contains a digit is
            treated as numeric; other text is fuzzy-matched against every
            canonical list. Any other value skips step 2.

    Returns:
        The corrected text. Empty or whitespace-only input is returned
        unchanged.
    """
    if not text or not text.strip():
        return text

    original = text.strip()
    text_lower = original.lower()

    # 1. Exact dictionary lookup
    if text_lower in TECH_DICTIONARY:
        return TECH_DICTIONARY[text_lower]

    # 2. Column-specific corrections
    if column_type == "auto":
        treat_as_number = (
            original.replace('.', '').replace(',', '').isdigit()
            or (len(original) <= 2 and any(c.isdigit() for c in original))
        )
    else:
        treat_as_number = column_type in ("position", "quantity")

    if treat_as_number:
        # Keep only ASCII digits; fall back to the stripped text if none remain.
        cleaned = re.sub(r'[^0-9]', '', original)
        return cleaned or original

    if column_type == "name":
        # Part names first, then header labels (header rows share this column).
        for candidates in (CANONICAL_PARTS, CANONICAL_HEADERS):
            result = fuzzy_match(original, candidates, threshold=0.5)
            if result != original:
                return result
    elif column_type == "material":
        result = fuzzy_match(original, CANONICAL_MATERIALS, threshold=0.5)
        if result != original:
            return result
    elif column_type == "auto":
        # Try all categories
        for candidates in (CANONICAL_PARTS, CANONICAL_MATERIALS, CANONICAL_HEADERS):
            result = fuzzy_match(original, candidates, threshold=0.55)
            if result != original:
                return result

    # 3. General corrections
    text_out = original

    # Thread specs: "M 8" / "m8" -> "M8". The leading \b keeps the rule from
    # firing on a word that merely ends in "m" before a number
    # (e.g. "Nhôm 5" must not become "NhôM5").
    text_out = re.sub(r'\b[Mm]\s*(\d)', r'M\1', text_out)

    # Separators misread by OCR: "5n8" -> "5x8", "4*6*14" -> "4x6x14".
    # These run before the spacing normalisation below so that their output
    # is normalised as well.
    text_out = re.sub(r'(\d+)\s*n\s*(\d+)', r'\1x\2', text_out)
    text_out = re.sub(r'(\d+)\s*[*]\s*(\d+)\s*[*]\s*(\d+)', r'\1x\2x\3', text_out)

    # Normalise spacing and separator glyphs: "5 X 8 – 35" -> "5x8-35",
    # "4 × 6 x 14" -> "4x6x14".
    text_out = re.sub(r'(\d+)\s*[xX×]\s*(\d+)\s*[-–]\s*(\d+)', r'\1x\2-\3', text_out)
    text_out = re.sub(r'(\d+)\s*[xX×]\s*(\d+)\s*[xX×]\s*(\d+)', r'\1x\2x\3', text_out)

    return text_out


def correct_table_row(row, num_columns=5):
    """
    Correct every cell of one table row according to its column position.

    Expected column order: [position, part name, quantity, material, note].
    The row is trimmed or padded with "" to exactly ``num_columns`` cells.
    The first five cells (or fewer when ``num_columns`` is below 5) are
    corrected with the matching column type; cells beyond the fifth are kept
    as-is. Empty cells are never sent through correction.

    Args:
        row: Sequence of cell strings.
        num_columns: Number of cells in the returned row.

    Returns:
        A new list of ``num_columns`` cells, or ``row`` itself when it is
        empty or None.
    """
    if not row:
        return row

    # Trim excess cells, then pad up to the expected width.
    corrected = list(row)[:num_columns]
    corrected.extend([""] * (num_columns - len(corrected)))

    # Column type per index; the note column is mostly kept as-is, so it
    # uses the generic "auto" rules.
    column_types = ("position", "name", "quantity", "material", "auto")

    # zip() stops at the shorter sequence, so a row narrower than five
    # columns no longer raises IndexError.
    for idx, column_type in zip(range(len(corrected)), column_types):
        if corrected[idx]:
            corrected[idx] = correct_technical_text(corrected[idx], column_type)

    return corrected


# ============================================================
# MODEL LOADERS
# ============================================================

def get_det_model(checkpoint="best.pt"):
    """Return the shared RT-DETR detection model, loading it on first use.

    Args:
        checkpoint: Weights path passed to ``RTDETR``. It is only used by the
            call that actually creates the model; later calls return the
            cached instance whatever ``checkpoint`` they pass.

    Returns:
        The cached ``RTDETR`` instance.
    """
    global _det_model
    if _det_model is not None:
        return _det_model

    print(f"[INFO] Loading detection model: {checkpoint}")
    _det_model = RTDETR(checkpoint)
    return _det_model


# ============================================================
# SURYA OCR (optional)
# ============================================================

# Optional dependency probe: SURYA_AVAILABLE becomes True only when every
# Surya import below succeeds; ocr_with_surya checks it before running.
SURYA_AVAILABLE = False
try:
    from surya.ocr import run_ocr
    from surya.model.detection.model import load_det_processor, load_det_model
    from surya.model.recognition.model import load_rec_model
    from surya.model.recognition.processor import load_rec_processor
    SURYA_AVAILABLE = True
    print("[INFO] Surya OCR is available")
except ImportError:
    print("[WARN] Surya OCR not installed. Install with: pip install surya-ocr")


# Lazily-created (det_processor, det_model, rec_model, rec_processor) tuple so
# the Surya models are loaded once instead of on every OCR call.
_surya_models = None


def ocr_with_surya(img_bgr, langs=None):
    """Run Surya OCR on an image and return the recognised text.

    Args:
        img_bgr: Image array; it is converted with ``cv2.COLOR_BGR2RGB``
            before being handed to Surya.
        langs: Language codes for recognition. Defaults to ``["vi", "en"]``.

    Returns:
        The recognised text lines joined with newlines.

    Raises:
        ImportError: If Surya OCR is not installed.
    """
    global _surya_models
    if not SURYA_AVAILABLE:
        raise ImportError("Surya OCR is not installed.")
    if langs is None:
        # Build the default per call rather than sharing a mutable default.
        langs = ["vi", "en"]

    if _surya_models is None:
        _surya_models = (
            load_det_processor(),
            load_det_model(),
            load_rec_model(),
            load_rec_processor(),
        )
    det_processor, det_model, rec_model, rec_processor = _surya_models

    from PIL import Image
    pil_img = Image.fromarray(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB))
    # run_ocr takes one image list and one language list per image.
    predictions = run_ocr([pil_img], [langs], det_model, det_processor,
                          rec_model, rec_processor)
    return "\n".join(line.text for line in predictions[0].text_lines)


# ============================================================
# VietOCR (optional - works well for Vietnamese handwriting)
# ============================================================

# Optional dependency probe: VIETOCR_AVAILABLE becomes True only when the
# VietOCR predictor and config classes import successfully.
VIETOCR_AVAILABLE = False
try:
    from vietocr.tool.predictor import Predictor
    from vietocr.tool.config import Cfg
    VIETOCR_AVAILABLE = True
    print("[INFO] VietOCR is available")
except ImportError:
    print("[WARN] VietOCR not installed. Install with: pip install vietocr")


def get_vietocr():
    """Return the shared VietOCR predictor, creating it on first use.

    Returns:
        The cached ``Predictor``, or ``None`` when VietOCR is not installed
        or loading failed. A failed load is not cached, so the next call
        tries again.
    """
    global _ocr_vietocr
    if _ocr_vietocr is not None or not VIETOCR_AVAILABLE:
        return _ocr_vietocr

    try:
        cfg = Cfg.load_config_from_name('vgg_transformer')
        cfg['cnn']['pretrained'] = True
        cfg['device'] = DEVICE
        _ocr_vietocr = Predictor(cfg)
        print("[INFO] VietOCR loaded successfully")
    except Exception as e:
        print(f"[WARN] VietOCR load failed: {e}")
    return _ocr_vietocr


def ocr_line_vietocr(img_bgr):
    """Recognise one text-line image with VietOCR.

    Returns:
        The stripped prediction, or "" when no VietOCR predictor is available.
    """
    predictor = get_vietocr()
    if predictor is None:
        return ""

    from PIL import Image
    rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
    return predictor.predict(Image.fromarray(rgb)).strip()


# ============================================================
# PaddleOCR / EasyOCR
# ============================================================

def get_paddle_reader(lang='vi'):
    """Return a cached PaddleOCR reader for ``lang``, creating it if needed.

    Two cache slots exist: one for ``lang == 'en'`` and one shared by every
    other language value, so the first non-English reader created is the one
    returned for all later non-English requests.

    Args:
        lang: PaddleOCR language code.

    Returns:
        The PaddleOCR reader, or ``None`` when import or initialisation fails.
    """
    global _ocr_paddle, _ocr_paddle_en

    english = lang == 'en'
    cached = _ocr_paddle_en if english else _ocr_paddle
    if cached is not None:
        return cached

    try:
        from paddleocr import PaddleOCR
        print(f"[INFO] Initializing PaddleOCR PP-OCRv4 (lang={lang})...")
        ocr_options = dict(
            lang=lang,
            use_angle_cls=True,
            use_gpu=(DEVICE == "cuda"),
            show_log=False,
            ocr_version='PP-OCRv4',
            det_db_thresh=0.15,
            det_db_box_thresh=0.2,
            det_db_unclip_ratio=2.0,
            use_dilation=True,
            det_db_score_mode='slow',
            rec_image_shape="3,48,320",
            max_text_length=80,
            rec_batch_num=6,
        )
        reader = PaddleOCR(**ocr_options)
        if english:
            _ocr_paddle_en = reader
        else:
            _ocr_paddle = reader
        return reader
    except Exception as e:
        print(f"[WARN] PaddleOCR init failed: {e}")
        return None


def get_easyocr_reader():
    """Return the shared EasyOCR reader for Vietnamese + English.

    ``easyocr`` is imported and the reader built on the first call only;
    the GPU is used when ``DEVICE`` is "cuda".
    """
    global _ocr_easyocr
    if _ocr_easyocr is not None:
        return _ocr_easyocr

    import easyocr
    use_gpu = DEVICE == "cuda"
    _ocr_easyocr = easyocr.Reader(["vi", "en"], gpu=use_gpu, verbose=False)
    return _ocr_easyocr


# ============================================================
# PREPROCESSING
# ============================================================

def enhance_faded_text(img_bgr):
    """Enhance faded strokes: unsharp masking followed by local thresholding.

    Args:
        img_bgr: Image array; converted with ``cv2.COLOR_BGR2GRAY`` first.

    Returns:
        The binarised image converted back to three channels
        (``cv2.COLOR_GRAY2BGR``).
    """
    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)

    # Unsharp mask: 1.5 * gray - 0.5 * blurred strengthens stroke edges.
    blurred = cv2.GaussianBlur(gray, (0, 0), 2.0)
    sharpened = cv2.addWeighted(gray, 1.5, blurred, -0.5, 0)

    # Local thresholding: Sauvola when scikit-image is installed.
    try:
        from skimage.filters import threshold_sauvola
        local_thresh = threshold_sauvola(sharpened, window_size=25)
        binary = np.where(sharpened > local_thresh, 255, 0).astype(np.uint8)
    except ImportError:
        # Fall back to OpenCV's adaptive threshold without scikit-image.
        binary = cv2.adaptiveThreshold(sharpened, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                       cv2.THRESH_BINARY, 21, 10)

    return cv2.cvtColor(binary, cv2.COLOR_GRAY2BGR)

def preprocess_for_ocr(img_bgr, min_width=1500, mode="note"):
    """Denoise and contrast-enhance a colour crop before OCR.

    Images narrower than ``min_width`` are first enlarged (bicubic) to that
    width. Both modes then apply a bilateral filter and CLAHE on the L
    channel of LAB; "note" mode additionally applies a sharpening kernel.

    Args:
        img_bgr: Input image array.
        min_width: Minimum width in pixels before processing.
        mode: "note" selects the note pipeline; any other value selects the
            table pipeline (stronger filter and CLAHE, no sharpening).

    Returns:
        The processed three-channel image.
    """
    _, width = img_bgr.shape[:2]
    if width < min_width:
        factor = min_width / width
        img_bgr = cv2.resize(img_bgr, None, fx=factor, fy=factor,
                             interpolation=cv2.INTER_CUBIC)

    note_mode = mode == "note"
    if note_mode:
        smoothed = cv2.bilateralFilter(img_bgr, 9, 75, 75)
        equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    else:  # table
        smoothed = cv2.bilateralFilter(img_bgr, 11, 80, 80)
        equalizer = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(4, 4))

    # Equalise lightness only, leaving the two colour channels untouched.
    lightness, chan_a, chan_b = cv2.split(cv2.cvtColor(smoothed, cv2.COLOR_BGR2LAB))
    merged = cv2.merge([equalizer.apply(lightness), chan_a, chan_b])
    balanced = cv2.cvtColor(merged, cv2.COLOR_LAB2BGR)

    if not note_mode:
        return balanced

    sharpen_kernel = np.array([[0, -0.5, 0],
                               [-0.5, 3, -0.5],
                               [0, -0.5, 0]])
    return cv2.filter2D(balanced, -1, sharpen_kernel)


def preprocess_for_handwriting(img_bgr, min_width=1800):
    """
    Preprocessing tailored to handwritten text.

    Enlarges narrow images to ``min_width``, erases long horizontal and
    vertical lines (table borders), boosts contrast with CLAHE, then
    binarises with an adaptive threshold and a light morphological close.

    Returns:
        The binarised image converted back to three channels.
    """
    _, width = img_bgr.shape[:2]
    if width < min_width:
        factor = min_width / width
        img_bgr = cv2.resize(img_bgr, None, fx=factor, fy=factor,
                             interpolation=cv2.INTER_CUBIC)

    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    rows, cols = gray.shape
    inverted = ~gray

    # A long, 1-pixel-thick opening on the inverted image keeps only strokes
    # at least as long as the kernel, i.e. ruling lines rather than letters.
    horiz_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (max(40, cols // 10), 1))
    vert_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, max(40, rows // 10)))
    horiz_lines = cv2.morphologyEx(inverted, cv2.MORPH_OPEN, horiz_kernel, iterations=1)
    vert_lines = cv2.morphologyEx(inverted, cv2.MORPH_OPEN, vert_kernel, iterations=1)

    # Paint the detected lines white on a copy of the grayscale image.
    without_lines = gray.copy()
    without_lines[horiz_lines > 128] = 255
    without_lines[vert_lines > 128] = 255

    # CLAHE for better contrast, then an adaptive threshold.
    equalizer = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
    contrasted = equalizer.apply(without_lines)
    binary = cv2.adaptiveThreshold(contrasted, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY, 21, 10)

    # Light morphological cleaning.
    close_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
    binary = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, close_kernel, iterations=1)

    return cv2.cvtColor(binary, cv2.COLOR_GRAY2BGR)


def preprocess_grayscale_variant(img_bgr, min_width=1500):
    """Produce an Otsu-binarised variant of the image for OCR.

    Images narrower than ``min_width`` are enlarged (bicubic) first; the
    grayscale image is then CLAHE-equalised and thresholded with Otsu's
    method.

    Returns:
        The binary image converted back to three channels.
    """
    _, width = img_bgr.shape[:2]
    if width < min_width:
        factor = min_width / width
        img_bgr = cv2.resize(img_bgr, None, fx=factor, fy=factor,
                             interpolation=cv2.INTER_CUBIC)

    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    equalized = equalizer.apply(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY))
    _, binary = cv2.threshold(equalized, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    return cv2.cvtColor(binary, cv2.COLOR_GRAY2BGR)


# ============================================================
# OCR FUNCTIONS
# ============================================================

def ocr_single_pass(reader, img_bgr):
    """Run one OCR pass and normalise the detections.

    ``reader`` is driven through ``reader.ocr(img, cls=True)`` when it has an
    ``ocr`` attribute (PaddleOCR-style) and through
    ``reader.readtext(img, detail=1, paragraph=False)`` otherwise
    (EasyOCR-style).

    Returns:
        ``(items, avg_conf)``. Each item is a dict with the stripped ``text``,
        its ``conf``, the box centre (``x``, ``y``), the axis-aligned extents
        (``x1``, ``y1``, ``x2``, ``y2``) and the raw ``box`` points.
        ``avg_conf`` is the mean confidence of the kept items, or ``0.0``
        when nothing was kept.
    """

    def collect(detections, min_conf):
        # Keep detections that reach min_conf and have non-blank text.
        kept = []
        kept_confs = []
        for quad, raw_text, conf in detections:
            if conf >= min_conf and raw_text.strip():
                xs = [pt[0] for pt in quad]
                ys = [pt[1] for pt in quad]
                kept.append({
                    "text": raw_text.strip(),
                    "conf": conf,
                    "x": np.mean(xs),
                    "y": np.mean(ys),
                    "x1": min(xs), "y1": min(ys),
                    "x2": max(xs), "y2": max(ys),
                    "box": quad
                })
                kept_confs.append(conf)
        mean_conf = np.mean(kept_confs) if kept_confs else 0.0
        return kept, mean_conf

    if hasattr(reader, 'ocr'):  # PaddleOCR
        pages = reader.ocr(img_bgr, cls=True)
        if not pages or not pages[0]:
            return [], 0.0
        # Each Paddle line is (box, (text, conf)); flatten it to the triple
        # layout that EasyOCR already uses.
        flattened = ((box, text, conf) for box, (text, conf) in pages[0])
        return collect(flattened, 0.15)

    # EasyOCR
    return collect(reader.readtext(img_bgr, detail=1, paragraph=False), 0.1)


def multi_pass_ocr(img_bgr, reader, ocr_type="note"):
    """Run OCR over five preprocessed variants and keep the most confident one.

    The image is first passed through ``upscale_if_needed(min_dim=400)``.
    Each variant is preprocessed and recognised in turn with
    ``ocr_single_pass``; the pass with the strictly highest average
    confidence wins (earlier passes win ties). The per-pass confidences are
    printed.

    Returns:
        ``(best_items, best_conf)``; ``([], 0.0)`` when no pass scored above 0.
    """
    # If the image is small because it was cropped from a corner, upscale it
    # before doing anything else.
    img_bgr = upscale_if_needed(img_bgr, min_dim=400)

    # Built lazily so each variant is preprocessed right before its OCR pass.
    variant_builders = (
        # Pass 1: colour preprocessing
        lambda: preprocess_for_ocr(img_bgr, min_width=1500, mode=ocr_type),
        # Pass 2: handwriting-optimised preprocessing
        lambda: preprocess_for_handwriting(img_bgr, min_width=1800),
        # Pass 3: extra upscale
        lambda: preprocess_for_ocr(img_bgr, min_width=2500, mode=ocr_type),
        # Pass 4: grayscale + Otsu
        lambda: preprocess_grayscale_variant(img_bgr, min_width=1500),
        # Pass 5: handles blurry / faded text
        lambda: enhance_faded_text(img_bgr),
    )

    best_items, best_conf = [], 0.0
    pass_confs = []
    for build_variant in variant_builders:
        items, conf = ocr_single_pass(reader, build_variant())
        pass_confs.append(conf)
        if conf > best_conf:
            best_conf, best_items = conf, items

    conf1, conf2, conf3, conf4, conf5 = pass_confs
    print(f"      Multi-pass confidences: {conf1:.3f}, {conf2:.3f}, {conf3:.3f}, {conf4:.3f}, {conf5:.3f} → best={best_conf:.3f}")
    return best_items, best_conf


# ============================================================
# TABLE STRUCTURE — Intersection-based cell detection
# ============================================================

def detect_lines(gray, direction="horizontal", min_length_ratio=0.15):
    """Return a mask of long straight strokes in a grayscale image.

    Pixels darker than 150 are treated as ink. A morphological opening with a
    1-pixel-thick kernel keeps only runs at least
    ``max(30, dimension * min_length_ratio)`` pixels long, measured along the
    width for ``direction == "horizontal"`` and along the height for any
    other value. The mask is then dilated with a 3x3 kernel to reconnect
    broken lines.
    """
    height, width = gray.shape
    ink_mask = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY_INV)[1]

    is_horizontal = direction == "horizontal"
    run_length = max(30, int((width if is_horizontal else height) * min_length_ratio))
    kernel_size = (run_length, 1) if is_horizontal else (1, run_length)

    opening_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, kernel_size)
    line_mask = cv2.morphologyEx(ink_mask, cv2.MORPH_OPEN, opening_kernel, iterations=2)

    # A light dilation bridges small gaps left in the detected lines.
    bridge_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    return cv2.dilate(line_mask, bridge_kernel, iterations=1)


def find_line_positions(lines_img, direction="horizontal", merge_distance=10):
    """Locate line centres in a line mask.

    The mask is summed along rows (``direction == "horizontal"``, giving
    y-coordinates) or along columns (any other value, giving x-coordinates).
    Indices whose sum exceeds 30% of the strongest one are candidates.
    Candidates separated by at most ``merge_distance`` pixels form one
    cluster, and each cluster is reported as the midpoint of its extent.

    The earlier implementation folded each new index into a running
    ``(prev + pos) // 2``. That weights the last pixels exponentially, so a
    thick line was reported near its trailing edge rather than its centre.

    Args:
        lines_img: 2-D mask, non-zero where a line was detected.
        direction: ``"horizontal"`` for row positions, otherwise columns.
        merge_distance: largest gap (in pixels) still treated as one line.

    Returns:
        Sorted list of plain ``int`` positions; ``[]`` when the mask is
        empty or contains no line pixels.
    """
    if lines_img is None:
        return []
    mask = np.asarray(lines_img)
    if mask.size == 0:
        return []

    projection = np.sum(mask, axis=1 if direction == "horizontal" else 0)
    peak = np.max(projection)
    if peak <= 0:
        return []

    positions = np.flatnonzero(projection > peak * 0.3)
    if positions.size == 0:
        return []

    # Start a new cluster wherever two consecutive candidates are further
    # apart than merge_distance.
    cluster_starts = np.flatnonzero(np.diff(positions) > merge_distance) + 1
    clusters = np.split(positions, cluster_starts)

    return [int((cluster[0] + cluster[-1]) // 2) for cluster in clusters]


def detect_table_cells_by_intersection(img_bgr):
    """Derive table cells from the grid of detected ruling lines.

    Horizontal and vertical lines are detected and collapsed to row
    (y) and column (x) positions; every pair of neighbouring rows and
    columns spans one candidate cell. Cells narrower or shorter than 10 px
    are dropped.

    Returns:
        ``(cells, grid)`` where ``cells`` is a flat list of
        ``(x1, y1, x2, y2)`` tuples and ``grid`` holds the same tuples grouped
        per row (rows with no surviving cell are omitted). When fewer than
        two lines are found in either direction, returns
        ``(detect_table_structure(img_bgr), None)`` instead.
    """
    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    img_h, img_w = gray.shape

    # Line masks for both directions
    horizontal_mask = detect_lines(gray, "horizontal", min_length_ratio=0.1)
    vertical_mask = detect_lines(gray, "vertical", min_length_ratio=0.1)

    # Collapse the masks to row / column coordinates
    row_ys = find_line_positions(horizontal_mask, "horizontal",
                                 merge_distance=max(8, img_h // 50))
    col_xs = find_line_positions(vertical_mask, "vertical",
                                 merge_distance=max(8, img_w // 50))

    print(f"      Table grid: {len(row_ys)} horizontal × {len(col_xs)} vertical lines")

    if len(row_ys) < 2 or len(col_xs) < 2:
        # Not enough rulings for a grid: fall back to contour-based detection
        return detect_table_structure(img_bgr), None

    cells = []
    grid = []
    for top, bottom in zip(row_ys, row_ys[1:]):
        row_cells = [
            (left, top, right, bottom)
            for left, right in zip(col_xs, col_xs[1:])
            # Keep only cells at least 10 px in both dimensions
            if (right - left) >= 10 and (bottom - top) >= 10
        ]
        cells.extend(row_cells)
        if row_cells:
            grid.append(row_cells)

    return cells, grid


def detect_table_structure(img_bgr):
    """Contour-based fallback for finding table cells.

    Long horizontal and vertical strokes are extracted from the thresholded
    image (ink = pixels darker than 150), combined into one mask, and the
    bounding boxes of that mask's contours are taken as cell candidates. A
    box is kept when its area lies strictly between 0.1% and 85% of the image
    area and both sides exceed 15 px.

    Returns:
        De-duplicated list of ``(x1, y1, x2, y2)`` tuples sorted by
        ``(y1, x1)``.
    """
    img_h, img_w = img_bgr.shape[:2]
    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    ink_mask = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY_INV)[1]

    def long_strokes(kernel_size):
        # Opening with a 1-px-thick kernel keeps only strokes along one axis.
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, kernel_size)
        return cv2.morphologyEx(ink_mask, cv2.MORPH_OPEN, kernel, iterations=2)

    horizontal_strokes = long_strokes((max(40, img_w // 15), 1))
    vertical_strokes = long_strokes((1, max(40, img_h // 15)))

    rulings = cv2.add(horizontal_strokes, vertical_strokes)
    contours, _ = cv2.findContours(rulings, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    image_area = img_w * img_h
    area_floor = image_area * 0.001
    area_ceiling = image_area * 0.85

    cells = []
    for contour in contours:
        left, top, box_w, box_h = cv2.boundingRect(contour)
        if area_floor < box_w * box_h < area_ceiling and box_w > 15 and box_h > 15:
            cells.append((left, top, left + box_w, top + box_h))

    return sorted(set(cells), key=lambda box: (box[1], box[0]))


# ============================================================
# OCR TABLE — Grid-based approach
# ============================================================

def ocr_cell_improved(img_cell, backend="paddle"):
    """Recognise the text of one table cell, keeping the most confident reading.

    Steps:
      1. Reject missing/empty or tiny (< 5 px) cells.
      2. Upscale via ``upscale_if_needed(min_dim=150)``, then once more so the
         cell is at least 64 px high and 200 px wide.
      3. If VietOCR is available, use its output as the initial candidate
         with a fixed confidence of 0.7.
      4. Run the selected reader over four preprocessed variants (colour +
         CLAHE, handwriting, Otsu, faded-text enhancement); a variant
         replaces the candidate only with strictly higher confidence and
         non-blank text.
      5. With the paddle backend, also try the English reader on the colour
         variant; it must additionally contain an upper-case letter or digit.

    Args:
        img_cell: BGR crop of the cell (may be ``None``).
        backend: ``"paddle"``, ``"surya"`` or anything else for EasyOCR. With
            ``"surya"`` a non-blank Surya result is returned immediately.

    Returns:
        The chosen text, or ``""`` when nothing could be read.
    """
    if img_cell is None or img_cell.size == 0:
        return ""
    # Upscale very small cells first (helper defined elsewhere in this file).
    img_cell = upscale_if_needed(img_cell, min_dim=150)
    h, w = img_cell.shape[:2]
    if h < 5 or w < 5:
        return ""

    # One aspect-preserving resize to reach >= 64 px height and >= 200 px
    # width. The previous two-step version reused the width measured *before*
    # the height resize, so small cells were enlarged twice.
    scale = max(1.0, 64 / h, 200 / w)
    if scale > 1:
        img_cell = cv2.resize(img_cell, None, fx=scale, fy=scale,
                              interpolation=cv2.INTER_CUBIC)

    best_text = ""
    best_conf = 0

    # Try VietOCR first (better for handwriting)
    if VIETOCR_AVAILABLE:
        try:
            vietocr_text = ocr_line_vietocr(img_cell)
        except Exception as e:
            # Best effort: report the failure and continue with other engines.
            print(f"      VietOCR failed on cell: {e}")
        else:
            if vietocr_text:
                best_text = vietocr_text
                best_conf = 0.7  # VietOCR gives no score; assume a default

    # Pick the detection+recognition reader for the requested backend
    if backend == "paddle":
        reader = get_paddle_reader('vi')
    elif backend == "surya":
        text = ocr_with_surya(img_cell, langs=["vi", "en"])
        if text.strip():
            return text.strip()
        reader = get_paddle_reader('vi')
    else:
        reader = get_easyocr_reader()

    if reader is None:
        reader = get_easyocr_reader()

    if reader is None:
        return best_text

    def consider(ocr_reader, image, must_match=None):
        # OCR one variant and adopt it when it beats the current candidate.
        nonlocal best_text, best_conf
        items, conf = ocr_single_pass(ocr_reader, image)
        text = " ".join(it["text"] for it in items)
        if conf > best_conf and text.strip():
            if must_match is None or re.search(must_match, text):
                best_conf = conf
                best_text = text

    # Variant 1: colour image, denoised, CLAHE on the lightness channel
    img_color = cv2.bilateralFilter(img_cell, 5, 50, 50)
    lab = cv2.cvtColor(img_color, cv2.COLOR_BGR2LAB)
    l, a, b = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(4, 4))
    l = clahe.apply(l)
    img_color = cv2.cvtColor(cv2.merge([l, a, b]), cv2.COLOR_LAB2BGR)
    consider(reader, img_color)

    # Variant 2: handwriting preprocessing (removes ruling lines)
    consider(reader, preprocess_for_handwriting(img_cell, min_width=300))

    # Variant 3: binary Otsu
    gray = cv2.cvtColor(img_cell, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    consider(reader, cv2.cvtColor(binary, cv2.COLOR_GRAY2BGR))

    # Variant 4: for faded / broken handwriting strokes
    consider(reader, enhance_faded_text(img_cell))

    # Also try English PaddleOCR for specs like "M6x50", "CT3"
    if backend == "paddle":
        reader_en = get_paddle_reader('en')
        if reader_en:
            # Only prefer English if it looks like specs/numbers
            consider(reader_en, img_color, must_match=r'[A-Z0-9]')

    return best_text


def ocr_table_grid(img, backend="paddle"):
    """OCR a table by first detecting its grid and then reading every cell.

    Cells are cropped 3 px inside their grid box to skip the ruling lines and
    passed to ``ocr_cell_improved``. Rows without any text are dropped, the
    remaining rows are padded/truncated to the most common column count, and
    domain corrections are applied (``correct_table_row`` for 4+ columns,
    ``correct_technical_text`` per cell otherwise).

    Returns:
        ``{"rows": ..., "text": ...}`` on success, or ``None`` when no grid
        was detected or every cell came back empty.
    """
    detection = detect_table_cells_by_intersection(img)
    if isinstance(detection, tuple):
        _, grid = detection
    else:
        grid = None

    if not grid:
        # Grid detection failed; the caller falls back to other strategies
        return None

    print(f"      Grid detected: {len(grid)} rows")

    img_h, img_w = img.shape[0], img.shape[1]
    inset = 3  # shrink each box so the ruling line itself is not cropped
    all_rows = []
    for row_cells in grid:
        row_texts = []
        for x1, y1, x2, y2 in row_cells:
            top = max(0, y1 + inset)
            left = max(0, x1 + inset)
            bottom = min(img_h, y2 - inset)
            right = min(img_w, x2 - inset)

            if bottom <= top or right <= left:
                row_texts.append("")
            else:
                cell_text = ocr_cell_improved(img[top:bottom, left:right],
                                              backend=backend)
                row_texts.append(cell_text.strip())

        if any(row_texts):  # skip rows where every cell is empty
            all_rows.append(row_texts)

    if not all_rows:
        return None

    # The most frequent row length decides the table width
    col_counts = [len(row) for row in all_rows]
    expected_cols = max(set(col_counts), key=col_counts.count)

    corrected_rows = []
    for row in all_rows:
        # Pad short rows with "" and cut long ones to expected_cols
        row = (row + [""] * (expected_cols - len(row)))[:expected_cols]
        if expected_cols >= 4:
            corrected_rows.append(correct_table_row(row, num_columns=expected_cols))
        else:
            corrected_rows.append([correct_technical_text(cell) for cell in row])

    text = "\n".join(" | ".join(r) for r in corrected_rows)
    return {"rows": corrected_rows, "text": text}


def ocr_table(img_path, backend="paddle"):
    """OCR a table image, trying three strategies in order.

    1. Grid-based cell detection (``ocr_table_grid``).
    2. PPStructure, only for the paddle backend and when the engine loads;
       any exception there is printed and the next strategy is tried.
    3. Contour-based detection (``ocr_table_manual``), followed by
       ``correct_technical_text`` on every cell.

    Returns:
        Dict with ``"rows"`` and ``"text"`` (plus ``"html"`` for PPStructure
        results). Rows and text are empty when the image cannot be read.
    """
    img = cv2.imread(img_path)
    if img is None:
        return {"rows": [], "text": ""}

    def corrected(rows):
        # Domain correction applied cell by cell
        return [[correct_technical_text(cell) for cell in row] for row in rows]

    def as_text(rows):
        return "\n".join(" | ".join(r) for r in rows)

    # Strategy 1: grid-based cell detection + OCR
    print(f"      Trying grid-based table OCR...")
    grid_result = ocr_table_grid(img, backend)
    if grid_result and grid_result.get("rows"):
        print(f"      Grid OCR: {len(grid_result['rows'])} rows")
        return grid_result

    # Strategy 2: PPStructure (paddle backend only)
    pp_engine = get_pp_structure() if backend == "paddle" else None
    if pp_engine is not None:
        try:
            width = img.shape[1]
            img_scaled = img
            if width < 1200:
                factor = 1200 / width
                img_scaled = cv2.resize(img, None, fx=factor, fy=factor,
                                        interpolation=cv2.INTER_CUBIC)

            for item in pp_engine(img_scaled):
                if item.get('type') != 'table':
                    continue
                html = item.get('res', {}).get('html', '')
                if not html:
                    continue
                rows = parse_html_table(html)
                if not rows:
                    continue
                corrected_rows = corrected(rows)
                text = as_text(corrected_rows)
                print(f"      PPStructure: {len(corrected_rows)} rows")
                return {"rows": corrected_rows, "text": text, "html": html}
        except Exception as e:
            print(f"      PPStructure error: {e}")

    # Strategy 3: contour-based cell detection
    print(f"      Trying contour-based table OCR...")
    result = ocr_table_manual(img, img_path, backend)

    # Domain correction on whatever the last strategy produced
    if result.get("rows"):
        corrected_rows = corrected(result["rows"])
        result["rows"] = corrected_rows
        result["text"] = as_text(corrected_rows)

    return result


def ocr_table_manual(img, img_path, backend="paddle"):
    """OCR a table from contour-detected cells.

    Each cell from ``detect_table_structure`` is cropped with a 3 px margin
    and read with ``ocr_cell_improved``. Cells covering more than 90% of the
    image in both dimensions, or smaller than 15 px on a side, are skipped.
    The recognised cells are grouped into rows with ``group_rows``. When no
    cell is found or none yields text, the whole image is OCR'd via
    ``ocr_table_fullimage``.

    ``img_path`` is accepted but not used by this function.

    Returns:
        Dict with ``"rows"`` and ``"text"``.
    """
    cells = detect_table_structure(img)
    if not cells:
        return ocr_table_fullimage(img, backend)

    img_h, img_w = img.shape[:2]
    margin = 3
    recognised = []
    for x1, y1, x2, y2 in cells:
        cell_w = x2 - x1
        cell_h = y2 - y1
        covers_image = cell_w > img_w * 0.9 and cell_h > img_h * 0.9
        if covers_image or cell_w < 15 or cell_h < 15:
            continue

        # Expand the crop slightly, clamped to the image bounds
        cell_img = img[max(0, y1 - margin):min(img_h, y2 + margin),
                       max(0, x1 - margin):min(img_w, x2 + margin)]

        text = ocr_cell_improved(cell_img, backend=backend)
        if not text:
            continue
        recognised.append({
            "text": text.strip(),
            "x": (x1 + x2) // 2,
            "y": (y1 + y2) // 2,
            "box": (x1, y1, x2, y2)
        })

    if not recognised:
        return ocr_table_fullimage(img, backend)

    rows = group_rows(recognised, vertical_thresh_ratio=0.5)
    return {
        "rows": rows,
        "text": "\n".join(" | ".join(r) for r in rows)
    }


# Module-level cache for the PPStructure engine (created on first use).
_pp_structure = None


def get_pp_structure():
    """Return the shared PPStructure engine, creating it on first call.

    The engine is built with table and OCR enabled, ``lang='vi'``, logging
    off, and GPU use when ``DEVICE == "cuda"``. If importing or constructing
    it fails, a warning is printed and ``None`` is returned; nothing is
    cached in that case, so a later call tries again.
    """
    global _pp_structure
    if _pp_structure is None:
        try:
            from paddleocr import PPStructure
            print("[INFO] Initializing PPStructure...")
            _pp_structure = PPStructure(
                table=True, ocr=True, lang='vi',
                show_log=False, use_gpu=(DEVICE == "cuda"),
            )
        except Exception as e:
            print(f"[WARN] PPStructure init failed: {e}")
            return None
    return _pp_structure


def parse_html_table(html_str):
    """Extract the cell texts of an HTML table as a list of rows.

    Rows are ``<tr>`` elements and cells are ``<td>`` or ``<th>`` elements;
    both may carry attributes (e.g. ``colspan``) and tag matching is
    case-insensitive. Nested markup inside a cell is removed, HTML entities
    are decoded, and surrounding whitespace is stripped. Rows without any
    cell are skipped.

    Args:
        html_str: HTML fragment; ``None`` or ``""`` yields ``[]``.

    Returns:
        List of rows, each a list of cell strings.
    """
    from html import unescape

    if not html_str:
        return []

    flags = re.DOTALL | re.IGNORECASE
    rows = []
    for row_html in re.findall(r'<tr\b[^>]*>(.*?)</tr\s*>', html_str, flags):
        cells = re.findall(r'<t[dh]\b[^>]*>(.*?)</t[dh]\s*>', row_html, flags)
        # Strip tags before decoding so an escaped "&lt;b&gt;" stays as text.
        clean_cells = [
            unescape(re.sub(r'<[^>]+>', '', cell)).strip() for cell in cells
        ]
        if clean_cells:
            rows.append(clean_cells)
    return rows


def ocr_table_fullimage(img, backend="paddle"):
    """OCR a table image as a whole and group the detections into rows.

    With the ``"surya"`` backend each non-blank output line becomes a
    one-cell row and the raw text is returned unchanged. Otherwise the image
    is OCR'd with the paddle or EasyOCR reader after table preprocessing,
    retried with handwriting preprocessing when nothing is found,
    domain-corrected, and grouped with ``group_rows``.

    Returns:
        Dict with ``"rows"`` and ``"text"``; both are empty when no reader is
        available or nothing is recognised.
    """
    if backend == "surya":
        text = ocr_with_surya(img, langs=["vi", "en"])
        lines = [line.strip() for line in text.split("\n") if line.strip()]
        rows = [[line] for line in lines]
        return {"rows": rows, "text": text}

    reader = get_paddle_reader('vi') if backend == "paddle" else get_easyocr_reader()
    if reader is None:
        reader = get_easyocr_reader()
    if reader is None:
        # Neither engine could be loaded. Without this guard
        # ocr_single_pass(None, ...) raises AttributeError.
        print("      [WARN] No OCR reader available for full-image table OCR")
        return {"rows": [], "text": ""}

    img_proc = preprocess_for_ocr(img, min_width=1500, mode="table")
    items, _ = ocr_single_pass(reader, img_proc)

    if not items:
        # Try handwriting preprocessing
        img_hw = preprocess_for_handwriting(img, min_width=1800)
        items, _ = ocr_single_pass(reader, img_hw)

    if not items:
        return {"rows": [], "text": ""}

    # Apply domain corrections in place before grouping
    for item in items:
        item["text"] = correct_technical_text(item["text"])

    rows = group_rows(items, vertical_thresh_ratio=0.6)
    return {"rows": rows, "text": "\n".join(" | ".join(r) for r in rows)}


# ============================================================
# GROUP ROWS
# ============================================================

def group_rows(items, vertical_thresh_ratio=0.6):
    """Group OCR items into text rows by vertical proximity.

    Items (dicts with ``"x"``, ``"y"`` and ``"text"``) are ordered by ``y``.
    An item joins the current row when its ``y`` is less than the threshold
    below the previously added item; otherwise a new row starts. The
    threshold is ``max(8, median_gap * vertical_thresh_ratio)`` over the gaps
    between consecutive ``y`` values, or 12 for a single item. Each row is
    ordered left to right by ``x``.

    Returns:
        List of rows, each a list of the items' texts; ``[]`` for no items.
    """
    if not items:
        return []

    ordered = sorted(items, key=lambda entry: entry["y"])

    if len(ordered) > 1:
        ys = [entry["y"] for entry in ordered]
        gaps = [below - above for above, below in zip(ys, ys[1:])]
        row_break = max(8, np.median(gaps) * vertical_thresh_ratio)
    else:
        row_break = 12

    grouped = [[ordered[0]]]
    for entry in ordered[1:]:
        # Compare against the last item added (in y order), not the row start
        if entry["y"] - grouped[-1][-1]["y"] < row_break:
            grouped[-1].append(entry)
        else:
            grouped.append([entry])

    return [
        [entry["text"] for entry in sorted(row, key=lambda e: e["x"])]
        for row in grouped
    ]


# ============================================================
# POST-PROCESSING
# ============================================================

def post_process_ocr_text(text):
    """Clean up common OCR confusions in technical text.

    Applies these substitutions in order, then trims the result and passes it
    through ``correct_technical_text``:
      - ``O`` between digits, or right after ``M`` or ``Ø``, becomes ``0``
      - ``l`` / ``I`` between digits becomes ``1``
      - ``<digits> x <digits>`` (either case, optional spaces) becomes
        ``<digits>×<digits>``
      - runs of whitespace collapse to one space

    Falsy input (``None``, ``""``) is returned unchanged.
    """
    if not text:
        return text

    substitutions = (
        (r'(?<=[0-9])O(?=[0-9])', '0'),
        (r'(?<=M)O', '0'),
        (r'(?<=Ø)O', '0'),
        (r'(?<=[0-9])[lI](?=[0-9])', '1'),
        (r'(\d+)\s*[xX]\s*(\d+)', r'\1×\2'),
        (r'\s+', ' '),
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)

    # Domain correction runs last, on the trimmed text
    return correct_technical_text(text.strip())


# ============================================================
# OCR NOTE
# ============================================================

def ocr_note(img_path, backend="paddle"):
    """OCR a note crop and return its post-processed text, one line per item.

    With the ``"surya"`` backend the Surya output is split into non-blank
    lines and each is post-processed. Otherwise the Vietnamese and English
    paddle readers (paddle backend), or EasyOCR when neither is available,
    each run ``multi_pass_ocr``; the result with the strictly highest
    confidence is kept, post-processed, and empty lines are dropped.

    Returns:
        The text joined with newlines, or ``""`` when the image cannot be
        read or nothing is recognised.
    """
    img = cv2.imread(img_path)
    if img is None:
        return ""

    if backend == "surya":
        raw = ocr_with_surya(img, langs=["vi", "en"])
        return "\n".join(
            post_process_ocr_text(line.strip())
            for line in raw.split("\n")
            if line.strip()
        )

    use_paddle = backend == "paddle"
    reader_vi = get_paddle_reader('vi') if use_paddle else None
    reader_en = get_paddle_reader('en') if use_paddle else None

    if reader_vi is None and reader_en is None:
        reader_vi = get_easyocr_reader()

    best_items, best_conf = [], 0.0
    for reader in (reader_vi, reader_en):
        if not reader:
            continue
        items, conf = multi_pass_ocr(img, reader, "note")
        if conf > best_conf:
            best_conf, best_items = conf, items

    cleaned = (post_process_ocr_text(it["text"]) for it in best_items)
    return "\n".join(line for line in cleaned if line)


# ============================================================
# MAIN PIPELINE
# ============================================================

def run_pipeline(image_path, output_dir="outputs",
                 checkpoint="best.pt", conf_thresh=0.3,
                 ocr_backend="paddle"):
    """Detect objects in an image, OCR notes/tables, and write the results.

    For every detection a padded crop is saved under
    ``<output_dir>/<stem>/crops``; crops of class ``"note"`` and ``"table"``
    are OCR'd. An annotated visualisation (``result_vis.jpg``) and a JSON
    summary (``result.json``) are written to ``<output_dir>/<stem>``.

    Boxes and labels are drawn on a *copy* of the image. Drawing on the
    source array, as before, let the annotations of earlier detections leak
    into the crops of later ones and therefore into their OCR input.

    Args:
        image_path: path of the input image.
        output_dir: root directory for outputs.
        checkpoint: detector checkpoint passed to ``get_det_model``.
        conf_thresh: detection confidence threshold.
        ocr_backend: backend name forwarded to ``ocr_note`` / ``ocr_table``.

    Returns:
        ``(result, vis_path)`` where ``result`` is the dict also written to
        ``result.json``.

    Raises:
        ValueError: if the image cannot be read by OpenCV.
    """
    image_path = str(image_path)
    img_name = Path(image_path).name
    stem = Path(image_path).stem
    result_dir = Path(output_dir) / stem
    crop_dir = result_dir / "crops"
    crop_dir.mkdir(parents=True, exist_ok=True)

    model = get_det_model(checkpoint)
    results = model(image_path, imgsz=1024, conf=conf_thresh,
                    iou=0.5, device=DEVICE, verbose=False)

    img_bgr = cv2.imread(image_path)
    if img_bgr is None:
        raise ValueError(f"Cannot read: {image_path}")

    # Crops come from the pristine image; annotations go onto this copy.
    vis_img = img_bgr.copy()
    img_h, img_w = img_bgr.shape[:2]
    pad = 10

    objects = []
    for i, box in enumerate(results[0].boxes):
        x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
        cls_idx = int(box.cls[0])
        conf_val = round(float(box.conf[0]), 4)
        cls_raw = CLASS_NAMES[cls_idx]
        cls_show = CLASS_DISPLAY[cls_raw]

        # Padded crop, clamped to the image bounds
        crop = img_bgr[max(0, y1-pad):min(img_h, y2+pad),
                       max(0, x1-pad):min(img_w, x2+pad)]
        crop_path = str(crop_dir / f"{cls_show}_{i+1}.jpg")
        cv2.imwrite(crop_path, crop, [cv2.IMWRITE_JPEG_QUALITY, 98])

        ocr_content = None
        if cls_raw == "note":
            print(f"[OCR] Note #{i+1} ({x2-x1}x{y2-y1}px)...")
            ocr_content = ocr_note(crop_path, backend=ocr_backend)
            print(f"      → {repr(ocr_content[:120]) if ocr_content else 'EMPTY'}")
        elif cls_raw == "table":
            print(f"[OCR] Table #{i+1} ({x2-x1}x{y2-y1}px)...")
            ocr_content = ocr_table(crop_path, backend=ocr_backend)
            preview = ocr_content.get("text", "")[:120]
            print(f"      → {repr(preview) if preview else 'EMPTY'}")

        objects.append({
            "id": i+1, "class": cls_show,
            "confidence": conf_val,
            "bbox": {"x1": x1, "y1": y1, "x2": x2, "y2": y2},
            "crop_path": crop_path,
            "ocr_content": ocr_content,
        })

        # Draw the box and a filled label tag above it
        color = COLORS[cls_raw]
        cv2.rectangle(vis_img, (x1, y1), (x2, y2), color, 2)
        label = f"{cls_show} {conf_val:.2f}"
        (tw, th), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
        cv2.rectangle(vis_img, (x1, y1-th-10), (x1+tw+8, y1), color, -1)
        cv2.putText(vis_img, label, (x1+4, y1-4),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)

    vis_path = str(result_dir / "result_vis.jpg")
    cv2.imwrite(vis_path, vis_img)

    result = {"image": img_name, "objects": objects}
    json_path = str(result_dir / "result.json")
    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(result, f, ensure_ascii=False, indent=2)

    print(f"[✓] {len(objects)} objects | {vis_path} | {json_path}")
    return result, vis_path


if __name__ == "__main__":
    # CLI usage: <script> [image_path] [ocr_backend]
    # Defaults: "test.jpg" and "easyocr".
    import sys

    cli_args = sys.argv[1:]
    img = cli_args[0] if len(cli_args) >= 1 else "test.jpg"
    backend = cli_args[1] if len(cli_args) >= 2 else "easyocr"
    result, _ = run_pipeline(img, ocr_backend=backend)
    print(json.dumps(result, ensure_ascii=False, indent=2))