| | |
| | import math |
| | import os.path as osp |
| | import tempfile |
| |
|
| | from mmocr.datasets.ocr_dataset import OCRDataset |
| |
|
| |
|
def _create_dummy_ann_file(ann_file):
    """Write a two-sample dummy annotation file to ``ann_file``.

    Each sample occupies one line: an image file name, a space, and the
    text for that image.

    Args:
        ann_file (str): Path of the annotation file to create; an existing
            file at that path is overwritten.
    """
    samples = ('sample1.jpg hello', 'sample2.jpg world')

    with open(ann_file, 'w') as fw:
        # One newline-terminated line per sample.
        fw.writelines(sample + '\n' for sample in samples)
| |
|
| |
|
def _create_dummy_loader():
    """Build the loader config used to read the dummy annotation file.

    Returns:
        dict: A ``HardDiskLoader`` config with ``repeat=1`` and a
        ``LineStrParser`` whose keys are ``file_name`` and ``text``.
    """
    parser_cfg = {'type': 'LineStrParser', 'keys': ['file_name', 'text']}
    return {'type': 'HardDiskLoader', 'repeat': 1, 'parser': parser_cfg}
| |
|
| |
|
def test_detect_dataset():
    """Check ``OCRDataset`` construction, ``pre_pipeline`` and ``evaluate``.

    The dataset is built from a two-line dummy annotation file written into
    a temporary directory. The directory is managed by a ``with`` block so
    it is removed even when writing the file or constructing the dataset
    raises.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        # create dummy data
        ann_file = osp.join(tmp_dir, 'fake_data.txt')
        _create_dummy_ann_file(ann_file)

        # test initialization
        loader = _create_dummy_loader()
        dataset = OCRDataset(ann_file, loader, pipeline=[])

    # The checks below run after the temporary directory has been removed,
    # exactly as in the original ordering (cleanup preceded them).

    # test pre_pipeline
    img_info = dataset.data_infos[0]
    results = dict(img_info=img_info)
    dataset.pre_pipeline(results)
    assert results['img_prefix'] == dataset.img_prefix
    assert results['text'] == img_info['text']

    # test evaluation
    metric = 'acc'
    # The second prediction drops the final character of 'world'.
    results = [{'text': 'hello'}, {'text': 'worl'}]
    eval_res = dataset.evaluate(results, metric)

    assert math.isclose(eval_res['word_acc'], 0.5, abs_tol=1e-4)
    assert math.isclose(eval_res['char_precision'], 1.0, abs_tol=1e-4)
    assert math.isclose(eval_res['char_recall'], 0.9, abs_tol=1e-4)
| |
|