# Spaces: Sleeping  (page-status banner captured along with the script; not part of the code)
# Print per-split label statistics for the JSON dataset files under data2/:
# record counts, box counts, and the percentage of boxes whose label id is
# non-zero.

import json
from pathlib import Path


def count_entity_boxes(records):
    """Tally box-label statistics over an iterable of record dicts.

    For every record the ``'box_label_ids'`` value is read; a label id equal
    to ``0`` is treated as a non-entity box and any other id as an entity
    box. A record whose key is missing, ``None`` (JSON ``null``) or empty
    contributes no boxes.

    Args:
        records: Iterable of dicts, each optionally holding a
            ``'box_label_ids'`` sequence of label ids.

    Returns:
        A ``(with_labels, total_boxes, entity_boxes)`` tuple where
        ``with_labels`` is the number of records containing at least one
        non-zero label id, ``total_boxes`` is the number of label ids seen,
        and ``entity_boxes`` is the number of non-zero label ids seen.
    """
    with_labels = 0
    total_boxes = 0
    entity_boxes = 0
    for record in records:
        # `or []` covers a key that is present but null, which
        # `.get(key, [])` would hand back as None and crash `len()`.
        box_ids = record.get('box_label_ids') or []
        # Count once; the per-record "has entities" flag follows from it.
        n_entities = sum(1 for lid in box_ids if lid != 0)
        total_boxes += len(box_ids)
        entity_boxes += n_entities
        if n_entities:
            with_labels += 1
    return with_labels, total_boxes, entity_boxes


for split in ['combined_train.json', 'combined_val.json', 'combined_test.json']:
    path = Path('data2') / split
    # Splits that have not been generated are skipped silently.
    if not path.exists():
        continue
    with open(path, encoding='utf-8') as f:
        records = json.load(f)
    total = len(records)
    with_labels, total_boxes, entity_boxes = count_entity_boxes(records)
    print(f'\n{split}:')
    print(f' Records: {total} total, {with_labels} with entities')
    print(f' Boxes: {total_boxes} total, {entity_boxes} entity boxes')
    # The rate line is only shown for non-empty splits; the inline guard
    # avoids dividing by zero when records exist but carry no boxes.
    if total > 0:
        print(f' Entity rate: {100*entity_boxes/total_boxes if total_boxes > 0 else 0:.2f}%')