Spaces:
Sleeping
Sleeping
File size: 909 Bytes
33ddb61 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 | import json
from pathlib import Path
def summarize_records(records):
    """Tally entity-label statistics over a list of record dicts.

    A box counts as an "entity box" when its label id is not 0.

    Args:
        records: Iterable of dicts. Each may carry a ``box_label_ids``
            sequence of label ids; a missing or null value is treated as
            an empty sequence.

    Returns:
        Tuple ``(with_labels, total_boxes, entity_boxes)`` where
        ``with_labels`` is the number of records holding at least one
        non-zero label id, ``total_boxes`` is the number of label ids seen,
        and ``entity_boxes`` is the number of non-zero label ids.
    """
    with_labels = 0
    total_boxes = 0
    entity_boxes = 0
    for record in records:
        # `or []` also covers an explicit JSON null: `.get(key, [])` would
        # hand back None in that case and `len()` would raise TypeError.
        box_ids = record.get('box_label_ids') or []
        total_boxes += len(box_ids)
        # Count once; the count doubles as the "has any entity" test.
        labelled = sum(1 for lid in box_ids if lid != 0)
        if labelled:
            with_labels += 1
            entity_boxes += labelled
    return with_labels, total_boxes, entity_boxes


# Print a per-split summary for every split file present under data2/.
for split in ['combined_train.json', 'combined_val.json', 'combined_test.json']:
    path = Path('data2') / split
    if not path.exists():
        # Splits that have not been generated are skipped without output.
        continue
    with open(path, encoding='utf-8') as f:
        records = json.load(f)
    total = len(records)
    with_labels, total_boxes, entity_boxes = summarize_records(records)
    print(f'\n{split}:')
    print(f' Records: {total} total, {with_labels} with entities')
    print(f' Boxes: {total_boxes} total, {entity_boxes} entity boxes')
    if total > 0:
        # Guard the division: records may exist without any boxes at all.
        entity_rate = 100 * entity_boxes / total_boxes if total_boxes > 0 else 0
        print(f' Entity rate: {entity_rate:.2f}%')
|