# BakoAI / scripts / fix_dataset.py
# Okidi Norbert
# Deployment fix: clean backend only
# c6abe34
import os
import yaml
def _remap_label_file(file_path, mapping):
    """Rewrite one label file in place, translating leading class IDs.

    Each non-blank row is expected to start with a class ID token. Rows whose
    first token is a key of *mapping* get that token replaced and are written
    back with single-space separators; all other rows are kept verbatim.

    Args:
        file_path: Path of the ``.txt`` label file to rewrite.
        mapping: Dict from old class ID (as a string) to new class ID
            (as a string).
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    new_lines = []
    for line in lines:
        parts = line.split()
        if parts and parts[0] in mapping:
            parts[0] = mapping[parts[0]]
            new_lines.append(" ".join(parts) + "\n")
        else:
            # Blank lines and rows with an unknown class ID carry nothing to
            # translate, so they are preserved exactly as found.
            new_lines.append(line)

    with open(file_path, 'w', encoding='utf-8') as f:
        f.writelines(new_lines)


def fix_dataset(dataset_dir):
    """Merge the duplicate ball class of a dataset and rewrite its data.yaml.

    The old schema has seven classes, two of which ("basketbal" and
    "basketball") name the same object. This function collapses them into a
    single class and shifts the remaining class IDs down in every ``.txt``
    file under ``train/labels``, ``valid/labels`` and ``test/labels``
    (splits that do not exist are skipped). It then updates ``data.yaml``
    with the six-class schema and the image directory of each split.

    ``data.yaml`` is read before any label file is touched, so a missing
    config aborts the run without leaving half-converted labels behind.
    If the config already lists the six target class names, the labels are
    assumed to be converted already and are left alone: the ID mapping is
    not idempotent, so applying it twice would corrupt the labels.

    Args:
        dataset_dir: Root directory of the dataset, containing ``data.yaml``
            and the split sub-directories.

    Raises:
        FileNotFoundError: If ``data.yaml`` does not exist in *dataset_dir*.
    """
    label_dirs = ['train/labels', 'valid/labels', 'test/labels']
    new_names = ['basketball', 'court', 'hoop', 'player', 'referee', 'shot-clock']

    # Mapping old class IDs to new class IDs
    # Old: 0: basketbal, 1: basketball, 2: court, 3: hoop, 4: player, 5: referee, 6: shot-clock
    # New: 0: basketball, 1: court, 2: hoop, 3: player, 4: referee, 5: shot-clock
    mapping = {
        '0': '0',  # basketbal -> basketball
        '1': '0',  # basketball -> basketball
        '2': '1',  # court -> court
        '3': '2',  # hoop -> hoop
        '4': '3',  # player -> player
        '5': '4',  # referee -> referee
        '6': '5',  # shot-clock -> shot-clock
    }

    # Load the config first: failing here must not leave labels rewritten.
    yaml_path = os.path.join(dataset_dir, 'data.yaml')
    with open(yaml_path, 'r', encoding='utf-8') as f:
        # An empty YAML document loads as None; treat it as an empty config.
        data = yaml.safe_load(f) or {}

    # 'names' may be a list or an {id: name} mapping; normalise to a list so
    # it can be compared against the target schema.
    existing_names = data.get('names')
    if isinstance(existing_names, dict):
        existing_names = [existing_names[key] for key in sorted(existing_names)]
    already_fixed = existing_names == new_names

    if already_fixed:
        # Re-applying the mapping would shift every class ID a second time.
        print(f"{yaml_path} already lists the target classes; skipping label remap.")
    else:
        for ld in label_dirs:
            full_path = os.path.join(dataset_dir, ld)
            if not os.path.isdir(full_path):
                continue

            print(f"Processing labels in {full_path}...")
            for filename in os.listdir(full_path):
                if filename.endswith('.txt'):
                    _remap_label_file(os.path.join(full_path, filename), mapping)

    # Fix data.yaml
    data['nc'] = 6
    data['names'] = new_names

    # The split paths are built from dataset_dir as given, so they are
    # absolute only when dataset_dir is absolute; a relative dataset_dir
    # yields paths relative to the directory the script was launched from.
    data['train'] = os.path.join(dataset_dir, 'train/images')
    data['val'] = os.path.join(dataset_dir, 'valid/images')
    if 'test' in data:
        data['test'] = os.path.join(dataset_dir, 'test/images')

    with open(yaml_path, 'w', encoding='utf-8') as f:
        yaml.dump(data, f)

    print("Dataset fix complete!")
if __name__ == "__main__":
    import sys

    # An optional first command-line argument overrides the default dataset
    # location, so the script can target another dataset without being edited.
    fix_dataset(sys.argv[1] if len(sys.argv) > 1 else "datasets/nbl_dataset")