import os
import yaml

# Class names after merging the misspelled "basketbal" class into "basketball".
# The list index is the new class ID.
_NEW_CLASS_NAMES = ['basketball', 'court', 'hoop', 'player', 'referee', 'shot-clock']

# Mapping old class IDs to new class IDs (kept as strings because the label
# files are plain text and the ID is compared as the first token of a line).
# Old: 0: basketbal, 1: basketball, 2: court, 3: hoop, 4: player, 5: referee, 6: shot-clock
# New: 0: basketball, 1: court, 2: hoop, 3: player, 4: referee, 5: shot-clock
_CLASS_ID_MAP = {
    '0': '0',  # basketbal -> basketball
    '1': '0',  # basketball -> basketball
    '2': '1',  # court -> court
    '3': '2',  # hoop -> hoop
    '4': '3',  # player -> player
    '5': '4',  # referee -> referee
    '6': '5',  # shot-clock -> shot-clock
}


def _remap_label_file(file_path, mapping):
    """Rewrite the class ID (first token) of every line in one label file.

    Lines whose first token is a key of *mapping* are re-emitted with the
    mapped ID and single-space separators. Lines with an unknown first token
    are kept verbatim, and blank lines are dropped. The file is only written
    back when its content actually changes.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    new_lines = []
    for line in lines:
        parts = line.split()
        if not parts:
            continue
        if parts[0] in mapping:
            parts[0] = mapping[parts[0]]
            new_lines.append(" ".join(parts) + "\n")
        else:
            new_lines.append(line)

    if new_lines != lines:
        with open(file_path, 'w', encoding='utf-8') as f:
            f.writelines(new_lines)


def _labels_already_remapped(data):
    """Return True when data.yaml already lists the merged class names.

    Accepts both the list form and the ``{id: name}`` dict form of ``names``.
    """
    names = data.get('names')
    return names == _NEW_CLASS_NAMES or names == dict(enumerate(_NEW_CLASS_NAMES))


def fix_dataset(dataset_dir):
    """Merge the duplicate "basketbal" class and repair ``data.yaml``.

    Every ``*.txt`` file under ``train/labels``, ``valid/labels`` and
    ``test/labels`` (split directories that do not exist are skipped) has the
    class ID at the start of each line remapped from the old 7-class scheme to
    the new 6-class scheme. ``data.yaml`` is then rewritten with ``nc``,
    ``names`` and absolute ``train``/``val`` (and ``test``, if already
    present) image directories.

    ``data.yaml`` is read *before* any label file is touched, so a missing or
    unparsable file aborts the run without leaving the labels half-migrated.
    If ``data.yaml`` already lists the new class names, the label remap is
    skipped: applying the mapping twice would shift every class ID again.
    Note that this check relies on ``data.yaml`` being written last; a run
    interrupted in the middle of the label pass cannot be detected.

    Args:
        dataset_dir: Root directory of the dataset (contains ``data.yaml``).

    Raises:
        FileNotFoundError: If ``data.yaml`` does not exist in *dataset_dir*.
    """
    label_dirs = ['train/labels', 'valid/labels', 'test/labels']

    # Load the YAML first so that failures happen before any label is modified.
    yaml_path = os.path.join(dataset_dir, 'data.yaml')
    with open(yaml_path, 'r', encoding='utf-8') as f:
        # An empty YAML document loads as None; treat it as an empty mapping.
        data = yaml.safe_load(f) or {}

    if _labels_already_remapped(data):
        print("Labels already use the new class IDs; skipping label remap.")
    else:
        for ld in label_dirs:
            full_path = os.path.join(dataset_dir, ld)
            if not os.path.exists(full_path):
                continue

            print(f"Processing labels in {full_path}...")
            for filename in os.listdir(full_path):
                if filename.endswith('.txt'):
                    _remap_label_file(os.path.join(full_path, filename), _CLASS_ID_MAP)

    # Fix data.yaml
    data['nc'] = len(_NEW_CLASS_NAMES)
    data['names'] = list(_NEW_CLASS_NAMES)

    # Use absolute image paths so they resolve the same way regardless of the
    # working directory or of how the training tool interprets relative paths
    # (the original export used entries such as "../train/images").
    data['train'] = os.path.abspath(os.path.join(dataset_dir, 'train/images'))
    data['val'] = os.path.abspath(os.path.join(dataset_dir, 'valid/images'))
    if 'test' in data:
        data['test'] = os.path.abspath(os.path.join(dataset_dir, 'test/images'))

    with open(yaml_path, 'w', encoding='utf-8') as f:
        yaml.dump(data, f)

    print("Dataset fix complete!")

if __name__ == "__main__":
    # Script entry point: repair the NBL dataset at its default location,
    # given relative to the current working directory.
    default_dataset_dir = "datasets/nbl_dataset"
    fix_dataset(default_dataset_dir)