| """ |
| Script for fine-tuning YOLOv10 on the BCCD dataset. |
| This is designed to be run in Google Colab. |
| """ |
|
|
| import os |
| import shutil |
| import glob |
| import xml.etree.ElementTree as ET |
| from pathlib import Path |
| import yaml |
| import random |
| import numpy as np |
|
|
def download_bccd_dataset():
    """
    Install Ultralytics and download the BCCD dataset from GitHub.

    The repository is cloned into ``BCCD_Dataset`` in the current working
    directory. If that directory already exists the clone is skipped, so the
    function can safely be re-run (e.g. when re-executing a notebook cell).

    Returns:
        Path: Path to the ``BCCD_Dataset`` directory.

    Raises:
        FileNotFoundError: If the dataset directory does not exist after the
            clone attempt.
    """
    import subprocess
    import sys

    # Use the running interpreter's pip so the package is installed into the
    # same environment that will later import it.
    pip_result = subprocess.run(
        [sys.executable, '-m', 'pip', 'install', 'ultralytics'],
        check=False,
    )
    if pip_result.returncode != 0:
        # Best effort, as before: the package may already be available.
        print(f"Warning: 'pip install ultralytics' exited with code {pip_result.returncode}")

    data_dir = Path('BCCD_Dataset')

    # Cloning into an existing directory fails, so only clone when needed.
    if not data_dir.exists():
        try:
            clone_result = subprocess.run(
                ['git', 'clone', 'https://github.com/Shenggan/BCCD_Dataset.git'],
                check=False,
            )
            if clone_result.returncode != 0:
                print(f"Warning: 'git clone' exited with code {clone_result.returncode}")
        except OSError as exc:
            # Raised when the git executable itself cannot be started.
            print(f"Warning: could not run git: {exc}")

    if data_dir.exists():
        print(f"Dataset downloaded to {data_dir.absolute()}")
        return data_dir
    raise FileNotFoundError("Failed to download the dataset")
|
|
def prepare_yolo_format(data_dir):
    """
    Prepare the dataset in YOLO format under ``BCCD_YOLO``.

    For each of the ``train``, ``val`` and ``test`` splits an ``images`` and a
    ``labels`` directory is created. If ``<data_dir>/BCCD/<split>`` exists it
    is converted with ``process_dataset_split``. Otherwise the split is read
    from the Pascal VOC style layout (``ImageSets/Main/<split>.txt``,
    ``JPEGImages`` and ``Annotations``).

    Args:
        data_dir (Path): Path to the BCCD dataset directory

    Returns:
        Path: Path to the created ``BCCD_YOLO`` directory.
    """
    yolo_dir = Path('BCCD_YOLO')
    bccd_root = data_dir / 'BCCD'

    for split in ('train', 'val', 'test'):
        split_dest = yolo_dir / split
        (split_dest / 'images').mkdir(parents=True, exist_ok=True)
        (split_dest / 'labels').mkdir(parents=True, exist_ok=True)

        split_src = bccd_root / split
        if split_src.is_dir():
            # Layout with one folder per split holding images and XML files.
            process_dataset_split(split_src, split_dest)
        else:
            # Layout where split membership is listed in text files.
            _process_voc_split(bccd_root, split, split_dest)

    return yolo_dir


def _process_voc_split(voc_root, split, dest_dir):
    """Convert one split listed in ``ImageSets/Main/<split>.txt`` to YOLO format."""
    split_file = voc_root / 'ImageSets' / 'Main' / f'{split}.txt'
    if not split_file.is_file():
        print(f"Warning: no data found for split '{split}' under {voc_root}")
        return

    with open(split_file) as f:
        # The first token on each non-empty line is the image identifier.
        stems = [line.split()[0] for line in f if line.strip()]

    for stem in stems:
        for suffix in ('.jpg', '.png'):
            img_file = voc_root / 'JPEGImages' / (stem + suffix)
            if img_file.is_file():
                shutil.copy(img_file, dest_dir / 'images' / img_file.name)
                break
        else:
            print(f"Warning: image for '{stem}' not found, skipping")
            continue

        xml_file = voc_root / 'Annotations' / (stem + '.xml')
        if xml_file.is_file():
            convert_annotation(xml_file, dest_dir / 'labels' / (stem + '.txt'))
|
|
def process_dataset_split(src_dir, dest_dir):
    """
    Copy one split's images and write a YOLO label file per XML annotation.

    Args:
        src_dir (Path): Directory containing the split's ``.jpg``/``.png``
            images and ``.xml`` annotation files
        dest_dir (Path): Output directory of the split; files are written to
            its ``images`` and ``labels`` subdirectories
    """
    images_out = dest_dir / 'images'
    labels_out = dest_dir / 'labels'

    # Collect JPEGs first, then PNGs, before any copying starts.
    pictures = [*src_dir.glob('*.jpg'), *src_dir.glob('*.png')]
    for picture in pictures:
        shutil.copy(picture, images_out / picture.name)

    # Every XML annotation becomes a label file with the same stem.
    annotations = list(src_dir.glob('*.xml'))
    for annotation in annotations:
        convert_annotation(annotation, labels_out / (annotation.stem + '.txt'))
|
|
def convert_annotation(xml_path, output_path):
    """
    Convert an XML annotation file to a YOLO format label file.

    Each ``object`` whose name is ``RBC``, ``WBC`` or ``Platelets`` is written
    as one line ``class_id x_center y_center width height`` with coordinates
    normalized by the image size. Objects with any other name are skipped.
    Boxes are clamped to the image bounds, and boxes with no area after
    clamping are dropped.

    Args:
        xml_path (Path): Path to XML annotation file
        output_path (Path): Path to output YOLO format file

    Raises:
        ValueError: If the file contains objects but the declared image width
            or height is not positive.
    """
    class_ids = {'RBC': 0, 'WBC': 1, 'Platelets': 2}

    root = ET.parse(xml_path).getroot()

    size = root.find('size')
    # int(float(...)) also accepts sizes written as e.g. "640.0".
    w = int(float(size.find('width').text))
    h = int(float(size.find('height').text))

    objects = root.findall('object')
    if objects and (w <= 0 or h <= 0):
        # Normalizing would otherwise fail with an unhelpful ZeroDivisionError.
        raise ValueError(f"Invalid image size {w}x{h} in {xml_path}")

    lines = []
    for obj in objects:
        class_name = (obj.findtext('name') or '').strip()
        class_id = class_ids.get(class_name)
        if class_id is None:
            continue

        bbox = obj.find('bndbox')
        # sorted() tolerates annotations whose min/max corners are swapped.
        x_min, x_max = sorted((float(bbox.find('xmin').text),
                               float(bbox.find('xmax').text)))
        y_min, y_max = sorted((float(bbox.find('ymin').text),
                               float(bbox.find('ymax').text)))

        # Clamp to the image so normalized values always stay within [0, 1].
        x_min, x_max = max(0.0, x_min), min(float(w), x_max)
        y_min, y_max = max(0.0, y_min), min(float(h), y_max)

        # A box without area carries no usable information.
        if x_max <= x_min or y_max <= y_min:
            continue

        x_center = (x_min + x_max) / (2.0 * w)
        y_center = (y_min + y_max) / (2.0 * h)
        bbox_width = (x_max - x_min) / w
        bbox_height = (y_max - y_min) / h

        lines.append(
            f"{class_id} {x_center:.6f} {y_center:.6f} "
            f"{bbox_width:.6f} {bbox_height:.6f}\n"
        )

    with open(output_path, 'w') as f:
        f.writelines(lines)
|
|
def create_dataset_yaml(data_dir):
    """
    Write ``dataset.yaml`` describing the dataset into ``data_dir``.

    The file records the absolute dataset path, the relative image folders
    of the train/val/test splits, the class-id to class-name mapping and the
    number of classes.

    Args:
        data_dir (Path): Path to the dataset directory

    Returns:
        Path: Path to the written ``dataset.yaml`` file.
    """
    class_names = ['RBC', 'WBC', 'Platelets']

    config = {'path': str(data_dir.absolute())}
    config['train'] = 'train/images'
    config['val'] = 'val/images'
    config['test'] = 'test/images'
    # Class ids are the positions in class_names: 0, 1, 2.
    config['names'] = dict(enumerate(class_names))
    config['nc'] = len(class_names)

    yaml_file = data_dir / 'dataset.yaml'
    with open(yaml_file, 'w') as stream:
        yaml.dump(config, stream, default_flow_style=False)

    return yaml_file
|
|
def apply_data_augmentation(data_dir):
    """
    Report the size of the training split (augmentation placeholder).

    No files are created or modified here: the function only prints how many
    ``.jpg`` images and ``.txt`` label files are found in the training split.

    Args:
        data_dir (Path): Path to the dataset directory
    """
    print("Applying data augmentation...")

    train_root = data_dir / 'train'

    # Count matches without building intermediate lists.
    image_count = sum(1 for _ in (train_root / 'images').glob('*.jpg'))
    print(f"Training images: {image_count}")

    label_count = sum(1 for _ in (train_root / 'labels').glob('*.txt'))
    print(f"Training labels: {label_count}")

    print("Data augmentation complete.")
|
|
def train_yolov10(data_dir, output_dir=None):
    """
    Fine-tune YOLOv10 on the BCCD dataset.

    Training starts from the ``yolov10n.pt`` checkpoint and uses
    ``<data_dir>/dataset.yaml``. After training, the best checkpoint is
    copied to ``output_dir`` as ``yolov10_bccd_best.pt`` and the model is
    exported to ONNX.

    Args:
        data_dir (Path): Path to the dataset directory
        output_dir (Path | str | None): Directory that receives the best
            checkpoint. Defaults to ``/content/drive/MyDrive/yolov10_bccd``.

    Returns:
        The object returned by ``model.train``.
    """
    # Imported here rather than at module level: ultralytics is installed at
    # runtime by download_bccd_dataset(), and torch is needed only for the
    # CUDA availability check below.
    import torch
    from ultralytics import YOLO

    model = YOLO('yolov10n.pt')

    yaml_path = data_dir / 'dataset.yaml'
    results = model.train(
        data=str(yaml_path),
        epochs=50,
        imgsz=640,
        patience=10,
        batch=16,
        device='0' if torch.cuda.is_available() else 'cpu',
        name='yolov10_bccd'
    )

    if output_dir is None:
        output_dir = Path('/content/drive/MyDrive/yolov10_bccd')
    output_dir = Path(output_dir)
    output_dir.mkdir(exist_ok=True, parents=True)

    # Prefer the path reported by the trainer: the run directory depends on
    # the task and may get a numeric suffix when the run name already exists.
    trainer = getattr(model, 'trainer', None)
    candidates = [
        getattr(trainer, 'best', None),
        Path('runs/detect/yolov10_bccd/weights/best.pt'),
        Path('runs/train/yolov10_bccd/weights/best.pt'),
    ]
    best_pt = next(
        (Path(c) for c in candidates if c is not None and Path(c).exists()),
        None,
    )

    # Copy the weights before exporting so that an export failure cannot
    # prevent the trained checkpoint from being saved.
    if best_pt is not None:
        shutil.copy(best_pt, output_dir / 'yolov10_bccd_best.pt')
        print(f"Model saved to {output_dir / 'yolov10_bccd_best.pt'}")
    else:
        print("Warning: best.pt not found; no checkpoint was copied")

    model.export(format='onnx')

    return results
|
|
def main():
    """
    Main function to execute the fine-tuning process.

    Steps: mount Google Drive (best effort), download the dataset, convert it
    to YOLO format, write the dataset YAML, run the augmentation step and
    train the model.

    Returns:
        The training results returned by ``train_yolov10``.
    """
    # Drive mounting is optional. Catch Exception rather than using a bare
    # except so that KeyboardInterrupt and SystemExit still propagate.
    try:
        from google.colab import drive
        drive.mount('/content/drive')
        print("Google Drive mounted successfully")
    except Exception as exc:
        print("Warning: Not running in Google Colab, or Drive mounting failed.")
        print(f"Reason: {exc}")

    data_dir = download_bccd_dataset()
    print("Dataset downloaded")

    yolo_dir = prepare_yolo_format(data_dir)
    print("Dataset prepared in YOLO format")

    yaml_path = create_dataset_yaml(yolo_dir)
    print(f"Dataset YAML created at {yaml_path}")

    apply_data_augmentation(yolo_dir)

    results = train_yolov10(yolo_dir)
    print("Training complete!")

    return results
|
|
# Run the full pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()