# Blood_Cell_Object_Detection / finetune_model.py
# Saini16's picture
# Upload 9 files
# d2b859c verified
"""
This script is meant to be run in Google Colab for fine-tuning the YOLOv10 model on the BCCD dataset.
It contains all the steps needed for training and should be run before deploying the application.
"""
import glob
import os
import shutil
import subprocess
import sys
import time
import xml.etree.ElementTree as ET
import zipfile
from pathlib import Path

import numpy as np
import requests
import ultralytics
from ultralytics import YOLO
def _run_command(args):
    """Run an external command; return True only if it exited with status 0."""
    try:
        return subprocess.run(args, check=False).returncode == 0
    except OSError as err:
        # Raised when the executable itself (e.g. git) is not installed.
        print(f"Could not run {args[0]}: {err}")
        return False


def download_bccd_dataset():
    """
    Downloads the BCCD dataset from the GitHub repository.

    The repository is cloned with git first. If that does not produce the
    ``BCCD_Dataset`` directory, the master-branch ZIP archive is downloaded
    and unpacked instead. An already existing ``BCCD_Dataset`` directory is
    reused without cloning again.

    Returns:
        Path to the dataset directory (``BCCD_Dataset``, relative to the
        current working directory).

    Raises:
        requests.RequestException: If the fallback HTTP download fails.
        RuntimeError: If neither method produced the dataset directory.
    """
    dataset_dir = Path('BCCD_Dataset')

    # Install dependencies if needed. Best effort: a failed install is
    # reported but does not abort the download.
    if not _run_command([sys.executable, '-m', 'pip', 'install',
                         'ultralytics', 'gdown']):
        print("Warning: dependency installation failed; continuing anyway.")

    # Clone the repository (git refuses to clone into an existing directory,
    # so skip it when the dataset is already present).
    if not dataset_dir.exists():
        _run_command(['git', 'clone',
                      'https://github.com/Shenggan/BCCD_Dataset.git'])

    if not dataset_dir.exists():
        print("Failed to download dataset using git. Trying alternative download...")
        # Alternative download method using the repository ZIP archive
        url = "https://github.com/Shenggan/BCCD_Dataset/archive/refs/heads/master.zip"
        response = requests.get(url, allow_redirects=True, timeout=60)
        # Fail loudly on HTTP errors instead of saving an error page as a zip.
        response.raise_for_status()
        with open('bccd_dataset.zip', 'wb') as f:
            f.write(response.content)

        # Extract the zipfile
        with zipfile.ZipFile('bccd_dataset.zip', 'r') as zip_ref:
            zip_ref.extractall('.')

        # The archive unpacks to 'BCCD_Dataset-master'. The target directory
        # is deliberately not pre-created: renaming the extracted tree avoids
        # the FileExistsError that copying into a pre-made 'BCCD' subfolder
        # would raise.
        extracted_dir = Path('BCCD_Dataset-master')
        if extracted_dir.exists():
            shutil.move(str(extracted_dir), str(dataset_dir))

    if not dataset_dir.exists():
        raise RuntimeError(
            "Could not obtain the BCCD dataset via git clone or ZIP download."
        )

    print("Dataset downloaded successfully.")
    return dataset_dir
def _collect_split_samples(bccd_root, split_name):
    """Return a list of (image_path, annotation_path) pairs for one split."""
    # Layout 1: one directory per split holding both .jpg and .xml files.
    split_dir = bccd_root / split_name
    if split_dir.is_dir():
        return [
            (img_file, split_dir / f"{img_file.stem}.xml")
            for img_file in sorted(split_dir.glob('*.jpg'))
        ]

    # Layout 2: Pascal-VOC style tree, where ImageSets/Main/<split>.txt lists
    # the image stems belonging to the split.
    split_list = bccd_root / 'ImageSets' / 'Main' / f"{split_name}.txt"
    if not split_list.is_file():
        return []

    samples = []
    with open(split_list) as f:
        for line in f:
            fields = line.split()
            if not fields:
                continue
            stem = fields[0]
            img_file = bccd_root / 'JPEGImages' / f"{stem}.jpg"
            if img_file.is_file():
                samples.append(
                    (img_file, bccd_root / 'Annotations' / f"{stem}.xml")
                )
    return samples


def setup_dataset_for_yolo(dataset_path):
    """
    Prepares the BCCD dataset for YOLO format.

    Creates ``BCCD_YOLO/<split>/images`` and ``BCCD_YOLO/<split>/labels`` for
    the train, val and test splits in the current working directory, copies
    every image of a split and converts its XML annotation (when one exists)
    with ``convert_annotations``.

    Two source layouts under ``<dataset_path>/BCCD`` are supported:
    per-split directories (``train/``, ``val/``, ``test/``) and a Pascal-VOC
    style tree (``JPEGImages/``, ``Annotations/``, ``ImageSets/Main/*.txt``).
    Per-split directories take priority when present.

    Args:
        dataset_path: Path to the downloaded dataset
    Returns:
        Path to the processed dataset
    """
    dataset_path = Path(dataset_path)
    bccd_root = dataset_path / 'BCCD'
    yolo_dir = Path('BCCD_YOLO')

    for split_name in ('train', 'val', 'test'):
        images_out = yolo_dir / split_name / 'images'
        labels_out = yolo_dir / split_name / 'labels'
        os.makedirs(images_out, exist_ok=True)
        os.makedirs(labels_out, exist_ok=True)

        samples = _collect_split_samples(bccd_root, split_name)
        if not samples:
            # An empty split otherwise only surfaces later as a confusing
            # training failure, so report it here.
            print(f"Warning: no images found for split '{split_name}' under {bccd_root}")

        for img_file, xml_file in samples:
            # Copy image
            shutil.copy(img_file, images_out / img_file.name)
            # Convert annotation; images without one are copied unlabeled.
            if xml_file.exists():
                convert_annotations(xml_file, labels_out / f"{img_file.stem}.txt")

    return yolo_dir
def convert_annotations(xml_path, txt_path):
    """
    Converts XML annotations to YOLO format TXT files.

    Each kept object becomes one line
    ``<class_id> <x_center> <y_center> <width> <height>`` with coordinates
    normalized by the image size read from the XML ``<size>`` element.
    Objects with an unknown class name or without a ``<bndbox>`` are skipped.
    Boxes are clamped to the image bounds, and boxes that have no area after
    clamping are dropped, so every written value lies in [0, 1].

    Args:
        xml_path: Path to XML annotation file
        txt_path: Path to output TXT file
    Raises:
        ValueError: If the image size is missing, non-numeric or not positive.
    """
    root = ET.parse(xml_path).getroot()

    # Get image dimensions
    size = root.find('size')
    try:
        img_width = int(float(size.findtext('width')))
        img_height = int(float(size.findtext('height')))
    except (AttributeError, TypeError, ValueError) as err:
        raise ValueError(f"Missing or invalid image size in {xml_path}") from err
    if img_width <= 0 or img_height <= 0:
        raise ValueError(f"Non-positive image size in {xml_path}")

    # Map class names to IDs
    class_map = {'RBC': 0, 'WBC': 1, 'Platelets': 2}

    label_lines = []
    for obj in root.findall('object'):
        cls_name = (obj.findtext('name') or '').strip()
        if cls_name not in class_map:
            continue
        bbox = obj.find('bndbox')
        if bbox is None:
            continue

        # Get bounding box coordinates, clamped to the image so that the
        # normalized values cannot fall outside [0, 1].
        x_min = min(max(float(bbox.findtext('xmin')), 0.0), img_width)
        y_min = min(max(float(bbox.findtext('ymin')), 0.0), img_height)
        x_max = min(max(float(bbox.findtext('xmax')), 0.0), img_width)
        y_max = min(max(float(bbox.findtext('ymax')), 0.0), img_height)

        # Skip degenerate (zero-area or inverted) boxes.
        if x_max <= x_min or y_max <= y_min:
            continue

        # Convert to YOLO format: center_x, center_y, width, height
        x_center = (x_min + x_max) / 2.0 / img_width
        y_center = (y_min + y_max) / 2.0 / img_height
        width = (x_max - x_min) / img_width
        height = (y_max - y_min) / img_height

        label_lines.append(
            f"{class_map[cls_name]} {x_center:.6f} {y_center:.6f} "
            f"{width:.6f} {height:.6f}\n"
        )

    # Write to file (an empty file is produced when no object is kept)
    with open(txt_path, 'w') as f:
        f.writelines(label_lines)
def create_dataset_yaml(dataset_path):
    """
    Creates the YAML file required by YOLOv10 for training.

    The file is written to ``<dataset_path>/bccd.yaml`` and points at the
    ``train/images``, ``val/images`` and ``test/images`` folders relative to
    the absolute dataset root. Class entries are nested under ``names:`` so
    that the key maps class IDs to names.

    Args:
        dataset_path: Path to the processed dataset
    Returns:
        Path to the written YAML file
    """
    dataset_path = Path(dataset_path)
    class_names = ('RBC', 'WBC', 'Platelets')

    # Single-quote the root path (doubling embedded quotes) so that spaces,
    # '#' or ': ' inside the path cannot change how the YAML is parsed.
    quoted_root = "'" + str(dataset_path.absolute()).replace("'", "''") + "'"

    lines = [
        "# YOLOv10 dataset config for BCCD",
        f"path: {quoted_root}  # Root directory",
        "train: train/images  # Train images relative to path",
        "val: val/images  # Validation images relative to path",
        "test: test/images  # Test images relative to path",
        "# Classes",
        "names:",
    ]
    lines.extend(f"  {idx}: {name}" for idx, name in enumerate(class_names))
    lines.extend([
        "# Number of classes",
        f"nc: {len(class_names)}",
    ])

    yaml_path = dataset_path / 'bccd.yaml'
    with open(yaml_path, 'w', encoding='utf-8') as f:
        f.write("\n".join(lines) + "\n")
    return yaml_path
def _resolve_best_weights(model, default_path):
    """Return the best-checkpoint path reported by the trainer, else default_path."""
    # Ultralytics can append a numeric suffix to the run directory when
    # 'BCCD_Training/yolov10_bccd' already exists, so prefer the location the
    # trainer reports. NOTE(review): relies on the trainer exposing a `best`
    # attribute -- confirm against the installed ultralytics version.
    trainer = getattr(model, 'trainer', None)
    reported = getattr(trainer, 'best', None)
    if reported is not None and Path(reported).exists():
        return Path(reported)
    return default_path


def _copy_to_google_drive(best_model_path):
    """Copy the trained weights to Google Drive when running inside Colab."""
    try:
        from google.colab import drive
    except ImportError:
        print("Not running in Colab or couldn't mount Google Drive.")
        return

    try:
        # Without mounting, /content/drive is just a local folder on the VM
        # and the copy would never reach Google Drive.
        drive.mount('/content/drive')
        drive_path = Path('/content/drive/MyDrive/BCCD_Model')
        drive_path.mkdir(exist_ok=True, parents=True)
        model_save_path = drive_path / 'yolov10_bccd.pt'
        shutil.copy(best_model_path, model_save_path)
        print(f"Model saved to Google Drive at {model_save_path}")
    except Exception as err:
        # Saving to Drive is optional; report the cause but keep the local model.
        print(f"Could not save model to Google Drive: {err}")


def train_model(dataset_path):
    """
    Trains YOLOv10 on the BCCD dataset.

    Writes the dataset YAML, fine-tunes the pretrained ``yolov10n.pt``
    checkpoint, exports the model to ONNX and, when running in Google Colab,
    copies the best weights to Google Drive.

    Args:
        dataset_path: Path to the processed dataset
    Returns:
        Path to the trained model
    """
    # Create YAML config file
    yaml_path = create_dataset_yaml(dataset_path)

    # Imported here so the module can be loaded without torch installed.
    import torch

    # Load a pretrained YOLOv10 model
    # Note: Use 'yolov10n.pt' for faster training, or 'yolov10s.pt' for better accuracy
    model = YOLO('yolov10n.pt')  # Nano model

    # Train the model
    use_cuda = torch.cuda.is_available()
    device = '0' if use_cuda else 'cpu'
    print(f"Training on device: {device}")
    model.train(
        data=str(yaml_path),
        epochs=50,      # Number of epochs
        imgsz=640,      # Image size
        batch=16,       # Batch size
        patience=15,    # Early stopping patience
        device=device,
        project='BCCD_Training',
        name='yolov10_bccd',
        seed=42,
        workers=8 if use_cuda else 1,
    )

    # Get the path to the best model
    best_model_path = _resolve_best_weights(
        model, Path('BCCD_Training/yolov10_bccd/weights/best.pt')
    )

    # Export the model to other formats if needed
    model.export(format='onnx')

    # Copy model to Google Drive if running in Colab
    _copy_to_google_drive(best_model_path)

    return best_model_path
def main():
    """
    Run the whole fine-tuning pipeline end to end.

    Downloads the dataset, converts it to YOLO format, trains the model and
    prints the elapsed wall-clock time in minutes.

    Returns:
        Path to the trained model, as returned by ``train_model``.
    """
    started_at = time.time()

    print("Step 1: Downloading BCCD dataset...")
    raw_dataset = download_bccd_dataset()

    print("Step 2: Setting up dataset in YOLO format...")
    yolo_dataset = setup_dataset_for_yolo(raw_dataset)

    print("Step 3: Training YOLOv10 model...")
    best_weights = train_model(yolo_dataset)

    # Report the duration in minutes rather than seconds.
    minutes_taken = (time.time() - started_at) / 60
    print(f"Training completed in {minutes_taken:.2f} minutes.")
    print(f"Trained model saved at: {best_weights}")
    return best_weights


# Run the pipeline only when executed as a script, not on import.
if __name__ == "__main__":
    main()