terra-cognita-frugal-ai

Sleeping

File size: 13,417 Bytes

from fastapi import APIRouter
print(1)
from datetime import datetime
from datasets import load_dataset
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score
import random
import os
from ultralytics import YOLO

from .utils.evaluation import ImageEvaluationRequest
print(2)
from .utils.emissions import tracker, clean_emissions_data, get_space_info

from dotenv import load_dotenv
load_dotenv()

# Router on which the image-evaluation endpoint defined below is registered.
router = APIRouter()

import torch

# The two prints below run once, at import time, and only emit diagnostics.
# Get CUDA version (the one PyTorch was compiled with)
print("CUDA version:", torch.version.cuda)

# Get cuDNN version
print("cuDNN version:", torch.backends.cudnn.version())

#MODEL_TYPE = "YOLOv11n"
# Model description: used as the route's OpenAPI description and returned in
# the results under "model_description".
# NOTE(review): this says "batch_64", but evaluate_image loads
# 'models/best_YOLOv11n_640_half_batch_32.engine' with BATCH_SIZE = 32 —
# confirm which one is intended.
DESCRIPTION = f"best_YOLOv11n_640_half_batch_64.engine on TensorRT"
# Path of the evaluation endpoint; also echoed in the results as "api_route".
ROUTE = "/image"

def parse_boxes(annotation_string):
    """Extract the bounding boxes encoded in one annotation string.

    The string is a whitespace-separated sequence of numbers grouped five at
    a time: class_id, x_center, y_center, width, height. The class id is
    dropped, so each returned box is [x_center, y_center, width, height].
    A trailing group with fewer than five numbers is ignored.
    """
    numbers = list(map(float, annotation_string.split()))
    complete_groups = len(numbers) // 5
    # Offset +1 skips the class_id that opens every group of five.
    return [
        numbers[5 * group + 1:5 * group + 5]
        for group in range(complete_groups)
    ]

def compute_iou(box1, box2):
    """Return the Intersection over Union (IoU) of two YOLO-format boxes.

    Each box is (x_center, y_center, width, height). A small epsilon (1e-6)
    is added to the union so that two zero-area boxes do not divide by zero.
    """
    def yolo_to_corners(box):
        # (center, size) -> (x_min, y_min, x_max, y_max)
        cx, cy, w, h = box
        return np.array([cx - w/2, cy - h/2, cx + w/2, cy + h/2])

    a = yolo_to_corners(box1)
    b = yolo_to_corners(box2)

    # Overlap rectangle: the innermost edge on each side.
    left = max(a[0], b[0])
    top = max(a[1], b[1])
    right = min(a[2], b[2])
    bottom = min(a[3], b[3])

    # A negative extent means the boxes do not overlap along that axis.
    overlap_width = max(0, right - left)
    overlap_height = max(0, bottom - top)
    intersection = overlap_width * overlap_height

    area_a = (a[2] - a[0]) * (a[3] - a[1])
    area_b = (b[2] - b[0]) * (b[3] - b[1])
    union = area_a + area_b - intersection

    return intersection / (union + 1e-6)

def compute_max_iou(true_boxes, pred_box):
    """Return the highest IoU between pred_box and any box in true_boxes.

    The result is 0 when true_boxes is empty.
    """
    # Seeding with 0 gives the floor value and covers the empty case.
    candidates = [0]
    candidates.extend(compute_iou(gt_box, pred_box) for gt_box in true_boxes)
    return max(candidates)

def load_model(path_to_model, model_type="YOLO"):
    """Build a model object from the weights at path_to_model.

    Only model_type "YOLO" is supported; it is constructed with
    YOLO(path_to_model). Any other model_type raises NotImplementedError.
    """
    if model_type != "YOLO":
        raise NotImplementedError
    return YOLO(path_to_model)

def get_boxes_list(predictions):
    """Return every entry of predictions.boxes.xywhn converted via tolist()."""
    boxes_as_lists = []
    for row in predictions.boxes.xywhn:
        boxes_as_lists.append(row.tolist())
    return boxes_as_lists

def _summarize_durations(durations):
    """Return (total, mean, std) in seconds for a list of timedeltas."""
    seconds = [delta.total_seconds() for delta in durations]
    if not seconds:
        # np.mean/np.std of an empty list return NaN; report zeros instead.
        return 0.0, 0.0, 0.0
    return float(sum(seconds)), float(np.mean(seconds)), float(np.std(seconds))


@router.post(ROUTE, tags=["Image Task"],
             description=DESCRIPTION)
async def evaluate_image(request: ImageEvaluationRequest):
    """
    Evaluate image classification and object detection for forest fire smoke.

    Current Model: YOLO detector loaded with ultralytics from PATH_TO_MODEL
    - An image is classified as "smoke" when the model returns at least one box
    - The first box returned for an image is used as its predicted box;
      [0, 0, 0, 0] is used when the model returned none
    - Inference runs in batches of BATCH_SIZE; a short last batch is padded by
      repeating its last image, and the padded results are discarded

    Metrics:
    - Classification accuracy, precision and recall: whether an image contains
      smoke or not (an image is positive when its annotation is non-empty)
    - Object Detection accuracy: mean over annotated images of the maximum IoU
      (Intersection over Union) between the predicted box and the true boxes

    Args:
        request: Evaluation request; dataset_name, test_size and test_seed
            are read from it.

    Returns:
        dict with the space info, metrics, emissions data, dataset
        configuration and a "times" section whose values are all seconds.
    """
    # Get space info
    username, space_url = get_space_info()

    # Load and prepare the dataset
    dataset = load_dataset(request.dataset_name, token=os.getenv("HF_TOKEN"))

    # Split dataset
    train_test = dataset["train"].train_test_split(test_size=request.test_size, seed=request.test_seed)
    test_dataset = train_test["test"]

    # Start tracking emissions
    tracker.start()
    tracker.start_task("inference")

    #--------------------------------------------------------------------------------------------
    # MODEL INFERENCE STARTS HERE
    #--------------------------------------------------------------------------------------------

    PATH_TO_MODEL = 'models/best_YOLOv11n_640_half_batch_32.engine'
    INPUT_SIZE = 640
    # Every batch handed to the model has exactly this many images.
    # NOTE(review): presumably must match the batch size the engine was
    # exported with — confirm before changing.
    BATCH_SIZE = 32
    print("PATH_TO_MODEL", PATH_TO_MODEL)

    print("Starting inference")
    n_examples = len(test_dataset)
    model = YOLO(PATH_TO_MODEL)

    # First pass - collect annotations; a non-empty annotation means "smoke"
    start_time = datetime.now()
    annotations_list = []
    for i, example in enumerate(test_dataset):
        if i % 200 == 0:
            print(f"Processing annotations {i+1} of {n_examples}")
        annotations_list.append(example.get("annotations", "").strip())
    true_labels = [int(len(annotation) > 0) for annotation in annotations_list]

    end_annotations = datetime.now()
    annotations_time = end_annotations - start_time
    print("Time taken to process annotations", annotations_time)

    # Second pass - batch predictions
    all_preds = []  # first predicted box per image, [] when nothing was detected
    all_binary_classifications = []  # 1 when at least one box was predicted
    batch_preprocessing_times = []
    inference_times = []
    postprocessing_times = []
    n_batches = (n_examples + BATCH_SIZE - 1)//BATCH_SIZE

    for i, batch_start in enumerate(range(0, n_examples, BATCH_SIZE)):
        start_batch = datetime.now()
        batch_end = min(batch_start + BATCH_SIZE, n_examples)
        if i % 100 == 0:
            print(f"Processing batch {batch_start//BATCH_SIZE + 1} of {n_batches}")
            print(f"Batch start: {batch_start}, Batch end: {batch_end}")

        batch_images = [test_dataset[j]['image'] for j in range(batch_start, batch_end)]

        # Pad the last batch by duplicating its last image
        if len(batch_images) < BATCH_SIZE:
            print(f"Padding last batch from {len(batch_images)} to {BATCH_SIZE} images")
            padding_needed = BATCH_SIZE - len(batch_images)
            batch_images.extend([batch_images[-1]] * padding_needed)
        end_batch_preprocessing = datetime.now()
        batch_preprocessing_times.append(end_batch_preprocessing - start_batch)

        # Get predictions for batch
        start_inference = datetime.now()
        batch_results = model.predict(batch_images, imgsz=INPUT_SIZE, verbose=True)
        end_inference = datetime.now()
        inference_times.append(end_inference - start_inference)

        # Only process the actual examples (not padding)
        start_postprocessing = datetime.now()
        for result in batch_results[:batch_end - batch_start]:
            # Convert once per image; only the first box is kept for simplicity
            boxes = result.boxes.xywhn.tolist()
            all_preds.append(boxes[0] if boxes else [])
            all_binary_classifications.append(int(len(boxes) > 0))
        end_postprocessing = datetime.now()
        postprocessing_times.append(end_postprocessing - start_postprocessing)

    # Pair true and predicted boxes, only for images that contain smoke
    start_final_processing = datetime.now()
    true_boxes_list = []  # one list of true boxes per annotated image
    pred_boxes = []  # one predicted box per annotated image
    for annotation, predicted_box in zip(annotations_list, all_preds):
        if not annotation:
            continue
        true_boxes_list.append(parse_boxes(annotation))
        # A missed detection is scored against a zero-area box
        pred_boxes.append(predicted_box if predicted_box else [0, 0, 0, 0])

    end_final_processing = datetime.now()
    final_processing_time = end_final_processing - start_final_processing
    full_pipeline_time = end_final_processing - start_time
    # Derived from the list length so an empty test split cannot raise here
    print("Processing completed with last index", len(all_preds) - 1)
    print("Time taken to process final processing", final_processing_time)
    print("Time taken for full pipeline", full_pipeline_time)
    print("Time taken to process annotations", annotations_time)

    (total_batch_preprocessing_time,
     avg_batch_preprocessing_time,
     std_batch_preprocessing_time) = _summarize_durations(batch_preprocessing_times)
    (total_inference_time,
     avg_inference_time,
     std_inference_time) = _summarize_durations(inference_times)
    (total_postprocessing_time,
     avg_postprocessing_time,
     std_postprocessing_time) = _summarize_durations(postprocessing_times)

    print(
        "Time taken to process batch preprocessing",
        total_batch_preprocessing_time,
        "\navg:", avg_batch_preprocessing_time,
        "\nstd:", std_batch_preprocessing_time
    )
    print("Time taken to process inference",
          total_inference_time,
          "\navg:", avg_inference_time,
          "\nstd:", std_inference_time)
    print("Time taken to process postprocessing",
          total_postprocessing_time,
          "\navg:", avg_postprocessing_time,
          "\nstd:", std_postprocessing_time)
    #--------------------------------------------------------------------------------------------
    # MODEL INFERENCE STOPS HERE
    #--------------------------------------------------------------------------------------------

    # Stop tracking emissions
    emissions_data = tracker.stop_task()
    predictions = all_binary_classifications
    # Calculate classification metrics
    classification_accuracy = accuracy_score(true_labels, predictions)
    classification_precision = precision_score(true_labels, predictions)
    classification_recall = recall_score(true_labels, predictions)

    # Calculate mean IoU for object detection (only for images with smoke)
    # For each image, we compute the max IoU between the predicted box and all true boxes
    print("Calculating mean IoU")
    ious = [
        compute_max_iou(true_boxes, pred_box)
        for true_boxes, pred_box in zip(true_boxes_list, pred_boxes)
    ]
    mean_iou = float(np.mean(ious)) if ious else 0.0
    print("Mean IoU calculated")

    # Prepare results dictionary
    results = {
        "username": username,
        "space_url": space_url,
        "submission_timestamp": datetime.now().isoformat(),
        "model_description": DESCRIPTION,
        "classification_accuracy": float(classification_accuracy),
        "classification_precision": float(classification_precision),
        "classification_recall": float(classification_recall),
        "mean_iou": mean_iou,
        "energy_consumed_wh": emissions_data.energy_consumed * 1000,
        "emissions_gco2eq": emissions_data.emissions * 1000,
        "emissions_data": clean_emissions_data(emissions_data),
        "api_route": ROUTE,
        "dataset_config": {
            "dataset_name": request.dataset_name,
            "test_size": request.test_size,
            "test_seed": request.test_seed
        },
        # All values are plain floats, in seconds
        "times": {
            "full_pipeline": full_pipeline_time.total_seconds(),
            "annotations": annotations_time.total_seconds(),
            "final_processing": final_processing_time.total_seconds(),
            "batch_preprocessing": total_batch_preprocessing_time,
            "inference": total_inference_time,
            "postprocessing": total_postprocessing_time,
            "batch_preprocessing_avg": avg_batch_preprocessing_time,
            "inference_avg": avg_inference_time,
            "postprocessing_avg": avg_postprocessing_time,
            "batch_preprocessing_std": std_batch_preprocessing_time,
            "inference_std": std_inference_time,
            "postprocessing_std": std_postprocessing_time
        }
    }
    print("Result returned")
    return results