"""Train VGG16 on an ImageFolder dataset and log per-epoch quality/energy stats.

For every epoch the script records loss, accuracy, macro F1/precision/recall,
energy and power readings from codecarbon, peak VRAM, gradient norms and
timing figures, and rewrites the full history to ``CSV_FILENAME``. The weights
with the best training accuracy so far are saved under ``SAVE_DIR``.
"""
import copy
import os
import time
import traceback
import warnings
from datetime import timedelta

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from codecarbon import EmissionsTracker
from sklearn.metrics import f1_score, precision_score, recall_score
from thop import profile
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
from tqdm import tqdm

# --- Configuration ---
MODEL_NAME = "vgg16"
DATASET_NAME = "CustomImageNet300"
DATA_PATH = r'C:\Users\shanm\Dataset Download\custom image net'
NUM_CLASSES = 300  # output size of the replaced classifier head
BATCH_SIZE = 32
EPOCHS = 50
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
SAVE_DIR = "saved_models"
os.makedirs(SAVE_DIR, exist_ok=True)
CSV_FILENAME = f"{MODEL_NAME}_{DATASET_NAME}_stats.csv"

# 1 kWh = 3 600 000 J; used to turn the tracker's energy readings into the
# joule-denominated ("*_j") CSV columns.
JOULES_PER_KWH = 3600000

warnings.filterwarnings("ignore")
os.environ["CODECARBON_LOG_LEVEL"] = "error"


def _train_one_epoch(model, loader, criterion, optimizer, epoch):
    """Run one optimisation pass over ``loader``.

    Args:
        model: Network to train; switched to training mode here.
        loader: Iterable of ``(inputs, labels)`` batches.
        criterion: Loss callable applied to ``(outputs, labels)``.
        optimizer: Optimizer stepping ``model``'s parameters.
        epoch: 1-based epoch number, only used for the progress-bar label.

    Returns:
        Tuple ``(running_loss, all_preds, all_labels, grad_norms)`` where
        ``running_loss`` is the sum of per-batch losses, ``all_preds`` and
        ``all_labels`` are flat lists of class indices, and ``grad_norms``
        holds the value returned by ``clip_grad_norm_`` for each batch.
    """
    model.train()
    running_loss = 0.0
    all_preds, all_labels, grad_norms = [], [], []

    pbar = tqdm(enumerate(loader), total=len(loader), desc=f"Epoch {epoch}/{EPOCHS}")
    for i, (inputs, labels) in pbar:
        inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()

        # max_norm=100 is a loose cap; the returned norm is what gets logged.
        grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=100)
        grad_norms.append(grad_norm.item())
        optimizer.step()

        running_loss += loss.item()
        predicted = outputs.detach().argmax(dim=1)
        pbar.set_postfix({'loss': f'{running_loss/(i+1):.4f}'})

        all_preds.extend(predicted.cpu().tolist())
        all_labels.extend(labels.cpu().tolist())

    return running_loss, all_preds, all_labels, grad_norms


def main():
    """Train the model for ``EPOCHS`` epochs, logging stats and checkpoints.

    Returns early (after printing an ``[ERROR]`` line) when ``DATA_PATH`` does
    not exist or the dataset has more classes than ``NUM_CLASSES``. Exceptions
    raised during training are printed with a traceback rather than
    propagated; the closing status line states whether the run completed.
    """
    # 1. Data Loading
    transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    if not os.path.exists(DATA_PATH):
        print(f"[ERROR] Dataset path not found: {DATA_PATH}")
        return

    trainset = ImageFolder(root=DATA_PATH, transform=transform)

    # Labels >= NUM_CLASSES would fail inside the loss with an obscure
    # out-of-range error, so reject the mismatch up front.
    if len(trainset.classes) > NUM_CLASSES:
        print(f"[ERROR] Dataset has {len(trainset.classes)} classes but the model "
              f"head only has {NUM_CLASSES} outputs")
        return

    trainloader = DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True,
                             num_workers=2, pin_memory=True)

    # 2. Model setup
    model = torchvision.models.vgg16(weights=None)
    model.classifier[6] = nn.Linear(4096, NUM_CLASSES)
    model.to(DEVICE)

    # Profile a throw-away copy: thop attaches bookkeeping buffers/hooks to the
    # modules it profiles, and some versions leave them behind, which would
    # leak into the saved state_dict.
    # NOTE(review): thop documents the first return value as MACs; the "flops"
    # naming is kept so the CSV columns stay unchanged.
    dummy_input = torch.randn(1, 3, 224, 224).to(DEVICE)
    flops, params = profile(copy.deepcopy(model), inputs=(dummy_input, ), verbose=False)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)

    results = []
    cumulative_total_energy = 0
    total_start_time = time.time()
    best_acc = 0.0
    completed = False

    print(f"\n[MODEL INFO] FLOPs: {flops/1e9:.2f} G | Parameters: {params/1e6:.2f} M")
    print("="*125)
    print(f"TRAINING {MODEL_NAME.upper()} ON {DATASET_NAME}")
    print("-" * 125)

    try:
        for epoch in range(1, EPOCHS + 1):
            # Reset so vram_peak_gb describes this epoch, not the whole process.
            if torch.cuda.is_available():
                torch.cuda.reset_peak_memory_stats(DEVICE)

            # One tracker per epoch so the energy figures are per-epoch.
            tracker = EmissionsTracker(measure_power_secs=1, save_to_file=False,
                                       log_level='error')
            tracker.start()
            epoch_start_time = time.time()
            try:
                running_loss, all_preds, all_labels, grad_norms = _train_one_epoch(
                    model, trainloader, criterion, optimizer, epoch)
                scheduler.step()
                duration = time.time() - epoch_start_time
            finally:
                # Always stop the tracker, even when training raised, so its
                # measurement does not keep running after a failure.
                emissions_kg = tracker.stop()

            # final_emissions_data is read only after stop() has been called.
            energy = tracker.final_emissions_data
            e_gpu = energy.gpu_energy * JOULES_PER_KWH
            e_cpu = energy.cpu_energy * JOULES_PER_KWH
            e_ram = energy.ram_energy * JOULES_PER_KWH
            total_energy = e_gpu + e_cpu + e_ram
            cumulative_total_energy += total_energy

            acc = (np.array(all_preds) == np.array(all_labels)).mean()
            f1 = f1_score(all_labels, all_preds, average='macro')
            vram_peak = (torch.cuda.max_memory_allocated(DEVICE) / (1024**3)
                         if torch.cuda.is_available() else 0)

            elapsed_total = time.time() - total_start_time
            avg_per_epoch = elapsed_total / epoch
            eta = str(timedelta(seconds=int(avg_per_epoch * (EPOCHS - epoch))))

            num_batches = len(trainloader)
            epoch_stats = {
                "epoch": epoch,
                "loss": running_loss / num_batches,
                "accuracy": acc,
                "f1_score": f1,
                "precision": precision_score(all_labels, all_preds, average='macro',
                                             zero_division=0),
                "recall": recall_score(all_labels, all_preds, average='macro',
                                       zero_division=0),
                "epoch_energy_gpu_j": e_gpu,
                "epoch_energy_cpu_j": e_cpu,
                "epoch_energy_ram_j": e_ram,
                "epoch_total_energy_j": total_energy,
                "cumulative_total_energy_j": cumulative_total_energy,
                "carbon_emissions_kg": emissions_kg,
                "vram_peak_gb": vram_peak,
                "avg_power_gpu_w": energy.gpu_power,
                "avg_power_cpu_w": energy.cpu_power,
                "avg_power_ram_w": energy.ram_power,
                "latency_ms": (duration / num_batches) * 1000,
                "avg_grad_norm": np.mean(grad_norms),
                # Accuracy per kJ of energy spent in this epoch.
                "eag_metric": acc / (total_energy / 1000) if total_energy > 0 else 0,
                "it_per_sec": num_batches / duration,
                "total_iterations": num_batches,
                "epoch_duration_sec": duration,
                "cumulative_time_sec": elapsed_total,
                "model_flops": flops,
                "model_parameters": params,
            }

            # Rewrite the whole CSV each epoch so a crash keeps earlier rows.
            results.append(epoch_stats)
            pd.DataFrame(results).to_csv(CSV_FILENAME, index=False)

            if acc > best_acc:
                best_acc = acc
                torch.save(model.state_dict(),
                           os.path.join(SAVE_DIR, f"BEST_{MODEL_NAME}_{DATASET_NAME}.pth"))
                best_msg = " (Best Saved!)"
            else:
                best_msg = ""

            print(f"\nEpoch {epoch:02d} Summary: Loss: {epoch_stats['loss']:.4f} | Acc: {acc:.2%} | Energy: {total_energy:.2f}J | VRAM: {vram_peak:.2f}GB | ETA: {eta}{best_msg}\n")
            print("-" * 125)

        completed = True
    except Exception as e:
        # Top-level boundary: report the failure and fall through to the
        # status line instead of propagating.
        print(f"\n[CRASH] Error: {e}")
        traceback.print_exc()
    finally:
        # Only claim success when every epoch finished; this also covers
        # interruptions (e.g. Ctrl-C) that bypass the except clause.
        if completed:
            print(f"\n[SUCCESS] Training Complete. Results saved to {CSV_FILENAME}")
        elif results:
            print(f"\n[INCOMPLETE] Training stopped after {len(results)} epoch(s). "
                  f"Partial results saved to {CSV_FILENAME}")
        else:
            print("\n[INCOMPLETE] Training stopped before any epoch finished. "
                  "No results were saved.")


if __name__ == '__main__':
    main()