"""Fine-tune an ImageNet-pretrained ResNet-50 on a custom image-folder dataset.

The script trains in two phases (head-only, then full fine-tuning from epoch
``E_UNFREEZE``), measures per-epoch energy with codecarbon, and appends one row
of statistics per epoch to ``CSV_FILENAME``.
"""

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, random_split
from sklearn.metrics import f1_score, precision_score, recall_score
from codecarbon import EmissionsTracker
from thop import profile
from tqdm import tqdm
import time
import pandas as pd
import numpy as np
import os
import warnings
import copy
import gc

# --- Configuration ---
MODEL_NAME = "resnet50_EDEN"
DATASET_NAME = "CustomImageNet300"
# Path to the folder containing your 300 class folders directly
DATA_PATH = r'C:\Users\shanm\Dataset Download\custom image net'
NUM_CLASSES = 300  # Output size of the replacement classification head
BATCH_SIZE = 64
ACCUMULATION_STEPS = 8  # Effective Batch Size = 512
EPOCHS = 20
E_UNFREEZE = 10  # First epoch in which the backbone is trainable
LAMBDA_L1 = 1e-5  # Weight of the L1 penalty on trainable parameters
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
SAVE_DIR = "saved_models"
os.makedirs(SAVE_DIR, exist_ok=True)
CSV_FILENAME = f"{MODEL_NAME}_{DATASET_NAME}_stats.csv"

warnings.filterwarnings("ignore")
os.environ["CODECARBON_LOG_LEVEL"] = "error"


def main():
    """Run the full training job and write per-epoch statistics to disk.

    Side effects:
        * Rewrites ``CSV_FILENAME`` after every epoch with all rows so far.
        * Saves the state dict with the highest *training* accuracy seen so
          far to ``SAVE_DIR/BEST_<MODEL_NAME>_<DATASET_NAME>.pth``.
        * Prints one formatted progress row per epoch.

    Raises:
        ValueError: If the dataset on disk has more class folders than
            ``NUM_CLASSES`` (labels would fall outside the classifier head).
    """
    # CUDA-only features (AMP, pinned memory, VRAM stats) are gated on this so
    # the CPU fallback selected by DEVICE actually runs.
    use_cuda = DEVICE.type == "cuda"

    # --- Phase 1: High-Resolution Initialization ---
    transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    print(f"[*] Loading {DATASET_NAME} from disk (80/20 Random Split)...")
    full_dataset = torchvision.datasets.ImageFolder(root=DATA_PATH, transform=transform)
    if len(full_dataset.classes) > NUM_CLASSES:
        # Fail early with a readable message instead of an out-of-range
        # target error inside the loss.
        raise ValueError(
            f"Dataset at {DATA_PATH!r} has {len(full_dataset.classes)} classes, "
            f"but the classifier head only has {NUM_CLASSES} outputs."
        )

    # Calculate split sizes
    train_size = int(0.8 * len(full_dataset))
    val_size = len(full_dataset) - train_size

    # NOTE(review): the validation split is discarded, so every accuracy below
    # (including the "best" checkpoint selection) is measured on training data.
    train_dataset, _ = random_split(
        full_dataset, [train_size, val_size],
        generator=torch.Generator().manual_seed(42)
    )

    trainloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True,
                             num_workers=4, pin_memory=use_cuda)
    num_batches = len(trainloader)

    # --- Model Setup ---
    model = torchvision.models.resnet50(weights='IMAGENET1K_V1')
    model.fc = nn.Linear(model.fc.in_features, NUM_CLASSES)  # Match 300 classes

    # 1. Profile on clone to avoid 'total_ops' hook error
    print("[*] Calculating hardware metrics (FLOPs/Params)...")
    model_for_profile = copy.deepcopy(model).to(DEVICE)
    dummy_input = torch.randn(1, 3, 224, 224).to(DEVICE)
    flops, params = profile(model_for_profile, inputs=(dummy_input, ), verbose=False)
    del model_for_profile

    # 2. Initially freeze backbone for EDEN Phase 2
    for name, param in model.named_parameters():
        if "fc" not in name:
            param.requires_grad = False

    model.to(DEVICE)
    criterion = nn.CrossEntropyLoss()
    # All parameters are registered up front; frozen ones simply receive no
    # gradient until they are unfrozen at E_UNFREEZE.
    optimizer = optim.AdamW(model.parameters(), lr=1e-3)
    scaler = torch.cuda.amp.GradScaler(enabled=use_cuda)

    results = []
    cumulative_total_energy = 0
    best_acc = 0.0
    # NOTE(review): this single tracker is started/stopped once per epoch.
    # Confirm against the installed codecarbon version that
    # final_emissions_data reports per-run values rather than values
    # accumulated since construction.
    tracker = EmissionsTracker(measure_power_secs=1, save_to_file=False, log_level='error')

    print(f"\n[MODEL INFO] FLOPs: {flops/1e9:.2f} G | Parameters: {params/1e6:.2f} M | Classes: {NUM_CLASSES}")
    print(f"{'='*140}")
    print(f"{'Epoch':<6} | {'Loss':<7} | {'Acc':<7} | {'Total(J)':<9} | {'VRAM(GB)':<9} | {'EAG':<8} | {'Status'}")
    print(f"{'-'*140}")

    for epoch in range(1, EPOCHS + 1):
        if epoch == E_UNFREEZE:
            # Switch to full fine-tuning with a much smaller learning rate.
            for param in model.parameters():
                param.requires_grad = True
            for pg in optimizer.param_groups:
                pg['lr'] = 1e-5
            status_msg = "UNFROZEN"
        else:
            status_msg = "FROZEN" if epoch < E_UNFREEZE else "FINE-TUNING"

        model.train()
        tracker.start()
        epoch_start_time = time.time()
        running_loss, all_preds, all_labels = 0.0, [], []

        # Real-time progress bar
        pbar = tqdm(enumerate(trainloader), total=num_batches, desc=f"Epoch {epoch:02d}", leave=False)

        optimizer.zero_grad()
        for i, (inputs, labels) in pbar:
            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)

            # Number of micro-batches in the accumulation group this batch
            # belongs to; only the final group of an epoch can be short.
            group_start = (i // ACCUMULATION_STEPS) * ACCUMULATION_STEPS
            group_size = min(ACCUMULATION_STEPS, num_batches - group_start)

            with torch.cuda.amp.autocast(enabled=use_cuda):
                outputs = model(inputs)
                cls_loss = criterion(outputs, labels)
                l1_penalty = sum(p.abs().sum() for p in model.parameters() if p.requires_grad)
                loss = (cls_loss + LAMBDA_L1 * l1_penalty) / group_size

            scaler.scale(loss).backward()

            # Step at every full group AND on the last batch, so gradients
            # from a trailing partial group are applied instead of being
            # discarded by the next epoch's zero_grad().
            if (i + 1) % ACCUMULATION_STEPS == 0 or (i + 1) == num_batches:
                scaler.unscale_(optimizer)
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
                scaler.step(optimizer)
                scaler.update()
                optimizer.zero_grad()

            batch_loss = cls_loss.item()  # single host sync per batch
            running_loss += batch_loss
            _, predicted = torch.max(outputs.detach(), 1)
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
            pbar.set_postfix({'loss': f"{batch_loss:.4f}"})

        emissions_kg = tracker.stop()
        duration = time.time() - epoch_start_time

        # Energy Metrics (kWh to Joules)
        e_gpu = tracker.final_emissions_data.gpu_energy * 3600000
        e_cpu = tracker.final_emissions_data.cpu_energy * 3600000
        e_ram = tracker.final_emissions_data.ram_energy * 3600000
        total_energy = e_gpu + e_cpu + e_ram
        cumulative_total_energy += total_energy

        acc = (np.array(all_preds) == np.array(all_labels)).mean()
        # max_memory_allocated is only valid for CUDA devices.
        vram_peak = torch.cuda.max_memory_allocated(DEVICE) / (1024**3) if use_cuda else 0.0
        # Accuracy per kilojoule of energy spent in this epoch.
        eag = acc / (total_energy / 1000) if total_energy > 0 else 0

        # Detailed Audit Log for .csv
        epoch_stats = {
            "epoch": epoch,
            "status": status_msg,
            "loss": running_loss / num_batches,
            "accuracy": acc,
            "total_energy_j": total_energy,
            "cumulative_energy_j": cumulative_total_energy,
            "carbon_kg": emissions_kg,
            "vram_gb": vram_peak,
            "eag_metric": eag,
            "latency_ms": (duration / num_batches) * 1000,
            "model_flops": flops,
            "model_params": params
        }
        results.append(epoch_stats)
        # Rewrite the CSV every epoch so partial results survive a crash.
        pd.DataFrame(results).to_csv(CSV_FILENAME, index=False)

        best_tag = "*" if acc > best_acc else ""
        if acc > best_acc:
            best_acc = acc
            torch.save(model.state_dict(), os.path.join(SAVE_DIR, f"BEST_{MODEL_NAME}_{DATASET_NAME}.pth"))

        # Show the configured epoch count rather than a hard-coded total.
        print(f"{epoch:02d}/{EPOCHS} | {epoch_stats['loss']:.4f} | {acc:.2%} | {total_energy:<9.2f} | {vram_peak:<9.3f} | {eag:<8.4f} | {status_msg}{best_tag}")

    # Final memory flush for overnight batch stability
    del model, trainloader
    torch.cuda.empty_cache()
    gc.collect()
    print(f"{'='*140}\n[FINISH] ResNet-50 on ImageNet300 complete.")


if __name__ == '__main__':
    main()