EDEN-Core-Scripts / test3 /eden_UNet_ImageNet.py
Shanmuk4622's picture
Upload test3/eden_UNet_ImageNet.py with huggingface_hub
abc8030 verified
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, random_split
from sklearn.metrics import f1_score, precision_score, recall_score
from codecarbon import EmissionsTracker
from thop import profile
from tqdm import tqdm
import time, pandas as pd, numpy as np, os, warnings, copy, gc
# --- Configuration ---
# Run identity: these two names are combined into the CSV log name below and
# into the checkpoint file name written by main().
MODEL_NAME = "unet_classifier_EDEN"
DATASET_NAME = "CustomImageNet300"
CSV_FILENAME = f"{MODEL_NAME}_{DATASET_NAME}_stats.csv"

# Path to the folder that directly contains the 300 class folders
# (passed to ImageFolder as its root).
DATA_PATH = r'C:\Users\shanm\Dataset Download\custom image net'

# Training schedule.
BATCH_SIZE = 64
ACCUMULATION_STEPS = 8  # Effective batch size = BATCH_SIZE * ACCUMULATION_STEPS = 512
EPOCHS = 20
E_UNFREEZE = 10  # Epoch at which main() makes every parameter trainable
LAMBDA_L1 = 1e-5  # Weight of the L1 penalty added to the classification loss

# Prefer the GPU when one is visible; otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")

# Checkpoint directory is created eagerly, at import time.
SAVE_DIR = "saved_models"
os.makedirs(SAVE_DIR, exist_ok=True)

# Silence Python warnings and limit CodeCarbon logging to errors.
warnings.filterwarnings("ignore")
os.environ["CODECARBON_LOG_LEVEL"] = "error"
# --- U-Net Adaptation for Classification ---
class UNetClassifier(nn.Module):
    """Image classifier built on a torchvision ResNet-18 feature extractor.

    Despite the name, this module has no decoder or skip connections: the
    forward pass is encoder -> global average pooling -> linear head.

    Only ``encoder``, ``avgpool`` and ``classifier`` are registered as
    sub-modules, so ``state_dict()`` holds ``encoder.*`` and ``classifier.*``
    keys only. A checkpoint that additionally contains ``backbone.*`` keys
    must be loaded with ``strict=False``.

    Args:
        num_classes: Number of output logits produced by the linear head.
        weights: Passed straight to ``torchvision.models.resnet18``; the
            default requests the ImageNet-pretrained weights, ``None`` gives
            a randomly initialised encoder.
    """

    def __init__(self, num_classes=300, weights='IMAGENET1K_V1'):
        super().__init__()
        # Encoder: ResNet-18 without its last two children (the pooling layer
        # and the original fully connected head).
        # The backbone is deliberately kept as a local variable. Assigning it
        # to ``self`` would register the discarded head as trainable
        # parameters (optimised, L1-penalised and checkpointed although never
        # used in forward) and would register every encoder layer twice.
        backbone = torchvision.models.resnet18(weights=weights)
        self.encoder = nn.Sequential(*list(backbone.children())[:-2])
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        # The width of the features fed to the original head is also the
        # channel count after pooling (512 for ResNet-18).
        self.classifier = nn.Linear(backbone.fc.in_features, num_classes)

    def forward(self, x):
        """Return class logits with one row per input sample."""
        x = self.encoder(x)
        x = self.avgpool(x)
        # Collapse everything after the batch dimension into one feature axis.
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x
def _new_epoch_tracker():
    """Return a fresh CodeCarbon tracker meant to measure exactly one epoch."""
    return EmissionsTracker(measure_power_secs=1, save_to_file=False, log_level='error')


def _peak_vram_gb():
    """Return the peak CUDA memory allocated on DEVICE in GiB, or 0.0 on CPU.

    The peak statistics are never reset in this script, so the value is the
    maximum observed since the process started, not a per-epoch figure.
    """
    if DEVICE.type != "cuda":
        # torch.cuda memory statistics are only defined for CUDA devices.
        return 0.0
    return torch.cuda.max_memory_allocated(DEVICE) / (1024**3)


def main():
    """Fine-tune UNetClassifier on the image folder at DATA_PATH and log stats.

    Steps:
      1. Load the dataset with ImageFolder and keep a seeded 80% subset.
      2. Measure FLOPs/parameters with thop on a throw-away copy of the model.
      3. Train for EPOCHS epochs with mixed precision, gradient accumulation
         and an L1 penalty. The encoder is frozen until epoch E_UNFREEZE,
         after which every parameter trains at a learning rate of 1e-5.
      4. After each epoch, rewrite CSV_FILENAME with all rows so far and save
         the weights to SAVE_DIR whenever accuracy improves.

    NOTE(review): the 20% split is discarded, so the reported accuracy (and
    therefore the "best" checkpoint) is measured on the training batches.

    NOTE(review): the model is put in train() mode while the encoder is
    frozen; only requires_grad is switched off, so normalisation layers in
    the encoder presumably still update their running statistics. Confirm
    that this is intended.
    """
    # --- Phase 1: High-Resolution Initialization ---
    transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    print(f"[*] Loading {DATASET_NAME} from disk (80/20 Random Split)...")
    # Load from root since the class folders are flat
    full_dataset = torchvision.datasets.ImageFolder(root=DATA_PATH, transform=transform)

    # Split into 80% Train, 20% Val (fixed seed keeps the split reproducible)
    train_size = int(0.8 * len(full_dataset))
    val_size = len(full_dataset) - train_size
    train_dataset, _ = random_split(
        full_dataset, [train_size, val_size],
        generator=torch.Generator().manual_seed(42)
    )
    trainloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, pin_memory=True)
    num_batches = len(trainloader)

    # --- Model Setup ---
    model = UNetClassifier(num_classes=300)

    # 1. Profile on clone to avoid hook attribute error
    print("[*] Calculating hardware metrics...")
    model_for_profile = copy.deepcopy(model).to(DEVICE)
    dummy_input = torch.randn(1, 3, 224, 224).to(DEVICE)
    flops, params = profile(model_for_profile, inputs=(dummy_input, ), verbose=False)
    del model_for_profile

    # 2. Initially freeze encoder
    for param in model.encoder.parameters():
        param.requires_grad = False
    model.to(DEVICE)

    criterion = nn.CrossEntropyLoss()
    # Frozen parameters are handed to the optimizer as well, so they start
    # training as soon as requires_grad is switched back on.
    optimizer = optim.AdamW(model.parameters(), lr=1e-3)
    scaler = torch.cuda.amp.GradScaler()

    results = []
    cumulative_total_energy = 0
    best_acc = 0.0

    print(f"\n[MODEL INFO] FLOPs: {flops/1e9:.2f} G | Parameters: {params/1e6:.2f} M | Classes: 300")
    print(f"{'='*140}")
    print(f"{'Epoch':<6} | {'Loss':<7} | {'Acc':<7} | {'Total(J)':<9} | {'VRAM(GB)':<9} | {'EAG':<8} | {'Status'}")
    print(f"{'-'*140}")

    for epoch in range(1, EPOCHS + 1):
        if epoch == E_UNFREEZE:
            # Unfreeze everything and drop the learning rate for fine-tuning.
            for param in model.parameters():
                param.requires_grad = True
            for pg in optimizer.param_groups:
                pg['lr'] = 1e-5
            status_msg = "UNFROZEN"
        else:
            status_msg = "FROZEN" if epoch < E_UNFREEZE else "FINE-TUNING"

        model.train()

        # One tracker per epoch: each tracker is started and stopped exactly
        # once, so its energy figures cover this epoch only and do not depend
        # on whether a stopped tracker can be restarted.
        tracker = _new_epoch_tracker()
        tracker.start()
        epoch_start = time.time()
        running_loss, all_preds, all_labels = 0.0, [], []

        # Real-time progress bar
        pbar = tqdm(enumerate(trainloader), total=num_batches, desc=f"Epoch {epoch:02d}", leave=False)
        optimizer.zero_grad()
        for i, (inputs, labels) in pbar:
            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
            with torch.cuda.amp.autocast():
                outputs = model(inputs)
                cls_loss = criterion(outputs, labels)
                # L1 penalty over the currently trainable parameters only.
                l1_penalty = sum(p.abs().sum() for p in model.parameters() if p.requires_grad)
                loss = (cls_loss + LAMBDA_L1 * l1_penalty) / ACCUMULATION_STEPS
            scaler.scale(loss).backward()

            # Step after every full accumulation group and also on the final
            # batch; otherwise the gradients of a trailing partial group
            # would be thrown away by the next epoch's zero_grad(). The
            # partial group is still divided by ACCUMULATION_STEPS, so that
            # last update is proportionally smaller before clipping.
            is_last_batch = (i + 1) == num_batches
            if (i + 1) % ACCUMULATION_STEPS == 0 or is_last_batch:
                # Gradients must be unscaled before their norm is clipped.
                scaler.unscale_(optimizer)
                torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
                scaler.step(optimizer)
                scaler.update()
                optimizer.zero_grad()

            # Only the classification term is logged, not the L1 penalty.
            batch_loss = cls_loss.item()
            running_loss += batch_loss
            _, predicted = torch.max(outputs.data, 1)
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
            pbar.set_postfix({'loss': f"{batch_loss:.4f}"})

        emissions_kg = tracker.stop()
        duration = time.time() - epoch_start

        # The 3,600,000 factor corresponds to a kWh -> joule conversion.
        energy = tracker.final_emissions_data
        e_tot = (energy.gpu_energy + energy.cpu_energy + energy.ram_energy) * 3600000
        cumulative_total_energy += e_tot

        acc = (np.array(all_preds) == np.array(all_labels)).mean()
        vram_peak = _peak_vram_gb()
        # Accuracy per kilojoule spent in this epoch.
        eag = acc / (e_tot / 1000) if e_tot > 0 else 0

        # Detailed Audit Row
        stats = {
            "epoch": epoch, "status": status_msg, "loss": running_loss / num_batches,
            "accuracy": acc, "total_energy_j": e_tot, "cumulative_energy_j": cumulative_total_energy,
            "carbon_kg": emissions_kg, "vram_gb": vram_peak, "eag_metric": eag,
            "latency_ms": (duration / num_batches) * 1000,
            "model_flops": flops, "model_params": params
        }
        results.append(stats)
        # Rewritten in full every epoch so an interrupted run keeps its log.
        pd.DataFrame(results).to_csv(CSV_FILENAME, index=False)

        is_best = acc > best_acc
        if is_best:
            best_acc = acc
            torch.save(model.state_dict(), os.path.join(SAVE_DIR, f"BEST_{MODEL_NAME}_{DATASET_NAME}.pth"))
        best_tag = "*" if is_best else ""

        print(f"{epoch:02d}/{EPOCHS:02d} | {stats['loss']:.4f} | {acc:.2%} | {e_tot:<9.2f} | {vram_peak:<9.3f} | {eag:<8.4f} | {status_msg}{best_tag}")

    # Memory Flush for Batch Script
    del model, trainloader
    torch.cuda.empty_cache()
    gc.collect()
# Script entry point: run the training pipeline only when this file is
# executed directly, not when it is imported.
if __name__ == '__main__':
    main()