Shanmuk4622 committed on
Commit
af09f06
·
verified ·
1 Parent(s): ae10333

Upload test3/eden_VGG16_ImageNet.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. test3/eden_VGG16_ImageNet.py +174 -0
test3/eden_VGG16_ImageNet.py ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.optim as optim
4
+ import torchvision
5
+ import torchvision.transforms as transforms
6
+ from torchvision.datasets import ImageFolder
7
+ from torch.utils.data import DataLoader
8
+ from sklearn.metrics import f1_score, precision_score, recall_score
9
+ from codecarbon import EmissionsTracker
10
+ from thop import profile
11
+ from tqdm import tqdm
12
+ import time
13
+ import pandas as pd
14
+ import numpy as np
15
+ import os
16
+ import warnings
17
+ from datetime import timedelta
18
+
19
# --- Configuration ---
# Identifiers used to build the CSV and checkpoint file names.
MODEL_NAME = "vgg16"
DATASET_NAME = "CustomImageNet300"

# Root folder handed to ImageFolder. The default is a machine-specific path;
# set the EDEN_DATA_PATH environment variable to train on another machine
# without editing this file.
DATA_PATH = os.environ.get(
    "EDEN_DATA_PATH",
    r'C:\Users\shanm\Dataset Download\custom image net',
)

BATCH_SIZE = 32
EPOCHS = 50
# Use the GPU when one is visible to torch, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Checkpoints are written here; the directory is created at import time.
SAVE_DIR = "saved_models"
os.makedirs(SAVE_DIR, exist_ok=True)
# Per-epoch statistics are rewritten to this CSV after every epoch.
CSV_FILENAME = f"{MODEL_NAME}_{DATASET_NAME}_stats.csv"

# Keep the console readable: silence Python warnings and codecarbon logging.
# NOTE(review): this hides *all* warnings, including deprecations - confirm
# that is intended.
warnings.filterwarnings("ignore")
os.environ["CODECARBON_LOG_LEVEL"] = "error"
33
+
34
# The original code multiplies codecarbon's energy fields by this factor and
# stores them under "*_j" keys (presumably kWh -> joules; see codecarbon docs).
_JOULES_PER_KWH = 3600000


def _build_trainloader():
    """Build the training dataset and its DataLoader from DATA_PATH.

    Returns:
        A ``(trainset, trainloader)`` tuple. ``trainset`` is the ImageFolder
        dataset, ``trainloader`` a shuffled DataLoader over it.
    """
    transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    trainset = ImageFolder(root=DATA_PATH, transform=transform)
    trainloader = DataLoader(
        trainset,
        batch_size=BATCH_SIZE,
        shuffle=True,
        num_workers=2,
        pin_memory=True,
    )
    return trainset, trainloader


def _train_one_epoch(model, trainloader, criterion, optimizer, epoch):
    """Run one optimisation pass over ``trainloader``.

    Args:
        model: Network to train; switched to training mode here.
        trainloader: Iterable of ``(inputs, labels)`` batches.
        criterion: Loss function applied to ``(outputs, labels)``.
        optimizer: Optimizer stepped once per batch.
        epoch: 1-based epoch number, used only for the progress-bar label.

    Returns:
        ``(running_loss, all_preds, all_labels, grad_norms)`` where
        ``running_loss`` is the sum of per-batch losses, ``all_preds`` and
        ``all_labels`` are flat lists of predicted / true class indices, and
        ``grad_norms`` holds the gradient norm reported by
        ``clip_grad_norm_`` for every batch.
    """
    model.train()
    running_loss = 0.0
    all_preds, all_labels, grad_norms = [], [], []

    pbar = tqdm(enumerate(trainloader), total=len(trainloader), desc=f"Epoch {epoch}/{EPOCHS}")
    for i, (inputs, labels) in pbar:
        inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()

        # clip_grad_norm_ returns the total norm; it is recorded as a
        # training statistic in addition to clipping at max_norm.
        grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=100)
        grad_norms.append(grad_norm.item())
        optimizer.step()

        running_loss += loss.item()
        # detach() instead of the legacy ``.data`` accessor.
        predicted = outputs.detach().argmax(dim=1)

        pbar.set_postfix({'loss': f'{running_loss/(i+1):.4f}'})

        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

    return running_loss, all_preds, all_labels, grad_norms


def main():
    """Train VGG16 on the ImageFolder at DATA_PATH and log per-epoch stats.

    For every epoch this measures energy/emissions with a fresh codecarbon
    EmissionsTracker, computes accuracy / macro F1 / precision / recall on
    the training predictions, rewrites CSV_FILENAME with all epochs so far,
    and saves the model weights whenever training accuracy improves.

    Returns early (after printing an error) if DATA_PATH does not exist.
    Exceptions raised during training are printed with a traceback and not
    re-raised; the success message is printed only when all epochs finish.
    """
    # 1. Data Loading
    if not os.path.exists(DATA_PATH):
        print(f"[ERROR] Dataset path not found: {DATA_PATH}")
        return

    trainset, trainloader = _build_trainloader()
    num_batches = len(trainloader)

    # 2. Model setup
    # Size the classifier head from the dataset instead of a fixed 300 so a
    # folder with a different number of classes does not produce
    # out-of-range labels in the loss.
    num_classes = len(trainset.classes)
    model = torchvision.models.vgg16(weights=None)
    model.classifier[6] = nn.Linear(4096, num_classes)
    model.to(DEVICE)

    # NOTE(review): thop.profile instruments the model while counting -
    # confirm the installed version leaves no extra buffers in state_dict.
    dummy_input = torch.randn(1, 3, 224, 224).to(DEVICE)
    flops, params = profile(model, inputs=(dummy_input, ), verbose=False)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)

    results = []
    cumulative_total_energy = 0
    total_start_time = time.time()
    best_acc = 0.0

    print(f"\n[MODEL INFO] FLOPs: {flops/1e9:.2f} G | Parameters: {params/1e6:.2f} M")
    print("="*125)
    print(f"TRAINING {MODEL_NAME.upper()} ON {DATASET_NAME}")
    print("-" * 125)

    try:
        for epoch in range(1, EPOCHS + 1):
            # One tracker per epoch so the energy figures are per-epoch.
            tracker = EmissionsTracker(measure_power_secs=1, save_to_file=False, log_level='error')
            tracker.start()
            try:
                epoch_start_time = time.time()
                running_loss, all_preds, all_labels, grad_norms = _train_one_epoch(
                    model, trainloader, criterion, optimizer, epoch
                )
                scheduler.step()
                duration = time.time() - epoch_start_time
            finally:
                # Always stop the tracker, even when the epoch raised, so it
                # is not left running. stop() also populates
                # final_emissions_data, which is read below.
                emissions_kg = tracker.stop()

            # --- CALCULATIONS (safe because the tracker is stopped) ---
            energy = tracker.final_emissions_data
            e_gpu = energy.gpu_energy * _JOULES_PER_KWH
            e_cpu = energy.cpu_energy * _JOULES_PER_KWH
            e_ram = energy.ram_energy * _JOULES_PER_KWH
            total_energy = e_gpu + e_cpu + e_ram
            cumulative_total_energy += total_energy

            acc = (np.array(all_preds) == np.array(all_labels)).mean()
            # zero_division=0 matches the precision/recall calls below.
            f1 = f1_score(all_labels, all_preds, average='macro', zero_division=0)
            # Peak stats are never reset here, so this is the peak since the
            # process started rather than a per-epoch peak.
            vram_peak = torch.cuda.max_memory_allocated(DEVICE) / (1024**3) if torch.cuda.is_available() else 0
            elapsed_total = time.time() - total_start_time
            avg_per_epoch = elapsed_total / epoch
            eta = str(timedelta(seconds=int(avg_per_epoch * (EPOCHS - epoch))))

            # --- ALL REQUESTED STATS ---
            epoch_stats = {
                "epoch": epoch,
                "loss": running_loss / num_batches,
                "accuracy": acc,
                "f1_score": f1,
                "precision": precision_score(all_labels, all_preds, average='macro', zero_division=0),
                "recall": recall_score(all_labels, all_preds, average='macro', zero_division=0),
                "epoch_energy_gpu_j": e_gpu,
                "epoch_energy_cpu_j": e_cpu,
                "epoch_energy_ram_j": e_ram,
                "epoch_total_energy_j": total_energy,
                "cumulative_total_energy_j": cumulative_total_energy,
                "carbon_emissions_kg": emissions_kg,
                "vram_peak_gb": vram_peak,
                "avg_power_gpu_w": energy.gpu_power,
                "avg_power_cpu_w": energy.cpu_power,
                "avg_power_ram_w": energy.ram_power,
                "latency_ms": (duration / num_batches) * 1000,
                "avg_grad_norm": np.mean(grad_norms),
                "eag_metric": acc / (total_energy / 1000) if total_energy > 0 else 0,
                "it_per_sec": num_batches / duration,
                "total_iterations": num_batches,
                "epoch_duration_sec": duration,
                "cumulative_time_sec": elapsed_total,
                "model_flops": flops,
                "model_parameters": params
            }
            results.append(epoch_stats)
            # Rewrite the whole CSV each epoch so partial runs keep their data.
            pd.DataFrame(results).to_csv(CSV_FILENAME, index=False)

            if acc > best_acc:
                best_acc = acc
                torch.save(model.state_dict(), os.path.join(SAVE_DIR, f"BEST_{MODEL_NAME}_{DATASET_NAME}.pth"))
                best_msg = " (Best Saved!)"
            else:
                best_msg = ""

            print(f"\nEpoch {epoch:02d} Summary: Loss: {epoch_stats['loss']:.4f} | Acc: {acc:.2%} | Energy: {total_energy:.2f}J | VRAM: {vram_peak:.2f}GB | ETA: {eta}{best_msg}\n")
            print("-" * 125)

    except Exception as e:
        print(f"\n[CRASH] Error: {e}")
        import traceback
        traceback.print_exc()
    else:
        # Report success only when every epoch completed; a ``finally``
        # clause would also print this after a crash or Ctrl+C.
        print(f"\n[SUCCESS] Training Complete. Results saved to {CSV_FILENAME}")
172
+
173
# Script entry point: start training only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    main()