josondev committed on
Commit
cd39b6d
·
verified ·
1 Parent(s): d3375c4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +384 -5
app.py CHANGED
@@ -1,7 +1,386 @@
1
- import gradio as gr
 
 
 
 
2
 
3
def greet(name):
    """Return the greeting ``"Hello <name>!!"`` for the given name string."""
    prefix = "Hello "
    suffix = "!!"
    return prefix + name + suffix
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
# Expose greet() through a text-in / text-out Gradio interface and serve it.
demo = gr.Interface(
    fn=greet,
    inputs="text",
    outputs="text",
)
demo.launch()
 
1
+ # ============================================================================
2
+ # FACE AGE & GENDER PREDICTION - COMPLETE TRAINING WITH TRACKIO
3
+ # ============================================================================
4
+ # Produces train/val loss curves for the age, gender, and total losses.
5
+ # Metrics are logged to TrackIO per step and per epoch for real-time visualization.
6
 
7
# "!pip install ..." is IPython/notebook shell syntax and is a SyntaxError in a
# plain .py module, so the install goes through the running interpreter's pip.
import importlib.util
import subprocess
import sys

if importlib.util.find_spec("trackio") is None:
    # Install only when the package is missing so repeated runs do not shell out.
    subprocess.run(
        [sys.executable, "-m", "pip", "install", "-q", "trackio"],
        check=True,
    )
8
+
9
+ import os
10
+ import gc
11
+ import numpy as np
12
+ import pandas as pd
13
+ from PIL import Image
14
+ import torch
15
+ from torch import nn
16
+ from torch.utils.data import Dataset, DataLoader
17
+ from torchvision import transforms, models
18
+ import pytorch_lightning as pl
19
+ from pytorch_lightning.callbacks import ModelCheckpoint
20
+ from sklearn.model_selection import train_test_split
21
+ from kaggle_secrets import UserSecretsClient
22
+ import trackio
23
+
24
+ # ============================================================================
25
+ # GLOBAL SETTINGS
26
+ # ============================================================================
27
+
28
class PipelineSettings:
    """Central configuration: dataset paths and training hyperparameters.

    Every value is a plain instance attribute assigned in ``__init__``, so a
    caller may override any of them on the instance before building the
    pipeline.
    """

    def __init__(self):
        # Dataset root and the CSV files located directly inside it.
        self.DATA_ROOT_DIR = "/kaggle/input/sep-25-dl-gen-ai-nppe-1/face_dataset"
        self.TRAIN_CSV_PATH = f"{self.DATA_ROOT_DIR}/train.csv"
        self.TEST_CSV_PATH = f"{self.DATA_ROOT_DIR}/test.csv"

        self.INPUT_IMAGE_SIZE = 128
        self.BATCH_SIZE = 128
        self.LEARNING_RATE = 1e-3
        self.NUM_EPOCHS = 10
        # Multiplier applied to the age loss before it is added to the gender loss.
        self.AGE_LOSS_WEIGHT = 0.01

        # os.cpu_count() returns None when the count cannot be determined;
        # DataLoader requires an int, so fall back to 0 (load in the main process).
        self.NUM_DATALOADER_WORKERS = os.cpu_count() or 0


# Module-level settings instance shared by the rest of the script.
settings = PipelineSettings()
43
+
44
+ # ============================================================================
45
+ # IMAGE AUGMENTATION
46
+ # ============================================================================
47
+
48
class ImageAugmentor:
    """Factory for the torchvision preprocessing pipelines.

    Stores the square target size and the per-channel normalisation
    statistics that both the training and the inference pipeline share.
    """

    def __init__(self, image_size):
        self.image_size = image_size
        self.norm_params = dict(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        )

    def get_training_transforms(self):
        """Return the training pipeline: resize, random flip, colour jitter, tensor, normalise."""
        side = self.image_size
        steps = [
            transforms.Resize((side, side)),
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1),
            transforms.ToTensor(),
            transforms.Normalize(**self.norm_params),
        ]
        return transforms.Compose(steps)

    def get_inference_transforms(self):
        """Return the deterministic pipeline: resize, tensor, normalise."""
        side = self.image_size
        steps = [
            transforms.Resize((side, side)),
            transforms.ToTensor(),
            transforms.Normalize(**self.norm_params),
        ]
        return transforms.Compose(steps)
68
+
69
+ # ============================================================================
70
+ # DATASET
71
+ # ============================================================================
72
+
73
class FaceImageDataset(Dataset):
    """Map-style dataset yielding ``(image, gender, age)`` per metadata row.

    Args:
        metadata_df: DataFrame read positionally (``iloc``); each row must
            provide the ``full_path``, ``gender`` and ``age`` columns.
        image_dir: Directory that each row's ``full_path`` is joined onto.
        image_transform: Optional callable applied to the loaded RGB image.
    """

    def __init__(self, metadata_df, image_dir, image_transform=None):
        self.metadata = metadata_df
        self.image_dir = image_dir
        self.transform = image_transform

    def __len__(self):
        """Return the number of metadata rows."""
        return len(self.metadata)

    def __getitem__(self, idx):
        """Load the image for row ``idx`` and return it with its float32 targets."""
        row = self.metadata.iloc[idx]
        image_path = os.path.join(self.image_dir, row['full_path'])

        # Close the underlying file deterministically; convert() returns an
        # independent image, so it stays usable after the handle is released.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert("RGB")

        # Compare against None so a callable that happens to be falsy
        # (e.g. an empty container of transforms) is still applied.
        if self.transform is not None:
            image = self.transform(image)

        gender_target = torch.tensor(row['gender'], dtype=torch.float32)
        age_target = torch.tensor(row['age'], dtype=torch.float32)
        return image, gender_target, age_target
93
+
94
+ # ============================================================================
95
+ # DATA MODULE
96
+ # ============================================================================
97
+
98
class FaceDataModule(pl.LightningDataModule):
    """Lightning data module that splits the training CSV and serves loaders.

    The CSV at ``config.TRAIN_CSV_PATH`` is split 85/15 with a fixed seed,
    stratified on the ``gender`` column.
    """

    def __init__(self, config: PipelineSettings):
        super().__init__()
        self.cfg = config
        self.augmentor = ImageAugmentor(self.cfg.INPUT_IMAGE_SIZE)
        self.train_df = None
        self.val_df = None

    def prepare_data(self):
        """Nothing to download or pre-compute."""

    def setup(self, stage=None):
        """Build the train/validation datasets for the ``fit`` stage (or when no stage is given)."""
        # NOTE(review): the original indentation is not visible in this view;
        # dataset construction is assumed to sit inside the fit branch - confirm.
        if stage is not None and stage != 'fit':
            return

        labelled = pd.read_csv(self.cfg.TRAIN_CSV_PATH)
        split = train_test_split(
            labelled,
            test_size=0.15,
            random_state=42,
            stratify=labelled['gender'],
        )
        self.train_df, self.val_df = split

        root = self.cfg.DATA_ROOT_DIR
        self.train_dataset = FaceImageDataset(
            self.train_df, root, self.augmentor.get_training_transforms()
        )
        self.val_dataset = FaceImageDataset(
            self.val_df, root, self.augmentor.get_inference_transforms()
        )

    def train_dataloader(self):
        """Return the shuffled loader over the training split."""
        loader_options = dict(
            batch_size=self.cfg.BATCH_SIZE,
            shuffle=True,
            num_workers=self.cfg.NUM_DATALOADER_WORKERS,
        )
        return DataLoader(self.train_dataset, **loader_options)

    def val_dataloader(self):
        """Return the loader over the validation split (no shuffle argument is passed)."""
        loader_options = dict(
            batch_size=self.cfg.BATCH_SIZE,
            num_workers=self.cfg.NUM_DATALOADER_WORKERS,
        )
        return DataLoader(self.val_dataset, **loader_options)
129
+
130
+ # ============================================================================
131
+ # BASE MODEL WITH TRACKIO LOGGING (MATCHES YOUR SCREENSHOT)
132
+ # ============================================================================
133
+
134
class AbstractFaceModel(pl.LightningModule):
    """Shared training/validation logic for the two-headed face models.

    Subclasses implement ``forward(images)`` and return a
    ``(gender_logits, age_predictions)`` pair. The optimised loss is
    ``BCEWithLogits(gender) + age_loss_weight * MSE(age)``.

    Per-step and per-epoch losses are mirrored to TrackIO on a best-effort
    basis: a TrackIO failure is reported once and never interrupts training.

    Args:
        learning_rate: Learning rate passed to the Adam optimiser.
        age_loss_weight: Multiplier applied to the age MSE in the total loss.
    """

    _LOSS_KEYS = ('loss_total', 'loss_gender', 'loss_age')

    def __init__(self, learning_rate, age_loss_weight):
        super().__init__()
        self.save_hyperparameters()
        self.lr = learning_rate
        self.age_weight = age_loss_weight
        self.gender_loss_fn = nn.BCEWithLogitsLoss()
        self.age_loss_fn = nn.MSELoss()

        # Detached per-batch losses, averaged and cleared at each epoch end.
        self.training_step_outputs = []
        self.validation_step_outputs = []

        # Ensures a TrackIO failure is reported only once per model.
        self._trackio_warned = False

    def _log_to_trackio(self, metrics):
        """Send ``metrics`` to TrackIO without letting a failure stop training."""
        try:
            trackio.log(metrics)
        except Exception as exc:
            # Catch Exception (not a bare except) so Ctrl-C and SystemExit
            # still propagate; warn once instead of failing silently.
            if not self._trackio_warned:
                print(f"⚠️ TrackIO logging failed (further failures suppressed): {exc}")
                self._trackio_warned = True

    def _epoch_means(self, step_outputs):
        """Return the mean of each stored loss over ``step_outputs`` as floats."""
        return {
            key: torch.stack([out[key] for out in step_outputs]).mean().item()
            for key in self._LOSS_KEYS
        }

    def _calculate_losses(self, gender_preds, age_preds, gender_labels, age_labels):
        """Return ``(total_loss, gender_loss, age_loss)`` for one batch."""
        # reshape(-1) keeps the batch dimension even for a batch of one; a bare
        # squeeze() would produce a 0-d tensor that no longer matches the
        # (1,)-shaped labels.
        gender_loss = self.gender_loss_fn(gender_preds.reshape(-1), gender_labels)
        age_loss = self.age_loss_fn(age_preds.reshape(-1), age_labels)
        total_loss = gender_loss + (age_loss * self.age_weight)
        return total_loss, gender_loss, age_loss

    def _store_losses(self, store, total_loss, gender_loss, age_loss):
        """Append detached copies of the three losses to ``store``."""
        store.append({
            'loss_total': total_loss.detach(),
            'loss_gender': gender_loss.detach(),
            'loss_age': age_loss.detach(),
        })

    def training_step(self, batch, batch_idx):
        """Compute the batch loss, log it, and return the total loss."""
        images, gender_labels, age_labels = batch
        gender_preds, age_preds = self(images)
        total_loss, gender_loss, age_loss = self._calculate_losses(
            gender_preds, age_preds, gender_labels, age_labels
        )

        # Lightning logging (progress bar).
        self.log('train_loss', total_loss, on_step=True, on_epoch=True, prog_bar=True)

        self._store_losses(self.training_step_outputs, total_loss, gender_loss, age_loss)

        # Per-step TrackIO point for the step-level loss curves.
        self._log_to_trackio({
            'train/loss_total': total_loss.item(),
            'train/loss_gender': gender_loss.item(),
            'train/loss_age': age_loss.item(),
            'step': self.global_step,
        })

        return total_loss

    def on_train_epoch_end(self):
        """Log the epoch-average training losses to TrackIO and reset the buffer."""
        if not self.training_step_outputs:
            return

        means = self._epoch_means(self.training_step_outputs)
        self._log_to_trackio({
            'train/epoch_loss_total': means['loss_total'],
            'train/epoch_loss_gender': means['loss_gender'],
            'train/epoch_loss_age': means['loss_age'],
            'epoch': self.current_epoch,
        })
        self.training_step_outputs.clear()

    def validation_step(self, batch, batch_idx):
        """Compute and record the validation losses for one batch."""
        images, gender_labels, age_labels = batch
        gender_preds, age_preds = self(images)
        total_loss, gender_loss, age_loss = self._calculate_losses(
            gender_preds, age_preds, gender_labels, age_labels
        )

        # sync_dist reduces the metric across processes so a callback that
        # monitors 'val_loss' sees the same value on every rank; on a single
        # device it has no effect.
        self.log('val_loss', total_loss, on_epoch=True, prog_bar=True, sync_dist=True)

        self._store_losses(self.validation_step_outputs, total_loss, gender_loss, age_loss)

    def on_validation_epoch_end(self):
        """Log the epoch-average validation losses to TrackIO and reset the buffer."""
        if not self.validation_step_outputs:
            return

        means = self._epoch_means(self.validation_step_outputs)
        self._log_to_trackio({
            'val/loss_total': means['loss_total'],
            'val/loss_gender': means['loss_gender'],
            'val/loss_age': means['loss_age'],
            'epoch': self.current_epoch,
        })
        self.validation_step_outputs.clear()

    def configure_optimizers(self):
        """Return an Adam optimiser over all parameters at the configured learning rate."""
        return torch.optim.Adam(self.parameters(), lr=self.lr)
238
+
239
+ # ============================================================================
240
+ # SCRATCH CNN MODEL
241
+ # ============================================================================
242
+
243
class ScratchCNNModel(AbstractFaceModel):
    """Four-block CNN trained from scratch with separate gender and age heads.

    Each block is Conv3x3 -> BatchNorm -> ReLU -> MaxPool(2), with channel
    widths 32, 64, 128 and 256. Both heads are single linear layers on the
    flattened feature map.

    Args:
        learning_rate: Forwarded to ``AbstractFaceModel``.
        age_loss_weight: Forwarded to ``AbstractFaceModel``.
        image_size: Side length of the square input used to size the linear
            heads. Defaults to ``settings.INPUT_IMAGE_SIZE`` when omitted.
    """

    def __init__(self, learning_rate, age_loss_weight, image_size=None):
        super().__init__(learning_rate, age_loss_weight)

        if image_size is None:
            image_size = settings.INPUT_IMAGE_SIZE

        def conv_block(in_f, out_f):
            return nn.Sequential(
                nn.Conv2d(in_f, out_f, 3, padding=1, bias=False),
                nn.BatchNorm2d(out_f),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(2, 2),
            )

        self.feature_extractor = nn.Sequential(
            conv_block(3, 32), conv_block(32, 64),
            conv_block(64, 128), conv_block(128, 256),
        )

        flat_size = self._probe_flat_size(image_size)

        self.gender_head = nn.Linear(flat_size, 1)
        self.age_head = nn.Linear(flat_size, 1)

    def _probe_flat_size(self, image_size):
        """Return the flattened feature length for a square ``image_size`` input."""
        # Probe in eval mode and without autograd: a train-mode forward pass
        # would update the BatchNorm running statistics with probe data and
        # build a graph that is never used.
        was_training = self.feature_extractor.training
        self.feature_extractor.eval()
        try:
            with torch.no_grad():
                probe = torch.zeros(1, 3, image_size, image_size)
                return self.feature_extractor(probe).flatten(1).size(1)
        finally:
            self.feature_extractor.train(was_training)

    def forward(self, x):
        """Return ``(gender_logits, age_predictions)`` for a batch of images."""
        features = torch.flatten(self.feature_extractor(x), 1)
        return self.gender_head(features), self.age_head(features)
269
+
270
+ # ============================================================================
271
+ # FINE-TUNED RESNET MODEL
272
+ # ============================================================================
273
+
274
class FineTunedResNetModel(AbstractFaceModel):
    """ResNet-18 (default pretrained weights) with linear gender and age heads."""

    def __init__(self, learning_rate, age_loss_weight):
        super().__init__(learning_rate, age_loss_weight)
        pretrained = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
        head_inputs = pretrained.fc.in_features

        # Keep every child module except the last one (the fc classifier).
        layers_without_fc = list(pretrained.children())[:-1]
        self.backbone = nn.Sequential(*layers_without_fc)
        self.gender_head = nn.Linear(head_inputs, 1)
        self.age_head = nn.Linear(head_inputs, 1)

    def forward(self, x):
        """Return ``(gender_logits, age_predictions)`` for a batch of images."""
        backbone_out = self.backbone(x)
        features = torch.flatten(backbone_out, 1)
        gender_logits = self.gender_head(features)
        age_predictions = self.age_head(features)
        return gender_logits, age_predictions
287
+
288
+ # ============================================================================
289
+ # PIPELINE RUNNER
290
+ # ============================================================================
291
+
292
class PipelineRunner:
    """Train the scratch CNN and the fine-tuned ResNet, one TrackIO run each.

    Args:
        cfg: Settings object supplying hyperparameters and data paths.
    """

    # NOTE: update to your own Hugging Face Space before running.
    TRACKIO_SPACE_ID = "muhammad-bilal1/dlgenai-nppe"
    TRACKIO_PROJECT = "25-t3-nppe1"
    CHECKPOINT_DIR = '/kaggle/working/'

    def __init__(self, cfg: PipelineSettings):
        self.cfg = cfg
        self.data_module = FaceDataModule(cfg)
        self._setup_trackio()

    def _setup_trackio(self):
        """Export the Hugging Face token from Kaggle secrets as ``HF_TOKEN``.

        Failure is reported but not raised.
        """
        try:
            secrets = UserSecretsClient()
            hf_token = secrets.get_secret("HUGGINGFACE_TOKEN")
            os.environ["HF_TOKEN"] = hf_token
            print("✅ TrackIO auth configured")
        except Exception as e:
            print(f"⚠️ TrackIO auth failed: {e}")

    def _init_trackio(self, model_name, run_name):
        """Start a TrackIO run for this model; failure is reported, not raised."""
        try:
            trackio.init(
                space_id=self.TRACKIO_SPACE_ID,
                project=self.TRACKIO_PROJECT,
                group=run_name,
                config={
                    "lr": self.cfg.LEARNING_RATE,
                    "epochs": self.cfg.NUM_EPOCHS,
                    "batch_size": self.cfg.BATCH_SIZE,
                    "model": model_name,
                    "image_size": self.cfg.INPUT_IMAGE_SIZE,
                    "age_weight": self.cfg.AGE_LOSS_WEIGHT,
                },
            )
            print(f"✅ TrackIO initialized: {run_name}")
        except Exception as e:
            print(f"⚠️ TrackIO init failed: {e}")

    def _finish_trackio(self, trainer):
        """Log the final validation loss (when available) and close the run."""
        try:
            final_val = trainer.callback_metrics.get('val_loss')
            # Only log a real value; a 0.0 placeholder would look like a
            # perfect score on the dashboard.
            if final_val is not None:
                trackio.log({"final_val_loss": final_val.item()})
            trackio.finish()
            print("✅ TrackIO run finished")
        except Exception as e:
            print(f"⚠️ TrackIO finish failed: {e}")

    def _train_model(self, model, model_name, run_name):
        """Fit ``model``, keep the best checkpoint by ``val_loss``, then clean up.

        Args:
            model: LightningModule to train.
            model_name: Short name used in the checkpoint filename and run config.
            run_name: TrackIO group name for this run.
        """
        print(f"\n{'='*70}\n🚀 Training: {model_name}\n{'='*70}")

        self._init_trackio(model_name, run_name)

        checkpoint_cb = ModelCheckpoint(
            monitor='val_loss',
            dirpath=self.CHECKPOINT_DIR,
            filename=f'{model_name}-best-model',
            save_top_k=1,
            mode='min',
        )

        trainer = pl.Trainer(
            max_epochs=self.cfg.NUM_EPOCHS,
            accelerator='gpu',
            devices='auto',
            strategy="ddp_notebook",
            callbacks=[checkpoint_cb],
            log_every_n_steps=10,  # Log frequently for smooth graphs
        )

        try:
            trainer.fit(model, self.data_module)
            print(f"✅ Checkpoint: {checkpoint_cb.best_model_path}")
        finally:
            # Run even when fit() raises, so the TrackIO run is closed and
            # memory is released before the next model is trained.
            self._finish_trackio(trainer)
            del model, trainer, checkpoint_cb
            gc.collect()
            torch.cuda.empty_cache()

    def execute(self):
        """Train both models in sequence and print where the outputs live."""
        print("\n🔥 TRAINING PIPELINE STARTED\n")

        runs = (
            (ScratchCNNModel, "scratch", "scratch-cnn-run"),
            (FineTunedResNetModel, "finetuned", "resnet-finetuned-run"),
        )
        for model_cls, model_name, run_name in runs:
            # Construct the model inline so no reference outlives
            # _train_model(); a local variable here would keep the previous
            # model alive while the next one trains.
            self._train_model(
                model_cls(self.cfg.LEARNING_RATE, self.cfg.AGE_LOSS_WEIGHT),
                model_name,
                run_name,
            )

        print("\n🎉 TRAINING COMPLETE!")
        print(f"📂 Checkpoints: {self.CHECKPOINT_DIR}")
        print(f"📊 TrackIO Dashboard: https://huggingface.co/spaces/{self.TRACKIO_SPACE_ID}")
378
+
379
+ # ============================================================================
380
+ # RUN TRAINING
381
+ # ============================================================================
382
+
383
if __name__ == "__main__":
    # Script entry point: build the runner from the module-level settings
    # and train both models.
    pipeline = PipelineRunner(cfg=settings)
    pipeline.execute()
386