josondev committed on
Commit
3ad52b3
·
verified ·
1 Parent(s): 1b5f829

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -375
app.py CHANGED
@@ -1,379 +1,8 @@
1
- # ============================================================================
2
- # FACE AGE & GENDER PREDICTION - COMPLETE TRAINING WITH TRACKIO
3
-
4
-
5
- import os
6
- import gc
7
- import numpy as np
8
- import pandas as pd
9
- from PIL import Image
10
- import torch
11
- from torch import nn
12
- from torch.utils.data import Dataset, DataLoader
13
- from torchvision import transforms, models
14
- import pytorch_lightning as pl
15
- from pytorch_lightning.callbacks import ModelCheckpoint
16
- from sklearn.model_selection import train_test_split
17
  import trackio
18
 
19
- # ============================================================================
20
- # GLOBAL SETTINGS
21
- # ============================================================================
22
-
23
# ============================================================================
# GLOBAL SETTINGS
# ============================================================================

class PipelineSettings:
    """Central configuration object shared by the whole training pipeline."""

    def __init__(self):
        # Dataset locations (Kaggle competition mount layout).
        self.DATA_ROOT_DIR = "/kaggle/input/sep-25-dl-gen-ai-nppe-1/face_dataset"
        self.TRAIN_CSV_PATH = os.path.join(self.DATA_ROOT_DIR, "train.csv")
        self.TEST_CSV_PATH = os.path.join(self.DATA_ROOT_DIR, "test.csv")

        # Core hyperparameters.
        self.INPUT_IMAGE_SIZE = 128
        self.BATCH_SIZE = 128
        self.LEARNING_RATE = 1e-3
        self.NUM_EPOCHS = 10
        self.AGE_LOSS_WEIGHT = 0.01

        # Use every available CPU core for the DataLoader workers.
        self.NUM_DATALOADER_WORKERS = os.cpu_count()

settings = PipelineSettings()
38
-
39
- # ============================================================================
40
- # IMAGE AUGMENTATION
41
- # ============================================================================
42
-
43
# ============================================================================
# IMAGE AUGMENTATION
# ============================================================================

class ImageAugmentor:
    """Factory for the train-time and eval-time torchvision pipelines."""

    def __init__(self, image_size):
        self.image_size = image_size
        # Standard ImageNet normalization statistics.
        self.norm_params = {'mean': [0.485, 0.456, 0.406], 'std': [0.229, 0.224, 0.225]}

    def _finishing_steps(self):
        # Shared tail of both pipelines: PIL image -> tensor -> normalized.
        return [transforms.ToTensor(), transforms.Normalize(**self.norm_params)]

    def get_training_transforms(self):
        """Resize plus light augmentation (flip, color jitter) for training."""
        head = [
            transforms.Resize((self.image_size, self.image_size)),
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1),
        ]
        return transforms.Compose(head + self._finishing_steps())

    def get_inference_transforms(self):
        """Deterministic resize-only pipeline for validation/inference."""
        head = [transforms.Resize((self.image_size, self.image_size))]
        return transforms.Compose(head + self._finishing_steps())
63
-
64
- # ============================================================================
65
- # DATASET
66
- # ============================================================================
67
-
68
# ============================================================================
# DATASET
# ============================================================================

class FaceImageDataset(Dataset):
    """Map-style dataset yielding (image, gender, age) per metadata row.

    Expects the metadata frame to have 'full_path', 'gender' and 'age'
    columns; images are resolved relative to *image_dir*.
    """

    def __init__(self, metadata_df, image_dir, image_transform=None):
        self.metadata = metadata_df
        self.image_dir = image_dir
        self.transform = image_transform

    def __len__(self):
        return len(self.metadata)

    def __getitem__(self, idx):
        record = self.metadata.iloc[idx]

        # Force three channels so every sample has the same tensor shape.
        full_path = os.path.join(self.image_dir, record['full_path'])
        face = Image.open(full_path).convert("RGB")

        if self.transform is not None:
            face = self.transform(face)

        gender = torch.tensor(record['gender'], dtype=torch.float32)
        age = torch.tensor(record['age'], dtype=torch.float32)
        return face, gender, age
88
-
89
- # ============================================================================
90
- # DATA MODULE
91
- # ============================================================================
92
-
93
# ============================================================================
# DATA MODULE
# ============================================================================

class FaceDataModule(pl.LightningDataModule):
    """Lightning data module: stratified train/val split over the train CSV."""

    def __init__(self, config: PipelineSettings):
        super().__init__()
        self.cfg = config
        self.augmentor = ImageAugmentor(self.cfg.INPUT_IMAGE_SIZE)
        self.train_df, self.val_df = None, None

    def prepare_data(self):
        # Nothing to download: the Kaggle dataset is mounted read-only.
        pass

    def setup(self, stage=None):
        if stage == 'fit' or stage is None:
            full_train = pd.read_csv(self.cfg.TRAIN_CSV_PATH)
            # Hold out 15% for validation while preserving the gender ratio.
            self.train_df, self.val_df = train_test_split(
                full_train, test_size=0.15, random_state=42, stratify=full_train['gender']
            )

            self.train_dataset = FaceImageDataset(
                self.train_df, self.cfg.DATA_ROOT_DIR, self.augmentor.get_training_transforms()
            )
            self.val_dataset = FaceImageDataset(
                self.val_df, self.cfg.DATA_ROOT_DIR, self.augmentor.get_inference_transforms()
            )

    def _make_loader(self, dataset, shuffle):
        # Single construction point keeps both loaders configured identically.
        return DataLoader(dataset, batch_size=self.cfg.BATCH_SIZE,
                          shuffle=shuffle, num_workers=self.cfg.NUM_DATALOADER_WORKERS)

    def train_dataloader(self):
        return self._make_loader(self.train_dataset, True)

    def val_dataloader(self):
        return self._make_loader(self.val_dataset, False)
124
-
125
- # ============================================================================
126
- # BASE MODEL WITH TRACKIO LOGGING
127
- # ============================================================================
128
-
129
# ============================================================================
# BASE MODEL WITH TRACKIO LOGGING
# ============================================================================

class AbstractFaceModel(pl.LightningModule):
    """Shared training/validation logic for the two-head face models.

    Subclasses implement ``forward`` returning ``(gender_logits, age_preds)``.
    Gender uses BCE-with-logits, age uses MSE; the age term is scaled by
    ``age_loss_weight`` so it does not dominate the combined loss.
    """

    def __init__(self, learning_rate, age_loss_weight):
        super().__init__()
        self.save_hyperparameters()
        self.lr = learning_rate
        self.age_weight = age_loss_weight
        self.gender_loss_fn = nn.BCEWithLogitsLoss()
        self.age_loss_fn = nn.MSELoss()

        # Per-step outputs, aggregated manually at epoch end
        # (Lightning >= 2.0 removed the epoch-end outputs argument).
        self.training_step_outputs = []
        self.validation_step_outputs = []

    @staticmethod
    def _trackio_log(payload):
        """Best-effort TrackIO logging; telemetry must never kill training.

        FIX: narrowed from a bare ``except:`` so KeyboardInterrupt/SystemExit
        still propagate.
        """
        try:
            trackio.log(payload)
        except Exception:
            pass

    @staticmethod
    def _mean_of(outputs, key):
        """Mean of one detached per-step metric across the epoch."""
        return torch.stack([o[key] for o in outputs]).mean()

    def _calculate_losses(self, gender_preds, age_preds, gender_labels, age_labels):
        """Return (total, gender, age) losses for one batch.

        FIX: squeeze only the trailing head dimension. A bare ``.squeeze()``
        also collapses the batch dimension when batch_size == 1, making the
        (scalar) prediction broadcast incorrectly against the 1-element
        label tensor.
        """
        gender_loss = self.gender_loss_fn(gender_preds.squeeze(-1), gender_labels)
        age_loss = self.age_loss_fn(age_preds.squeeze(-1), age_labels)
        total_loss = gender_loss + (age_loss * self.age_weight)
        return total_loss, gender_loss, age_loss

    def training_step(self, batch, batch_idx):
        images, gender_labels, age_labels = batch
        gender_preds, age_preds = self(images)
        total_loss, gender_loss, age_loss = self._calculate_losses(
            gender_preds, age_preds, gender_labels, age_labels
        )

        self.log('train_loss', total_loss, on_step=True, on_epoch=True, prog_bar=True)

        self.training_step_outputs.append({
            'loss_total': total_loss.detach(),
            'loss_gender': gender_loss.detach(),
            'loss_age': age_loss.detach()
        })

        # Log to TrackIO per step
        self._trackio_log({
            'train/loss_total': total_loss.item(),
            'train/loss_gender': gender_loss.item(),
            'train/loss_age': age_loss.item(),
            'step': self.global_step
        })

        return total_loss

    def on_train_epoch_end(self):
        if len(self.training_step_outputs) > 0:
            self._trackio_log({
                'train/epoch_loss_total': self._mean_of(self.training_step_outputs, 'loss_total').item(),
                'train/epoch_loss_gender': self._mean_of(self.training_step_outputs, 'loss_gender').item(),
                'train/epoch_loss_age': self._mean_of(self.training_step_outputs, 'loss_age').item(),
                'epoch': self.current_epoch
            })

        self.training_step_outputs.clear()

    def validation_step(self, batch, batch_idx):
        images, gender_labels, age_labels = batch
        gender_preds, age_preds = self(images)
        total_loss, gender_loss, age_loss = self._calculate_losses(
            gender_preds, age_preds, gender_labels, age_labels
        )

        self.log('val_loss', total_loss, on_epoch=True, prog_bar=True)

        self.validation_step_outputs.append({
            'loss_total': total_loss.detach(),
            'loss_gender': gender_loss.detach(),
            'loss_age': age_loss.detach()
        })

    def on_validation_epoch_end(self):
        if len(self.validation_step_outputs) > 0:
            self._trackio_log({
                'val/loss_total': self._mean_of(self.validation_step_outputs, 'loss_total').item(),
                'val/loss_gender': self._mean_of(self.validation_step_outputs, 'loss_gender').item(),
                'val/loss_age': self._mean_of(self.validation_step_outputs, 'loss_age').item(),
                'epoch': self.current_epoch
            })

        self.validation_step_outputs.clear()

    def configure_optimizers(self):
        """Plain Adam over all parameters at the configured learning rate."""
        return torch.optim.Adam(self.parameters(), lr=self.lr)
225
-
226
- # ============================================================================
227
- # SCRATCH CNN MODEL
228
- # ============================================================================
229
-
230
# ============================================================================
# SCRATCH CNN MODEL
# ============================================================================

class ScratchCNNModel(AbstractFaceModel):
    """Four-block CNN trained from scratch with separate gender/age heads.

    GENERALIZED: the probe resolution used to size the linear heads is now
    an optional ``image_size`` parameter instead of being hard-wired to the
    global ``settings`` object; omitting it preserves the old behavior, so
    existing two-argument callers are unaffected.
    """

    def __init__(self, learning_rate, age_loss_weight, image_size=None):
        super().__init__(learning_rate, age_loss_weight)

        def conv_block(in_f, out_f):
            # Conv -> BN -> ReLU -> 2x2 max-pool: halves spatial size per block.
            return nn.Sequential(
                nn.Conv2d(in_f, out_f, 3, padding=1, bias=False),
                nn.BatchNorm2d(out_f),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(2, 2)
            )

        self.feature_extractor = nn.Sequential(
            conv_block(3, 32), conv_block(32, 64),
            conv_block(64, 128), conv_block(128, 256)
        )

        # Infer the flattened feature size by running a dummy image through
        # the extractor, so the heads adapt to any input resolution.
        if image_size is None:
            image_size = settings.INPUT_IMAGE_SIZE
        probe = torch.randn(1, 3, image_size, image_size)
        flat_size = self.feature_extractor(probe).view(1, -1).size(1)

        self.gender_head = nn.Linear(flat_size, 1)
        self.age_head = nn.Linear(flat_size, 1)

    def forward(self, x):
        features = torch.flatten(self.feature_extractor(x), 1)
        return self.gender_head(features), self.age_head(features)
256
-
257
- # ============================================================================
258
- # FINE-TUNED RESNET MODEL
259
- # ============================================================================
260
-
261
# ============================================================================
# FINE-TUNED RESNET MODEL
# ============================================================================

class FineTunedResNetModel(AbstractFaceModel):
    """ImageNet-pretrained ResNet-18 backbone with fresh gender/age heads."""

    def __init__(self, learning_rate, age_loss_weight):
        super().__init__(learning_rate, age_loss_weight)

        pretrained = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
        feature_dim = pretrained.fc.in_features

        # Drop the final fc layer; keep the conv stages + global pooling.
        trunk_layers = list(pretrained.children())[:-1]
        self.backbone = nn.Sequential(*trunk_layers)
        self.gender_head = nn.Linear(feature_dim, 1)
        self.age_head = nn.Linear(feature_dim, 1)

    def forward(self, x):
        pooled = self.backbone(x)
        features = torch.flatten(pooled, 1)
        return self.gender_head(features), self.age_head(features)
274
-
275
- # ============================================================================
276
- # PIPELINE RUNNER (FIXED: NO KAGGLE SECRETS DEPENDENCY)
277
- # ============================================================================
278
-
279
# ============================================================================
# PIPELINE RUNNER (FIXED: NO KAGGLE SECRETS DEPENDENCY)
# ============================================================================

class PipelineRunner:
    """Trains both models end-to-end and logs every run to TrackIO."""

    def __init__(self, cfg: PipelineSettings):
        self.cfg = cfg
        self.data_module = FaceDataModule(cfg)
        self._setup_trackio()

    def _setup_trackio(self):
        """Setup HF token for TrackIO - works in Kaggle and locally"""
        try:
            # Method 1: Try Kaggle Secrets (only works in Kaggle)
            from kaggle_secrets import UserSecretsClient
            secrets = UserSecretsClient()
            hf_token = secrets.get_secret("HUGGINGFACE_TOKEN")
            os.environ["HF_TOKEN"] = hf_token
            print("✅ TrackIO auth from Kaggle Secrets")
        except Exception:
            # FIX: narrowed from a bare `except:` so Ctrl-C is not swallowed
            # while we fall back to the environment.
            # Method 2: Try environment variable (set in Kaggle Secrets or locally)
            hf_token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_TOKEN")
            if hf_token:
                os.environ["HF_TOKEN"] = hf_token
                print("✅ TrackIO auth from environment variable")
            else:
                print("⚠️ No HF token found. TrackIO may fail. Add to Kaggle Secrets or set HF_TOKEN env var.")

    def _train_model(self, model, model_name, run_name):
        """Fit one model, checkpoint the best epoch, and close the TrackIO run."""
        print(f"\n{'='*70}\n🚀 Training: {model_name}\n{'='*70}")

        # Initialize TrackIO
        try:
            trackio.init(
                space_id="muhammad-bilal1/dlgenai-nppe",  # UPDATE: Your HF space
                project="25-t3-nppe1",
                group=run_name,
                config={
                    "lr": self.cfg.LEARNING_RATE,
                    "epochs": self.cfg.NUM_EPOCHS,
                    "batch_size": self.cfg.BATCH_SIZE,
                    "model": model_name,
                    "image_size": self.cfg.INPUT_IMAGE_SIZE,
                    "age_weight": self.cfg.AGE_LOSS_WEIGHT
                }
            )
            print(f"✅ TrackIO initialized: {run_name}")
        except Exception as e:
            print(f"⚠️ TrackIO init failed: {e}")

        # Keep only the checkpoint with the lowest validation loss.
        checkpoint_cb = ModelCheckpoint(
            monitor='val_loss',
            dirpath='/kaggle/working/',
            filename=f'{model_name}-best-model',
            save_top_k=1,
            mode='min'
        )

        trainer = pl.Trainer(
            max_epochs=self.cfg.NUM_EPOCHS,
            # FIX: 'auto' degrades gracefully to CPU; the previous hard-coded
            # 'gpu' raised immediately on machines without CUDA.
            accelerator='auto',
            devices='auto',
            strategy="ddp_notebook",
            callbacks=[checkpoint_cb],
            log_every_n_steps=10
        )

        trainer.fit(model, self.data_module)
        print(f"✅ Checkpoint: {checkpoint_cb.best_model_path}")

        try:
            final_val = trainer.callback_metrics.get('val_loss', torch.tensor(0.0)).item()
            trackio.log({"final_val_loss": final_val})
            trackio.finish()
            print("✅ TrackIO run finished")
        except Exception as e:
            print(f"⚠️ TrackIO finish: {e}")

        # Free references and GPU memory between the two training runs.
        del model, trainer, checkpoint_cb
        gc.collect()
        torch.cuda.empty_cache()

    def execute(self):
        """Train the scratch CNN first, then the fine-tuned ResNet."""
        print("\n🔥 TRAINING PIPELINE STARTED\n")

        scratch = ScratchCNNModel(self.cfg.LEARNING_RATE, self.cfg.AGE_LOSS_WEIGHT)
        self._train_model(scratch, "scratch", "scratch-cnn-run")

        finetuned = FineTunedResNetModel(self.cfg.LEARNING_RATE, self.cfg.AGE_LOSS_WEIGHT)
        self._train_model(finetuned, "finetuned", "resnet-finetuned-run")

        print("\n🎉 TRAINING COMPLETE!")
        print("📂 Checkpoints: /kaggle/working/")
        print("📊 TrackIO: https://huggingface.co/spaces/josondev/IITM-NPPE")
372
-
373
- # ============================================================================
374
- # RUN TRAINING
375
- # ============================================================================
376
 
377
# app.py - SIMPLE TRACKIO DASHBOARD (NO TRAINING)

import trackio

# Just launch the dashboard - training happens in Kaggle
demo = trackio.demo()

if __name__ == "__main__":
    demo.launch()