josondev committed on
Commit
3ad52b3
·
verified ·
1 Parent(s): 1b5f829

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -375
app.py CHANGED
@@ -1,379 +1,8 @@
1
- # ============================================================================
2
- # FACE AGE & GENDER PREDICTION - COMPLETE TRAINING WITH TRACKIO
3
-
4
-
5
- import os
6
- import gc
7
- import numpy as np
8
- import pandas as pd
9
- from PIL import Image
10
- import torch
11
- from torch import nn
12
- from torch.utils.data import Dataset, DataLoader
13
- from torchvision import transforms, models
14
- import pytorch_lightning as pl
15
- from pytorch_lightning.callbacks import ModelCheckpoint
16
- from sklearn.model_selection import train_test_split
17
  import trackio
18
 
19
- # ============================================================================
20
- # GLOBAL SETTINGS
21
- # ============================================================================
22
-
23
# ============================================================================
# GLOBAL SETTINGS
# ============================================================================

class PipelineSettings:
    """Central configuration object shared by the whole training pipeline."""

    def __init__(self):
        # Dataset locations (Kaggle competition mount layout).
        self.DATA_ROOT_DIR = "/kaggle/input/sep-25-dl-gen-ai-nppe-1/face_dataset"
        self.TRAIN_CSV_PATH = os.path.join(self.DATA_ROOT_DIR, "train.csv")
        self.TEST_CSV_PATH = os.path.join(self.DATA_ROOT_DIR, "test.csv")

        # Core hyperparameters.
        self.INPUT_IMAGE_SIZE = 128
        self.BATCH_SIZE = 128
        self.LEARNING_RATE = 1e-3
        self.NUM_EPOCHS = 10
        self.AGE_LOSS_WEIGHT = 0.01

        # Use every available CPU core for the DataLoader workers.
        self.NUM_DATALOADER_WORKERS = os.cpu_count()

settings = PipelineSettings()
38
-
39
- # ============================================================================
40
- # IMAGE AUGMENTATION
41
- # ============================================================================
42
-
43
# ============================================================================
# IMAGE AUGMENTATION
# ============================================================================

class ImageAugmentor:
    """Factory for the train-time and eval-time torchvision pipelines."""

    def __init__(self, image_size):
        self.image_size = image_size
        # Standard ImageNet normalization statistics.
        self.norm_params = {'mean': [0.485, 0.456, 0.406], 'std': [0.229, 0.224, 0.225]}

    def _finishing_steps(self):
        # Shared tail of both pipelines: PIL image -> tensor -> normalized.
        return [transforms.ToTensor(), transforms.Normalize(**self.norm_params)]

    def get_training_transforms(self):
        """Resize plus light augmentation (flip, color jitter) for training."""
        head = [
            transforms.Resize((self.image_size, self.image_size)),
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1),
        ]
        return transforms.Compose(head + self._finishing_steps())

    def get_inference_transforms(self):
        """Deterministic resize-only pipeline for validation/inference."""
        head = [transforms.Resize((self.image_size, self.image_size))]
        return transforms.Compose(head + self._finishing_steps())
63
-
64
- # ============================================================================
65
- # DATASET
66
- # ============================================================================
67
-
68
# ============================================================================
# DATASET
# ============================================================================

class FaceImageDataset(Dataset):
    """Map-style dataset yielding (image, gender, age) per metadata row.

    Expects the metadata frame to have 'full_path', 'gender' and 'age'
    columns; images are resolved relative to *image_dir*.
    """

    def __init__(self, metadata_df, image_dir, image_transform=None):
        self.metadata = metadata_df
        self.image_dir = image_dir
        self.transform = image_transform

    def __len__(self):
        return len(self.metadata)

    def __getitem__(self, idx):
        record = self.metadata.iloc[idx]

        # Force three channels so every sample has the same tensor shape.
        full_path = os.path.join(self.image_dir, record['full_path'])
        face = Image.open(full_path).convert("RGB")

        if self.transform is not None:
            face = self.transform(face)

        gender = torch.tensor(record['gender'], dtype=torch.float32)
        age = torch.tensor(record['age'], dtype=torch.float32)
        return face, gender, age
88
-
89
- # ============================================================================
90
- # DATA MODULE
91
- # ============================================================================
92
-
93
# ============================================================================
# DATA MODULE
# ============================================================================

class FaceDataModule(pl.LightningDataModule):
    """Lightning data module: stratified train/val split over the train CSV."""

    def __init__(self, config: PipelineSettings):
        super().__init__()
        self.cfg = config
        self.augmentor = ImageAugmentor(self.cfg.INPUT_IMAGE_SIZE)
        self.train_df, self.val_df = None, None

    def prepare_data(self):
        # Nothing to download: the Kaggle dataset is mounted read-only.
        pass

    def setup(self, stage=None):
        if stage == 'fit' or stage is None:
            full_train = pd.read_csv(self.cfg.TRAIN_CSV_PATH)
            # Hold out 15% for validation while preserving the gender ratio.
            self.train_df, self.val_df = train_test_split(
                full_train, test_size=0.15, random_state=42, stratify=full_train['gender']
            )

            self.train_dataset = FaceImageDataset(
                self.train_df, self.cfg.DATA_ROOT_DIR, self.augmentor.get_training_transforms()
            )
            self.val_dataset = FaceImageDataset(
                self.val_df, self.cfg.DATA_ROOT_DIR, self.augmentor.get_inference_transforms()
            )

    def _make_loader(self, dataset, shuffle):
        # Single construction point keeps both loaders configured identically.
        return DataLoader(dataset, batch_size=self.cfg.BATCH_SIZE,
                          shuffle=shuffle, num_workers=self.cfg.NUM_DATALOADER_WORKERS)

    def train_dataloader(self):
        return self._make_loader(self.train_dataset, True)

    def val_dataloader(self):
        return self._make_loader(self.val_dataset, False)
124
-
125
- # ============================================================================
126
- # BASE MODEL WITH TRACKIO LOGGING
127
- # ============================================================================
128
-
129
# ============================================================================
# BASE MODEL WITH TRACKIO LOGGING
# ============================================================================

class AbstractFaceModel(pl.LightningModule):
    """Shared training/validation logic for the two-head face models.

    Subclasses implement ``forward`` returning ``(gender_logits, age_preds)``.
    Gender uses BCE-with-logits, age uses MSE; the age term is scaled by
    ``age_loss_weight`` so it does not dominate the combined loss.
    """

    def __init__(self, learning_rate, age_loss_weight):
        super().__init__()
        self.save_hyperparameters()
        self.lr = learning_rate
        self.age_weight = age_loss_weight
        self.gender_loss_fn = nn.BCEWithLogitsLoss()
        self.age_loss_fn = nn.MSELoss()

        # Per-step outputs, aggregated manually at epoch end
        # (Lightning >= 2.0 removed the epoch-end outputs argument).
        self.training_step_outputs = []
        self.validation_step_outputs = []

    @staticmethod
    def _trackio_log(payload):
        """Best-effort TrackIO logging; telemetry must never kill training.

        FIX: narrowed from a bare ``except:`` so KeyboardInterrupt/SystemExit
        still propagate.
        """
        try:
            trackio.log(payload)
        except Exception:
            pass

    @staticmethod
    def _mean_of(outputs, key):
        """Mean of one detached per-step metric across the epoch."""
        return torch.stack([o[key] for o in outputs]).mean()

    def _calculate_losses(self, gender_preds, age_preds, gender_labels, age_labels):
        """Return (total, gender, age) losses for one batch.

        FIX: squeeze only the trailing head dimension. A bare ``.squeeze()``
        also collapses the batch dimension when batch_size == 1, making the
        (scalar) prediction broadcast incorrectly against the 1-element
        label tensor.
        """
        gender_loss = self.gender_loss_fn(gender_preds.squeeze(-1), gender_labels)
        age_loss = self.age_loss_fn(age_preds.squeeze(-1), age_labels)
        total_loss = gender_loss + (age_loss * self.age_weight)
        return total_loss, gender_loss, age_loss

    def training_step(self, batch, batch_idx):
        images, gender_labels, age_labels = batch
        gender_preds, age_preds = self(images)
        total_loss, gender_loss, age_loss = self._calculate_losses(
            gender_preds, age_preds, gender_labels, age_labels
        )

        self.log('train_loss', total_loss, on_step=True, on_epoch=True, prog_bar=True)

        self.training_step_outputs.append({
            'loss_total': total_loss.detach(),
            'loss_gender': gender_loss.detach(),
            'loss_age': age_loss.detach()
        })

        # Log to TrackIO per step
        self._trackio_log({
            'train/loss_total': total_loss.item(),
            'train/loss_gender': gender_loss.item(),
            'train/loss_age': age_loss.item(),
            'step': self.global_step
        })

        return total_loss

    def on_train_epoch_end(self):
        if len(self.training_step_outputs) > 0:
            self._trackio_log({
                'train/epoch_loss_total': self._mean_of(self.training_step_outputs, 'loss_total').item(),
                'train/epoch_loss_gender': self._mean_of(self.training_step_outputs, 'loss_gender').item(),
                'train/epoch_loss_age': self._mean_of(self.training_step_outputs, 'loss_age').item(),
                'epoch': self.current_epoch
            })

        self.training_step_outputs.clear()

    def validation_step(self, batch, batch_idx):
        images, gender_labels, age_labels = batch
        gender_preds, age_preds = self(images)
        total_loss, gender_loss, age_loss = self._calculate_losses(
            gender_preds, age_preds, gender_labels, age_labels
        )

        self.log('val_loss', total_loss, on_epoch=True, prog_bar=True)

        self.validation_step_outputs.append({
            'loss_total': total_loss.detach(),
            'loss_gender': gender_loss.detach(),
            'loss_age': age_loss.detach()
        })

    def on_validation_epoch_end(self):
        if len(self.validation_step_outputs) > 0:
            self._trackio_log({
                'val/loss_total': self._mean_of(self.validation_step_outputs, 'loss_total').item(),
                'val/loss_gender': self._mean_of(self.validation_step_outputs, 'loss_gender').item(),
                'val/loss_age': self._mean_of(self.validation_step_outputs, 'loss_age').item(),
                'epoch': self.current_epoch
            })

        self.validation_step_outputs.clear()

    def configure_optimizers(self):
        """Plain Adam over all parameters at the configured learning rate."""
        return torch.optim.Adam(self.parameters(), lr=self.lr)
225
-
226
- # ============================================================================
227
- # SCRATCH CNN MODEL
228
- # ============================================================================
229
-
230
# ============================================================================
# SCRATCH CNN MODEL
# ============================================================================

class ScratchCNNModel(AbstractFaceModel):
    """Four-block CNN trained from scratch with separate gender/age heads.

    GENERALIZED: the probe resolution used to size the linear heads is now
    an optional ``image_size`` parameter instead of being hard-wired to the
    global ``settings`` object; omitting it preserves the old behavior, so
    existing two-argument callers are unaffected.
    """

    def __init__(self, learning_rate, age_loss_weight, image_size=None):
        super().__init__(learning_rate, age_loss_weight)

        def conv_block(in_f, out_f):
            # Conv -> BN -> ReLU -> 2x2 max-pool: halves spatial size per block.
            return nn.Sequential(
                nn.Conv2d(in_f, out_f, 3, padding=1, bias=False),
                nn.BatchNorm2d(out_f),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(2, 2)
            )

        self.feature_extractor = nn.Sequential(
            conv_block(3, 32), conv_block(32, 64),
            conv_block(64, 128), conv_block(128, 256)
        )

        # Infer the flattened feature size by running a dummy image through
        # the extractor, so the heads adapt to any input resolution.
        if image_size is None:
            image_size = settings.INPUT_IMAGE_SIZE
        probe = torch.randn(1, 3, image_size, image_size)
        flat_size = self.feature_extractor(probe).view(1, -1).size(1)

        self.gender_head = nn.Linear(flat_size, 1)
        self.age_head = nn.Linear(flat_size, 1)

    def forward(self, x):
        features = torch.flatten(self.feature_extractor(x), 1)
        return self.gender_head(features), self.age_head(features)
256
-
257
- # ============================================================================
258
- # FINE-TUNED RESNET MODEL
259
- # ============================================================================
260
-
261
# ============================================================================
# FINE-TUNED RESNET MODEL
# ============================================================================

class FineTunedResNetModel(AbstractFaceModel):
    """ImageNet-pretrained ResNet-18 backbone with fresh gender/age heads."""

    def __init__(self, learning_rate, age_loss_weight):
        super().__init__(learning_rate, age_loss_weight)

        pretrained = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
        feature_dim = pretrained.fc.in_features

        # Drop the final fc layer; keep the conv stages + global pooling.
        trunk_layers = list(pretrained.children())[:-1]
        self.backbone = nn.Sequential(*trunk_layers)
        self.gender_head = nn.Linear(feature_dim, 1)
        self.age_head = nn.Linear(feature_dim, 1)

    def forward(self, x):
        pooled = self.backbone(x)
        features = torch.flatten(pooled, 1)
        return self.gender_head(features), self.age_head(features)
274
-
275
- # ============================================================================
276
- # PIPELINE RUNNER (FIXED: NO KAGGLE SECRETS DEPENDENCY)
277
- # ============================================================================
278
-
279
# ============================================================================
# PIPELINE RUNNER (FIXED: NO KAGGLE SECRETS DEPENDENCY)
# ============================================================================

class PipelineRunner:
    """Trains both models end-to-end and logs every run to TrackIO."""

    def __init__(self, cfg: PipelineSettings):
        self.cfg = cfg
        self.data_module = FaceDataModule(cfg)
        self._setup_trackio()

    def _setup_trackio(self):
        """Setup HF token for TrackIO - works in Kaggle and locally"""
        try:
            # Method 1: Try Kaggle Secrets (only works in Kaggle)
            from kaggle_secrets import UserSecretsClient
            secrets = UserSecretsClient()
            hf_token = secrets.get_secret("HUGGINGFACE_TOKEN")
            os.environ["HF_TOKEN"] = hf_token
            print("✅ TrackIO auth from Kaggle Secrets")
        except Exception:
            # FIX: narrowed from a bare `except:` so Ctrl-C is not swallowed
            # while we fall back to the environment.
            # Method 2: Try environment variable (set in Kaggle Secrets or locally)
            hf_token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_TOKEN")
            if hf_token:
                os.environ["HF_TOKEN"] = hf_token
                print("✅ TrackIO auth from environment variable")
            else:
                print("⚠️ No HF token found. TrackIO may fail. Add to Kaggle Secrets or set HF_TOKEN env var.")

    def _train_model(self, model, model_name, run_name):
        """Fit one model, checkpoint the best epoch, and close the TrackIO run."""
        print(f"\n{'='*70}\n🚀 Training: {model_name}\n{'='*70}")

        # Initialize TrackIO
        try:
            trackio.init(
                space_id="muhammad-bilal1/dlgenai-nppe",  # UPDATE: Your HF space
                project="25-t3-nppe1",
                group=run_name,
                config={
                    "lr": self.cfg.LEARNING_RATE,
                    "epochs": self.cfg.NUM_EPOCHS,
                    "batch_size": self.cfg.BATCH_SIZE,
                    "model": model_name,
                    "image_size": self.cfg.INPUT_IMAGE_SIZE,
                    "age_weight": self.cfg.AGE_LOSS_WEIGHT
                }
            )
            print(f"✅ TrackIO initialized: {run_name}")
        except Exception as e:
            print(f"⚠️ TrackIO init failed: {e}")

        # Keep only the checkpoint with the lowest validation loss.
        checkpoint_cb = ModelCheckpoint(
            monitor='val_loss',
            dirpath='/kaggle/working/',
            filename=f'{model_name}-best-model',
            save_top_k=1,
            mode='min'
        )

        trainer = pl.Trainer(
            max_epochs=self.cfg.NUM_EPOCHS,
            # FIX: 'auto' degrades gracefully to CPU; the previous hard-coded
            # 'gpu' raised immediately on machines without CUDA.
            accelerator='auto',
            devices='auto',
            strategy="ddp_notebook",
            callbacks=[checkpoint_cb],
            log_every_n_steps=10
        )

        trainer.fit(model, self.data_module)
        print(f"✅ Checkpoint: {checkpoint_cb.best_model_path}")

        try:
            final_val = trainer.callback_metrics.get('val_loss', torch.tensor(0.0)).item()
            trackio.log({"final_val_loss": final_val})
            trackio.finish()
            print("✅ TrackIO run finished")
        except Exception as e:
            print(f"⚠️ TrackIO finish: {e}")

        # Free references and GPU memory between the two training runs.
        del model, trainer, checkpoint_cb
        gc.collect()
        torch.cuda.empty_cache()

    def execute(self):
        """Train the scratch CNN first, then the fine-tuned ResNet."""
        print("\n🔥 TRAINING PIPELINE STARTED\n")

        scratch = ScratchCNNModel(self.cfg.LEARNING_RATE, self.cfg.AGE_LOSS_WEIGHT)
        self._train_model(scratch, "scratch", "scratch-cnn-run")

        finetuned = FineTunedResNetModel(self.cfg.LEARNING_RATE, self.cfg.AGE_LOSS_WEIGHT)
        self._train_model(finetuned, "finetuned", "resnet-finetuned-run")

        print("\n🎉 TRAINING COMPLETE!")
        print("📂 Checkpoints: /kaggle/working/")
        print("📊 TrackIO: https://huggingface.co/spaces/josondev/IITM-NPPE")
372
-
373
- # ============================================================================
374
- # RUN TRAINING
375
- # ============================================================================
376
 
377
# app.py - SIMPLE TRACKIO DASHBOARD (NO TRAINING)

import trackio

# Just launch the dashboard - training happens in Kaggle
demo = trackio.demo()

if __name__ == "__main__":
    demo.launch()