Complete baseline model with EfficientFormer

Files changed:
- .gitignore  +1 -1
- app.py  +196 -0
- config/config.yaml  +1 -1
- dvc.lock  +53 -0
- requirements.txt  +5 -3
- src/cnnClassifier/__init__.py  +26 -0
- src/cnnClassifier/components/model_trainer.py  +57 -45
- src/cnnClassifier/config/configuration.py  +13 -11
- src/cnnClassifier/entity/config_entity.py  +1 -0
.gitignore CHANGED

@@ -130,7 +130,7 @@ __pypackages__/
 # Celery stuff
 celerybeat-schedule
 celerybeat.pid
-
+artifacts/data_ingestion/*
 # SageMath parsed files
 *.sage.py
app.py ADDED

@@ -0,0 +1,196 @@
+import streamlit as st
+import cv2
+import numpy as np
+import torch
+from PIL import Image
+from transformers import pipeline
+from mtcnn import MTCNN
+from collections import defaultdict
+
+st.set_page_config(layout="wide", page_title="Facial Age Detection")
+
+st.title("Facial Age Detection")
+st.write("Detect age groups from images, videos, or a live webcam feed.")
+st.write("This application uses an EfficientFormer-L1 model fine-tuned on the Facial Age dataset.")
+
+# --- Helper Functions and Classes ---
+
+@st.cache_resource
+def load_model():
+    """Load the age detection model pipeline."""
+    model_path = "artifacts/model_trainer/facial_age_detector_model"
+    device = 0 if torch.cuda.is_available() else -1  # GPU if available, else CPU
+    pipe = pipeline('image-classification', model=model_path, device=device)
+    return pipe
+
+@st.cache_resource
+def load_face_detector():
+    """Load the MTCNN face detector."""
+    return MTCNN()
+
+def iou(boxA, boxB):
+    """Calculate Intersection over Union for [x, y, w, h] boxes (MTCNN format)."""
+    # Convert [x, y, w, h] to corner coordinates [x1, y1, x2, y2]
+    ax1, ay1, ax2, ay2 = boxA[0], boxA[1], boxA[0] + boxA[2], boxA[1] + boxA[3]
+    bx1, by1, bx2, by2 = boxB[0], boxB[1], boxB[0] + boxB[2], boxB[1] + boxB[3]
+    xA = max(ax1, bx1)
+    yA = max(ay1, by1)
+    xB = min(ax2, bx2)
+    yB = min(ay2, by2)
+    interArea = max(0, xB - xA) * max(0, yB - yA)
+    boxAArea = (ax2 - ax1) * (ay2 - ay1)
+    boxBArea = (bx2 - bx1) * (by2 - by1)
+    return interArea / float(boxAArea + boxBArea - interArea)
+
+class EMATracker:
+    """Exponential Moving Average tracker for smoothing per-face predictions."""
+    def __init__(self, alpha=0.3):
+        self.alpha = alpha
+        self.tracked_objects = {}  # {track_id: {'box': [x, y, w, h], 'ema_preds': {label: score}}}
+
+    def update(self, detections, id_counter):
+        """Match new face boxes to existing tracks by IoU.
+        This is simple greedy matching; for more robust tracking, consider the Hungarian algorithm."""
+        matches = {}  # {track_id: det_idx}
+        used_det_indices = set()
+
+        for track_id, data in self.tracked_objects.items():
+            best_iou = 0
+            best_det_idx = -1
+            for i, det_box in enumerate(detections):
+                if i in used_det_indices:
+                    continue
+                current_iou = iou(data['box'], det_box)
+                if current_iou > best_iou and current_iou > 0.3:  # IoU threshold
+                    best_iou = current_iou
+                    best_det_idx = i
+            if best_det_idx != -1:
+                matches[track_id] = best_det_idx
+                used_det_indices.add(best_det_idx)
+
+        # Update matched tracks with their new boxes
+        for track_id, det_idx in matches.items():
+            self.tracked_objects[track_id]['box'] = detections[det_idx]
+
+        # Start new tracks for unmatched detections
+        for i, det_box in enumerate(detections):
+            if i not in used_det_indices:
+                self.tracked_objects[id_counter] = {'box': det_box, 'ema_preds': defaultdict(float)}
+                id_counter += 1
+
+        # Removing stale tracks is omitted here (optional, useful for long videos)
+        return id_counter
+
+    def apply_ema(self, track_id, new_preds):
+        """Apply EMA to the predictions for a given track and return the top label."""
+        if track_id not in self.tracked_objects:
+            return None
+
+        current_ema = self.tracked_objects[track_id]['ema_preds']
+
+        if not current_ema:
+            # First observation: initialize with the raw scores
+            for pred in new_preds:
+                current_ema[pred['label']] = pred['score']
+        else:
+            # EMA update: new = alpha * score + (1 - alpha) * old
+            for pred in new_preds:
+                label = pred['label']
+                current_ema[label] = (self.alpha * pred['score']) + ((1 - self.alpha) * current_ema[label])
+
+        self.tracked_objects[track_id]['ema_preds'] = current_ema
+
+        # Return the top prediction from EMA
+        if not current_ema:
+            return None
+        top_label = max(current_ema, key=current_ema.get)
+        return f"{top_label} ({current_ema[top_label]:.2f})"
+
+# --- Load Models ---
+try:
+    age_pipe = load_model()
+    face_detector = load_face_detector()
+except Exception as e:
+    st.error(f"Error loading models: {e}. Please ensure the model is trained and located at 'artifacts/model_trainer/facial_age_detector_model'.")
+    st.stop()
+
+# --- UI Sidebar ---
+st.sidebar.header("Input Options")
+app_mode = st.sidebar.selectbox("Choose the app mode", ["Image", "Video", "Live Webcam"])
+
+# --- Main App Logic ---
+if app_mode == "Image":
+    uploaded_file = st.sidebar.file_uploader("Upload an image...", type=["jpg", "jpeg", "png"])
+    if uploaded_file is not None:
+        image = Image.open(uploaded_file).convert("RGB")
+        img_array = np.array(image)
+
+        st.image(image, caption='Uploaded Image.', use_column_width=True)
+        st.write("")
+        st.write("Detecting faces and predicting age...")
+
+        faces = face_detector.detect_faces(img_array)
+
+        if not faces:
+            st.warning("No faces detected in the image.")
+        else:
+            for face in faces:
+                x, y, w, h = face['box']
+                x, y = max(0, x), max(0, y)  # MTCNN can return negative coordinates
+                face_img = img_array[y:y+h, x:x+w]
+                pil_face = Image.fromarray(face_img)
+
+                # Predict age
+                age_preds = age_pipe(pil_face)
+                top_pred = age_preds[0]
+
+                # Draw on image
+                cv2.rectangle(img_array, (x, y), (x+w, y+h), (0, 255, 0), 2)
+                label = f"Age: {top_pred['label']} ({top_pred['score']:.2f})"
+                cv2.putText(img_array, label, (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
+
+            st.image(img_array, caption='Processed Image.', use_column_width=True)
+
+elif app_mode == "Live Webcam":
+    st.sidebar.info("Use the buttons below to start and stop the feed.")
+    run = st.sidebar.button('Start Webcam')
+    stop = st.sidebar.button('Stop Webcam')
+    FRAME_WINDOW = st.image([])
+
+    cap = cv2.VideoCapture(0)
+    tracker = EMATracker()
+    track_id_counter = 0
+
+    while run and not stop:
+        ret, frame = cap.read()
+        if not ret:
+            st.error("Failed to capture image from webcam.")
+            break
+
+        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+        faces = face_detector.detect_faces(frame_rgb)
+
+        detection_boxes = [f['box'] for f in faces]
+        track_id_counter = tracker.update(detection_boxes, track_id_counter)
+
+        for track_id, data in tracker.tracked_objects.items():
+            x, y, w, h = data['box']
+            x, y = max(0, x), max(0, y)  # MTCNN can return negative coordinates
+            if w > 20 and h > 20:  # Filter out tiny detections
+                face_img = frame_rgb[y:y+h, x:x+w]
+                pil_face = Image.fromarray(face_img)
+
+                age_preds = age_pipe(pil_face)
+                smoothed_label = tracker.apply_ema(track_id, age_preds)
+
+                cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 2)
+                if smoothed_label:
+                    cv2.putText(frame, smoothed_label, (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
+
+        FRAME_WINDOW.image(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
+
+    cap.release()
+    st.sidebar.info("Webcam stopped.")
+
+# Placeholder for video processing, which mirrors the webcam loop but reads from a file.
+elif app_mode == "Video":
+    st.sidebar.warning("Video processing is similar to the webcam feed but processes a file. This feature is not fully implemented in this demo but follows the same logic.")
+    # You would use cv2.VideoCapture(video_path) and loop through frames.
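The Video branch above is left as a stub, with the commit's own comment pointing at cv2.VideoCapture. A minimal sketch of how that branch could mirror the webcam loop, reusing the age_pipe, face_detector, and EMATracker already defined in app.py; the uploader widget and temporary-file handling here are illustrative assumptions, not part of the commit:

    import tempfile

    # Hypothetical video branch, following the same logic as the webcam loop
    uploaded_video = st.sidebar.file_uploader("Upload a video...", type=["mp4", "avi", "mov"])
    if uploaded_video is not None:
        # cv2.VideoCapture needs a real file path, so spool the upload to disk first
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp:
            tmp.write(uploaded_video.read())
            video_path = tmp.name

        cap = cv2.VideoCapture(video_path)
        tracker = EMATracker()
        track_id_counter = 0
        frame_window = st.image([])

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break  # end of video
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            boxes = [f['box'] for f in face_detector.detect_faces(frame_rgb)]
            track_id_counter = tracker.update(boxes, track_id_counter)
            for track_id, data in tracker.tracked_objects.items():
                x, y, w, h = data['box']
                x, y = max(0, x), max(0, y)
                if w > 20 and h > 20:
                    preds = age_pipe(Image.fromarray(frame_rgb[y:y+h, x:x+w]))
                    label = tracker.apply_ema(track_id, preds)
                    cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 2)
                    if label:
                        cv2.putText(frame, label, (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
            frame_window.image(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

        cap.release()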
config/config.yaml CHANGED

@@ -15,4 +15,4 @@ model_trainer:
   root_dir: artifacts/model_trainer
   trained_model_path: artifacts/model_trainer/facial_age_detector_model
   # Using EfficientFormer-L1, a much lighter model than ViT
-  model_name: "snap-research/efficientformer-l1"
+  model_name: "snap-research/efficientformer-l1-300"
dvc.lock ADDED

@@ -0,0 +1,53 @@
+schema: '2.0'
+stages:
+  data_ingestion:
+    cmd: python src/cnnClassifier/pipeline/stage_01_data_ingestion.py
+    deps:
+    - path: config/config.yaml
+      hash: md5
+      md5: 3cea2dfb36f0a5e40dd599dad9458ca4
+      size: 609
+    - path: src/cnnClassifier/components/data_ingestion.py
+      hash: md5
+      md5: 80b591ef3eedaf256ef85f4d196a0d43
+      size: 1591
+    - path: src/cnnClassifier/pipeline/stage_01_data_ingestion.py
+      hash: md5
+      md5: 2e1c2ad52ddc9763ff2a241576a7477c
+      size: 904
+    outs:
+    - path: artifacts/data_ingestion
+      hash: md5
+      md5: 35941f86a72fc72e64cb3195753ae21d.dir
+      size: 1758455894
+      nfiles: 19557
+  model_training:
+    cmd: python src/cnnClassifier/pipeline/stage_02_model_training.py
+    deps:
+    - path: artifacts/data_ingestion
+      hash: md5
+      md5: 35941f86a72fc72e64cb3195753ae21d.dir
+      size: 1758455894
+      nfiles: 19557
+    - path: config/config.yaml
+      hash: md5
+      md5: 3cea2dfb36f0a5e40dd599dad9458ca4
+      size: 609
+    - path: params.yaml
+      hash: md5
+      md5: ce8c137aa11f22d0901fb41485e9bfde
+      size: 239
+    - path: src/cnnClassifier/components/model_trainer.py
+      hash: md5
+      md5: bc58a9fdc35492409863b38424773ef6
+      size: 8585
+    - path: src/cnnClassifier/pipeline/stage_02_model_training.py
+      hash: md5
+      md5: 374003acf88403924718ed5982007523
+      size: 829
+    outs:
+    - path: artifacts/model_trainer
+      hash: md5
+      md5: 621f61ba7beea89c3bef7a921afdcc9d.dir
+      size: 183039001
+      nfiles: 12
requirements.txt CHANGED

@@ -5,13 +5,14 @@ torchvision==0.16.0+cu118
 torchaudio==2.1.0

 # Pin NumPy to a version compatible with Torch 2.1.0
-numpy
+numpy>=1.23,<2.0

 # Hugging Face
-transformers
+transformers==4.36.2
+tokenizers==0.15.0
 datasets>=2.14.5
 evaluate
-accelerate>=0.
+accelerate>=0.25

 # MLOps and Utilities
 mlflow

@@ -29,6 +30,7 @@ imblearn
 streamlit
 opencv-python
 mtcnn
+tensorflow==2.15.0

 # AWS Deployment
 boto3
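These pins untangle a mixed Torch/TensorFlow environment: the mtcnn package is TensorFlow-backed (hence tensorflow==2.15.0 alongside Torch 2.1.0), and NumPy is held below 2.0 for Torch 2.1.0 compatibility. A quick import sanity check, offered as a sketch:

    # Verify the pinned stack imports side by side (run once after `pip install -r requirements.txt`)
    import numpy
    import torch
    import transformers
    import tensorflow

    print(numpy.__version__, torch.__version__, transformers.__version__, tensorflow.__version__)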
src/cnnClassifier/__init__.py ADDED

@@ -0,0 +1,26 @@
+import os
+import sys
+import logging
+
+# Define the logging format
+logging_str = "[%(asctime)s: %(levelname)s: %(module)s: %(message)s]"
+
+# Define the directory for log files
+log_dir = "logs"
+log_filepath = os.path.join(log_dir, "running_logs.log")
+os.makedirs(log_dir, exist_ok=True)
+
+# Configure the logging
+logging.basicConfig(
+    level=logging.INFO,
+    format=logging_str,
+    handlers=[
+        logging.FileHandler(log_filepath),   # Log to a file
+        logging.StreamHandler(sys.stdout)    # Also print to the console
+    ]
+)
+
+# Create the logger object
+logger = logging.getLogger("cnnClassifierLogger")
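Every module in the package shares this logger by importing it from the package root, as model_trainer.py does in the next diff. A minimal usage sketch:

    from cnnClassifier import logger

    logger.info("Data ingestion started")   # written to logs/running_logs.log and echoed to stdout
    try:
        raise ValueError("example failure")
    except ValueError:
        logger.exception("Stage failed")    # logs the message plus the full traceback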
src/cnnClassifier/components/model_trainer.py CHANGED

@@ -2,32 +2,62 @@ import torch
 import pandas as pd
 from pathlib import Path
 from tqdm import tqdm
+from functools import partial
 from datasets import Dataset, Image, ClassLabel
 from imblearn.over_sampling import RandomOverSampler
 from transformers import (
     EfficientFormerImageProcessor,
     EfficientFormerForImageClassification,
     TrainingArguments,
-    Trainer
-    DefaultDataCollator
+    Trainer
 )
 from torchvision.transforms import (
     Compose,
     Normalize,
     RandomRotation,
-    RandomResizedCrop,
     RandomHorizontalFlip,
     Resize,
     ToTensor
 )
 import evaluate
 from cnnClassifier.entity.config_entity import ModelTrainerConfig
+from cnnClassifier import logger
+
+# ==============================================================================
+# TOP-LEVEL FUNCTION DEFINITIONS (FOR PICKLING)
+# ==============================================================================
+
+def apply_transforms(batch, processor, transform_pipeline):
+    """Applies a given transformation pipeline to a batch of images."""
+    # Create the normalization transform with stats from the processor
+    normalize = Normalize(mean=processor.image_mean, std=processor.image_std)
+
+    # Combine the base transforms with normalization
+    full_transforms = Compose([*transform_pipeline.transforms, normalize])
+
+    # Apply to each image in the batch
+    batch["pixel_values"] = [full_transforms(img.convert("RGB")) for img in batch["image"]]
+    return batch
+
+def collate_fn(batch):
+    """A custom collate function for image classification."""
+    return {
+        'pixel_values': torch.stack([x['pixel_values'] for x in batch]),
+        'labels': torch.tensor([x['label'] for x in batch])
+    }
+
+def compute_metrics(eval_pred):
+    """Computes accuracy metric for evaluation."""
+    accuracy = evaluate.load("accuracy")
+    predictions, label_ids = eval_pred
+    predicted_labels = predictions.argmax(axis=1)
+    return accuracy.compute(predictions=predicted_labels, references=label_ids)
+
+# ==============================================================================

 class ModelTrainer:
     def __init__(self, config: ModelTrainerConfig):
         self.config = config
-        self.label2id = None
-        self.id2label = None

     def _prepare_data(self):
         logger.info("Preparing data...")

@@ -60,13 +90,11 @@ class ModelTrainer:
         file_names, labels = [], []
         data_path = Path(self.config.data_path)
         for file in tqdm(sorted(data_path.glob('*/*.*'))):
-            label =
+            label = file.parent.name
             labels.append(label_dict[label])
             file_names.append(str(file))

         df = pd.DataFrame.from_dict({"image": file_names, "label": labels})
-
-        # Random oversampling
         ros = RandomOverSampler(random_state=self.config.random_state)
         df_resampled, y_resampled = ros.fit_resample(df[['image']], df['label'])
         df = pd.concat([df_resampled, y_resampled], axis=1)

@@ -74,72 +102,54 @@ class ModelTrainer:
         dataset = Dataset.from_pandas(df).cast_column("image", Image())

         labels_list = sorted(list(set(labels)))
-
-
+        label2id = {label: i for i, label in enumerate(labels_list)}
+        id2label = {i: label for i, label in enumerate(labels_list)}

         ClassLabels = ClassLabel(num_classes=len(labels_list), names=labels_list)
         dataset = dataset.map(lambda x: {'label': ClassLabels.str2int(x['label'])}, batched=True)
         dataset = dataset.cast_column('label', ClassLabels)

-
+        split_dataset = dataset.train_test_split(test_size=self.config.test_split_size, shuffle=True, stratify_by_column="label")
+        return split_dataset, id2label, label2id

     def train(self):
         device = "cuda" if torch.cuda.is_available() else "cpu"
         logger.info(f"Using device: {device}")

-        split_dataset = self._prepare_data()
+        split_dataset, id2label, label2id = self._prepare_data()
         train_data = split_dataset['train']
         test_data = split_dataset['test']

         processor = EfficientFormerImageProcessor.from_pretrained(self.config.model_name)

-
-        size = self.config.image_size
-
-        normalize = Normalize(mean=image_mean, std=image_std)
+        # Define base transforms (without normalization)
         _train_transforms = Compose([
-            Resize((
+            Resize((self.config.image_size, self.config.image_size)),
             RandomRotation(15),
             RandomHorizontalFlip(0.5),
             ToTensor(),
-            normalize
         ])
         _val_transforms = Compose([
-            Resize((
+            Resize((self.config.image_size, self.config.image_size)),
             ToTensor(),
-            normalize
         ])

-
-
-
-
-        def val_transforms(examples):
-            examples['pixel_values'] = [_val_transforms(image.convert("RGB")) for image in examples['image']]
-            return examples
-
-        train_data.set_transform(train_transforms)
-        test_data.set_transform(val_transforms)
-
-        def collate_fn(examples):
-            pixel_values = torch.stack([example["pixel_values"] for example in examples])
-            labels = torch.tensor([example['label'] for example in examples])
-            return {"pixel_values": pixel_values, "labels": labels}
+        # Use functools.partial to create specialized versions of our top-level function.
+        # This is a pickle-safe way to pass extra arguments (processor, transforms).
+        train_transform_func = partial(apply_transforms, processor=processor, transform_pipeline=_train_transforms)
+        val_transform_func = partial(apply_transforms, processor=processor, transform_pipeline=_val_transforms)

+        train_data.set_transform(train_transform_func)
+        test_data.set_transform(val_transform_func)
+
         model = EfficientFormerForImageClassification.from_pretrained(
             self.config.model_name,
-            num_labels=len(
-            id2label=
-            label2id=
-            ignore_mismatched_sizes=True
+            num_labels=len(id2label),
+            id2label=id2label,
+            label2id=label2id,
+            ignore_mismatched_sizes=True
         ).to(device)

-        accuracy = evaluate.load("accuracy")
-        def compute_metrics(eval_pred):
-            predictions, label_ids = eval_pred
-            predicted_labels = predictions.argmax(axis=1)
-            return accuracy.compute(predictions=predicted_labels, references=label_ids)
-
         args = TrainingArguments(
             output_dir=self.config.root_dir,
             logging_dir=f'{self.config.root_dir}/logs',

@@ -154,6 +164,8 @@ class ModelTrainer:
             load_best_model_at_end=True,
             metric_for_best_model="accuracy",
             save_total_limit=1,
+            remove_unused_columns=False,
+            dataloader_num_workers=4,
             report_to="none"
         )

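Two of the new TrainingArguments deserve a note: remove_unused_columns=False keeps the raw image column available to the set_transform callables, and dataloader_num_workers=4 is what forces the pickling-safe refactor above, since DataLoader worker processes must pickle the transform callable and locally defined closures cannot be pickled. A standalone sketch of that failure mode (names here are illustrative, not from the commit):

    import pickle
    from functools import partial

    def scale(batch, factor):
        """Top-level function: picklable by reference to its module path."""
        return [x * factor for x in batch]

    good = partial(scale, factor=2)        # partial of a top-level function pickles fine
    pickle.dumps(good)

    def make_closure(factor):
        def scale_closure(batch):          # defined inside a function...
            return [x * factor for x in batch]
        return scale_closure

    bad = make_closure(2)
    try:
        pickle.dumps(bad)                  # ...so pickling fails in worker processes
    except (AttributeError, pickle.PicklingError) as e:
        print(f"closure is not picklable: {e}")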
src/cnnClassifier/config/configuration.py CHANGED

@@ -39,20 +39,22 @@ class ConfigurationManager:

     def get_model_trainer_config(self) -> ModelTrainerConfig:
         config = self.config.model_trainer
+        data_prep_config = self.config.data_preparation
         params = self.params
         create_directories([config.root_dir])

         model_trainer_config = ModelTrainerConfig(
-
-
-
-
-
-
-
-
-
-
-
+            root_dir=Path(config.root_dir),
+            data_path=Path(data_prep_config.data_path),
+            trained_model_path=Path(config.trained_model_path),
+            model_name=config.model_name,
+            image_size=int(params.IMAGE_SIZE),
+            learning_rate=float(params.LEARNING_RATE),  # <<< CORRECTED
+            batch_size=int(params.BATCH_SIZE),
+            num_train_epochs=int(params.NUM_TRAIN_EPOCHS),
+            weight_decay=float(params.WEIGHT_DECAY),  # <<< CORRECTED
+            warmup_steps=int(params.WARMUP_STEPS),
+            test_split_size=float(params.TEST_SPLIT_SIZE),  # <<< CORRECTED
+            random_state=int(params.RANDOM_STATE)
         )
         return model_trainer_config
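The int/float casts marked CORRECTED guard against YAML scalars arriving as strings; PyYAML, for one, parses 5e-5 (no decimal point) as a string rather than a float. Assuming params is an attribute-style mapping such as python-box's ConfigBox, which templates like this commonly use (an assumption; the loader is not shown in the diff), the casts behave like this:

    from box import ConfigBox  # assumed helper; pip install python-box

    # Simulate params.yaml values as a YAML loader might hand them over
    params = ConfigBox({"LEARNING_RATE": "5e-5", "BATCH_SIZE": 32})

    learning_rate = float(params.LEARNING_RATE)  # "5e-5" -> 5e-05, avoids a type error later in TrainingArguments
    batch_size = int(params.BATCH_SIZE)          # already an int; the cast just makes the type explicit
    print(learning_rate, batch_size)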
src/cnnClassifier/entity/config_entity.py CHANGED

@@ -17,6 +17,7 @@ class DataPreparationConfig:
 @dataclass(frozen=True)
 class ModelTrainerConfig:
     root_dir: Path
+    data_path: Path
     trained_model_path: Path
     model_name: str
     image_size: int
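Because ModelTrainerConfig is declared with @dataclass(frozen=True), a config object cannot be mutated after the ConfigurationManager builds it, so accidental reassignment fails fast. A small illustration with placeholder values:

    from dataclasses import dataclass, FrozenInstanceError
    from pathlib import Path

    @dataclass(frozen=True)
    class DemoConfig:            # stand-in mirroring ModelTrainerConfig's style
        root_dir: Path
        data_path: Path

    cfg = DemoConfig(root_dir=Path("artifacts/model_trainer"),
                     data_path=Path("artifacts/data_ingestion"))
    try:
        cfg.data_path = Path("elsewhere")     # frozen=True turns assignment into an error
    except FrozenInstanceError as err:
        print(f"immutable config: {err}")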