tanh1c commited on
Commit
d13c106
·
1 Parent(s): 94071ae

Add Gradio image demo

Browse files
.gitattributes CHANGED
@@ -1,35 +1,3 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
  *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  *.pth filter=lfs diff=lfs merge=lfs -text
2
+ *.tar.gz filter=lfs diff=lfs merge=lfs -text
3
+ *.png filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
app.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Hugging Face Space entry point.

Executes the assignment's Gradio app module in-process and exposes the
resulting ``demo`` object at module level (Spaces looks for it there),
launching it only when run directly as a script.
"""

import runpy

# Location of the real application module inside the repository.
_APP_PATH = "assignments/assignment-1/app/main.py"

# Run the app module and pull its factory out of the resulting namespace.
_namespace = runpy.run_path(_APP_PATH, run_name="hf_space_app")
demo = _namespace["create_app"]()

if __name__ == "__main__":
    demo.launch()
assignments/assignment-1/app/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # DL Assignment 1 - Application Demo
assignments/assignment-1/app/assets/style.css ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/*
Deep Learning Assignment - Custom Style System
=============================================
Designed for a premium, dark-themed experience.

Palette follows the GitHub dark theme: #0d1117 page background,
#161b22 panels, #30363d borders, #58a6ff accent blue.
*/

/* Main container stabilization */
.gradio-container {
    max-width: 1200px !important;
    margin: 0 auto !important;
    font-family: 'Inter', system-ui, -apple-system, sans-serif !important;
}

/* Header & Title Styling */
.app-header {
    text-align: center;
    padding: 30px 0;
    border-bottom: 1px solid #30363d;
    margin-bottom: 30px;
    background: linear-gradient(to bottom, #161b22, #0d1117);
}

/* Gradient-filled title text (background-clip: text trick) */
.app-header h1 {
    font-weight: 800 !important;
    letter-spacing: -0.02em;
    background: linear-gradient(135deg, #58a6ff 0%, #bc8cff 100%);
    -webkit-background-clip: text;
    background-clip: text;
    -webkit-text-fill-color: transparent;
}

/* Model Info Display */
.model-info-box {
    background: #161b22;
    border: 1px solid #30363d;
    border-radius: 12px;
    padding: 24px;
    margin: 15px 0;
    box-shadow: 0 4px 20px rgba(0,0,0,0.3);
}

/* Prediction Result Premium Card */
.prediction-label {
    font-size: 26px !important;
    font-weight: 700 !important;
    text-align: center;
    padding: 20px;
    background: linear-gradient(135deg, #238636 0%, #2ea043 100%);
    border-radius: 12px;
    color: white !important;
    margin: 15px 0;
    box-shadow: 0 8px 32px rgba(35, 134, 54, 0.2);
    border: 1px solid rgba(255,255,255,0.1);
}

/* Confidence Bars & Progress */
.confidence-bar {
    height: 32px;
    border-radius: 8px;
    background-color: #21262d;
    overflow: hidden;
    margin: 8px 0;
    border: 1px solid #30363d;
}

/* Modern Tabs Navigation */
.tab-nav {
    border-bottom: 1px solid #30363d !important;
    margin-bottom: 20px !important;
}

.tab-nav button {
    font-size: 15px !important;
    font-weight: 600 !important;
    padding: 14px 28px !important;
    color: #8b949e !important;
    transition: all 0.2s ease !important;
}

.tab-nav button:hover {
    color: #f0f6fc !important;
    background-color: rgba(139, 148, 158, 0.1) !important;
}

/* Active tab gets the accent color and underline */
.tab-nav button.selected {
    color: #58a6ff !important;
    border-bottom: 2px solid #58a6ff !important;
    background: transparent !important;
}

/* Calibration Metric Cards */
.metric-card {
    background: #161b22;
    border: 1px solid #30363d;
    border-radius: 12px;
    padding: 25px;
    text-align: center;
    transition: transform 0.2s ease;
}

/* Subtle lift + accent border on hover */
.metric-card:hover {
    transform: translateY(-2px);
    border-color: #58a6ff;
}

/* Custom Buttons Styling */
.gr-button-primary {
    background: linear-gradient(135deg, #1f6feb 0%, #58a6ff 100%) !important;
    border: none !important;
    font-weight: 600 !important;
    box-shadow: 0 4px 12px rgba(31, 111, 235, 0.3) !important;
}

.gr-button-primary:hover {
    filter: brightness(1.1);
    transform: translateY(-1px);
}

/* Footer Section */
.app-footer {
    text-align: center;
    padding: 40px 20px;
    color: #8b949e;
    font-size: 14px;
    border-top: 1px solid #30363d;
    margin-top: 40px;
    opacity: 0.8;
}

/* Glassmorphism utility */
.glass {
    background: rgba(22, 27, 34, 0.7) !important;
    backdrop-filter: blur(10px) !important;
    border: 1px solid rgba(48, 54, 61, 0.5) !important;
}
assignments/assignment-1/app/image/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Image handlers for Assignment 1."""
assignments/assignment-1/app/image/data.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Utilities for loading the CIFAR-10 test split from local project assets.
3
+
4
+ The workspace keeps the archive at ``image/data/cifar-10-python.tar.gz``.
5
+ Reading the test batch directly from that archive avoids permission issues
6
+ with extracted files while keeping calibration fully offline.
7
+ """
8
+
9
+ import os
10
+ import pickle
11
+ import tarfile
12
+ from functools import lru_cache
13
+ from typing import Tuple
14
+
15
+ import numpy as np
16
+ from PIL import Image
17
+ from torch.utils.data import Dataset
18
+
19
+
20
# Assignment root: three levels above this file (app/image/data.py).
ASSIGNMENT_ROOT = os.path.dirname(
    os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
)
DEFAULT_DATA_DIR = os.path.join(ASSIGNMENT_ROOT, "image", "data")
DEFAULT_ARCHIVE_PATH = os.path.join(DEFAULT_DATA_DIR, "cifar-10-python.tar.gz")


@lru_cache(maxsize=1)
def load_cifar10_test_arrays(
    archive_path: str = DEFAULT_ARCHIVE_PATH,
) -> Tuple[np.ndarray, np.ndarray]:
    """Load CIFAR-10 test images and labels from the local archive.

    Args:
        archive_path: Path to ``cifar-10-python.tar.gz``.

    Returns:
        ``(images, labels)``: ``images`` is an ``(N, 32, 32, 3)`` uint8
        array in HWC channel order, ``labels`` is an ``(N,)`` int64 array.

    Raises:
        FileNotFoundError: If the archive, or the test batch inside it,
            is missing.
    """
    if not os.path.exists(archive_path):
        raise FileNotFoundError(
            f"CIFAR-10 archive not found at {archive_path}. "
            "Expected image/data/cifar-10-python.tar.gz to exist."
        )

    with tarfile.open(archive_path, "r:gz") as tar:
        # TarFile.extractfile(name) raises KeyError for a missing member
        # and returns None only for a non-regular member; normalize both
        # failure modes to FileNotFoundError (the None check alone never
        # caught a missing entry).
        try:
            member = tar.extractfile("cifar-10-batches-py/test_batch")
        except KeyError as err:
            raise FileNotFoundError(
                "Could not find cifar-10-batches-py/test_batch inside the archive."
            ) from err
        if member is None:
            raise FileNotFoundError(
                "Could not find cifar-10-batches-py/test_batch inside the archive."
            )

        # CIFAR batches are pickled dicts with bytes keys.
        batch = pickle.load(member, encoding="bytes")

    # Rows are 3072 bytes (R-plane, G-plane, B-plane); reshape to NCHW
    # then transpose to the NHWC layout PIL expects.
    images = batch[b"data"].reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1)
    labels = np.asarray(batch[b"labels"], dtype=np.int64)
    return images, labels
50
+
51
+
52
class LocalCIFAR10TestDataset(Dataset):
    """Serve the CIFAR-10 test split directly from local project files.

    The whole split is held in memory as one uint8 array; each item is
    converted to a PIL image (and optionally transformed) on access.
    """

    def __init__(self, transform=None, archive_path: str = DEFAULT_ARCHIVE_PATH):
        self.transform = transform
        self.images, self.labels = load_cifar10_test_arrays(archive_path)

    def __len__(self) -> int:
        # One label per image.
        return len(self.labels)

    def __getitem__(self, idx: int):
        label = int(self.labels[idx])
        sample = Image.fromarray(self.images[idx])
        if self.transform is not None:
            sample = self.transform(sample)
        return sample, label
70
+
71
+
72
def create_cifar10_test_dataset(transform=None) -> LocalCIFAR10TestDataset:
    """Build the CIFAR-10 test dataset backing the calibration tab."""
    dataset = LocalCIFAR10TestDataset(transform=transform)
    return dataset
assignments/assignment-1/app/image/resnet18.py ADDED
@@ -0,0 +1,430 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ CIFAR-10 ResNet-18 Model Handler
3
+
4
+ Handles prediction, Grad-CAM visualization, and calibration
5
+ for the ResNet-18 model trained on CIFAR-10.
6
+ """
7
+
8
+ import os
9
+ import torch
10
+ import torch.nn as nn
11
+ import numpy as np
12
+ from PIL import Image
13
+ from typing import Dict, List, Optional, Any
14
+ import torchvision.transforms as transforms
15
+ from torchvision.models import resnet18
16
+
17
+ from app.shared.model_registry import (
18
+ BaseModelHandler,
19
+ PredictionResult,
20
+ CalibrationResult,
21
+ )
22
+ from app.shared.artifact_utils import (
23
+ get_best_accuracy_from_history,
24
+ load_precomputed_calibration_result,
25
+ )
26
+ from app.image.data import create_cifar10_test_dataset
27
+
28
# CIFAR-10 class labels (index order matches the training labels)
CIFAR10_LABELS = [
    'Airplane', 'Automobile', 'Bird', 'Cat', 'Deer',
    'Dog', 'Frog', 'Horse', 'Ship', 'Truck'
]

# Per-channel normalization statistics for CIFAR-10
CIFAR10_MEAN = (0.4914, 0.4822, 0.4465)
CIFAR10_STD = (0.2470, 0.2435, 0.2616)

# Input resolution the ResNet backbone expects
IMAGE_SIZE = 224


def create_resnet18_cifar10(num_classes=10):
    """Build a ResNet-18 whose final layer predicts ``num_classes`` classes."""
    model = resnet18(weights=None)
    # Swap the ImageNet head for a CIFAR-10 sized linear classifier.
    in_features = model.fc.in_features
    model.fc = nn.Linear(in_features, num_classes)
    return model
48
+
49
+
50
class GradCAM:
    """
    Grad-CAM implementation for visual explanation.
    Generates heatmap showing which regions the model focuses on.

    Hooks are registered once on ``target_layer`` at construction; every
    ``generate`` call reuses the activations/gradients captured during the
    most recent forward/backward pass through the model.
    """

    def __init__(self, model, target_layer):
        self.model = model
        self.target_layer = target_layer
        # Filled in by the hooks on each forward/backward pass.
        self.gradients = None
        self.activations = None
        self._register_hooks()

    def _register_hooks(self):
        # Capture the layer output on forward and its gradient on backward.
        def forward_hook(module, input, output):
            self.activations = output.detach()

        def backward_hook(module, grad_input, grad_output):
            self.gradients = grad_output[0].detach()

        # NOTE(review): hook handles are not stored, so they can never be
        # removed — acceptable while the handler lives for the app lifetime.
        self.target_layer.register_forward_hook(forward_hook)
        self.target_layer.register_full_backward_hook(backward_hook)

    def generate(self, input_tensor, target_class=None):
        """Generate Grad-CAM heatmap.

        Runs a forward and backward pass for ``target_class`` (the argmax
        prediction when None) and returns a normalized heatmap as a numpy
        array resized to ``IMAGE_SIZE`` x ``IMAGE_SIZE``.
        """
        self.model.eval()
        output = self.model(input_tensor)

        if target_class is None:
            target_class = output.argmax(dim=1).item()

        # Backprop only the score of the target class.
        self.model.zero_grad()
        one_hot = torch.zeros_like(output)
        one_hot[0, target_class] = 1.0
        output.backward(gradient=one_hot, retain_graph=True)

        # Pool gradients across spatial dimensions
        weights = self.gradients.mean(dim=[2, 3], keepdim=True)
        cam = (weights * self.activations).sum(dim=1, keepdim=True)
        cam = torch.relu(cam)

        # Normalize to [0, 1] (guard against an all-zero map)
        cam = cam - cam.min()
        if cam.max() > 0:
            cam = cam / cam.max()

        # Resize to input size
        cam = torch.nn.functional.interpolate(
            cam, size=(IMAGE_SIZE, IMAGE_SIZE), mode='bilinear', align_corners=False
        )
        return cam.squeeze().cpu().numpy()
101
+
102
+
103
def create_gradcam_overlay(image_np, heatmap, alpha=0.5):
    """Render the input image, its Grad-CAM heatmap, and their blend.

    Returns an RGB uint8 array of the composed three-panel figure.
    """
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    import matplotlib.cm as cm

    # Colorize the normalized heatmap and drop the alpha channel.
    heat_rgb = (cm.jet(heatmap)[:, :, :3] * 255).astype(np.uint8)

    # Bring the source image to the heatmap's resolution if needed.
    if image_np.shape[:2] != (IMAGE_SIZE, IMAGE_SIZE):
        image_np = np.array(
            Image.fromarray(image_np).resize((IMAGE_SIZE, IMAGE_SIZE))
        )

    blended = (alpha * heat_rgb + (1 - alpha) * image_np).astype(np.uint8)

    # Three panels: source, heatmap, blend — on the app's dark background.
    fig, axes = plt.subplots(1, 3, figsize=(15, 5))
    fig.patch.set_facecolor('#0d1117')

    for ax, panel, title in zip(
        axes,
        [image_np, heat_rgb, blended],
        ['Original Image', 'Grad-CAM Heatmap', 'Overlay'],
    ):
        ax.imshow(panel)
        ax.set_title(title, color='white', fontsize=14, fontweight='bold', pad=10)
        ax.axis('off')
        ax.set_facecolor('#0d1117')

    plt.tight_layout(pad=2)

    # Rasterize; buffer_rgba() is robust across matplotlib versions.
    fig.canvas.draw()
    rendered = np.array(fig.canvas.buffer_rgba())[:, :, :3]
    plt.close(fig)
    return rendered
145
+
146
+
147
class Cifar10ResNet18Handler(BaseModelHandler):
    """Model handler for CIFAR-10 ResNet-18.

    Wraps checkpoint loading, single-image prediction with Grad-CAM
    explanations, and calibration (ECE + reliability diagram) over the
    local CIFAR-10 test split.
    """

    def __init__(self, model_path: str):
        self.model_path = model_path
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.model = None
        self.grad_cam = None
        # Training metadata pulled from the checkpoint, when present.
        self.history = {}
        self.config = {}
        self.best_accuracy = None
        # Calibration results keyed by "full" or "subset:<n>".
        self._calibration_cache = {}
        self.transform = transforms.Compose([
            transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
            transforms.ToTensor(),
            transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD),
        ])
        self._load_model()

    def _load_model(self):
        """Load the trained model.

        Falls back to randomly-initialized weights when ``model_path`` does
        not exist. Also seeds the calibration cache from any precomputed
        artifact found for "resnet18".
        """
        self.model = create_resnet18_cifar10(num_classes=10)

        if os.path.exists(self.model_path):
            # weights_only=True avoids executing arbitrary pickled objects.
            checkpoint = torch.load(self.model_path, map_location=self.device,
                                    weights_only=True)
            if isinstance(checkpoint, dict):
                self.history = checkpoint.get('history', {}) or {}
                self.config = checkpoint.get('config', {}) or {}
                self.best_accuracy = get_best_accuracy_from_history(self.history)
            # Handle both state_dict and full model saves
            if isinstance(checkpoint, dict) and 'model_state_dict' in checkpoint:
                self.model.load_state_dict(checkpoint['model_state_dict'])
            elif isinstance(checkpoint, dict) and 'state_dict' in checkpoint:
                self.model.load_state_dict(checkpoint['state_dict'])
            else:
                self.model.load_state_dict(checkpoint)

        self.model = self.model.to(self.device)
        self.model.eval()

        # Initialize Grad-CAM with the last conv layer
        self.grad_cam = GradCAM(self.model, self.model.layer4[-1])

        precomputed_full = load_precomputed_calibration_result("resnet18")
        if precomputed_full is not None:
            self._calibration_cache["full"] = precomputed_full

    def get_model_name(self) -> str:
        """Human-readable model name shown in the UI."""
        return "ResNet-18"

    def get_dataset_name(self) -> str:
        """Name of the dataset the model was trained on."""
        return "CIFAR-10"

    def get_data_type(self) -> str:
        """Modality tag used by the app to pick the right input widget."""
        return "image"

    def get_class_labels(self) -> List[str]:
        """Class names in label-index order."""
        return CIFAR10_LABELS

    def get_model_info(self) -> Dict[str, str]:
        """Build the key/value table shown in the model-info box."""
        total_params = sum(p.numel() for p in self.model.parameters())
        best_accuracy = (
            f"{self.best_accuracy:.2f}%"
            if self.best_accuracy is not None
            else "N/A"
        )
        info = {
            "Architecture": "ResNet-18 (Transfer Learning from ImageNet)",
            "Dataset": "CIFAR-10 (10 classes, 60,000 images)",
            "Parameters": f"{total_params:,}",
            "Input Size": f"{IMAGE_SIZE}×{IMAGE_SIZE}×3",
            "Training": "Full fine-tune, AdamW, Cosine Annealing LR",
            "Best Accuracy": best_accuracy,
            "Device": str(self.device),
        }
        if "epochs" in self.config:
            info["Epochs"] = str(self.config["epochs"])
        # Surface the precomputed full-test ECE when available.
        full_result = self._calibration_cache.get("full")
        if full_result is not None:
            info["Full-Test ECE"] = f"{full_result.ece:.6f}"
        return info

    def predict(self, input_data) -> PredictionResult:
        """Run prediction with Grad-CAM visualization.

        Accepts a numpy array or PIL image; raises ValueError on None.
        """
        if input_data is None:
            raise ValueError("No input image provided")

        # Convert to PIL Image if numpy array
        if isinstance(input_data, np.ndarray):
            original_image = input_data.copy()
            pil_image = Image.fromarray(input_data).convert('RGB')
        else:
            pil_image = input_data.convert('RGB')
            original_image = np.array(pil_image)

        # Preprocess
        input_tensor = self.transform(pil_image).unsqueeze(0).to(self.device)

        # Forward pass
        with torch.no_grad():
            output = self.model(input_tensor)
            probabilities = torch.softmax(output, dim=1)[0]

        probs = probabilities.cpu().numpy()
        pred_idx = probs.argmax()
        pred_label = CIFAR10_LABELS[pred_idx]
        pred_conf = float(probs[pred_idx])

        # Generate Grad-CAM
        # Need to re-run with gradients enabled
        input_tensor_grad = self.transform(pil_image).unsqueeze(0).to(self.device)
        input_tensor_grad.requires_grad_(True)

        heatmap = self.grad_cam.generate(input_tensor_grad, target_class=pred_idx)
        explanation_image = create_gradcam_overlay(original_image, heatmap)

        return PredictionResult(
            label=pred_label,
            confidence=pred_conf,
            all_labels=CIFAR10_LABELS,
            all_confidences=probs.tolist(),
            explanation_image=explanation_image,
        )

    def get_example_inputs(self) -> List[Any]:
        """Return example images from CIFAR-10 test set if available."""
        # Currently no bundled examples.
        return []

    def get_calibration_data(
        self, max_samples: Optional[int] = None
    ) -> Optional[CalibrationResult]:
        """
        Compute calibration metrics on test set.
        This runs evaluation on the full test set - can be slow on CPU.

        Results are cached per ``max_samples`` value; returns None if the
        computation fails for any reason (e.g. missing data archive).
        """
        cache_key = "full" if max_samples is None else f"subset:{max_samples}"
        if cache_key in self._calibration_cache:
            return self._calibration_cache[cache_key]

        try:
            import matplotlib
            matplotlib.use('Agg')
            import matplotlib.pyplot as plt

            test_dataset = create_cifar10_test_dataset(transform=self.transform)
            # Evenly-spaced subset keeps the class mix representative.
            if max_samples is not None and 0 < max_samples < len(test_dataset):
                indices = np.linspace(
                    0, len(test_dataset) - 1, num=max_samples, dtype=int
                ).tolist()
                test_dataset = torch.utils.data.Subset(test_dataset, indices)

            test_loader = torch.utils.data.DataLoader(
                test_dataset, batch_size=128, shuffle=False, num_workers=0
            )

            all_probs = []
            all_preds = []
            all_targets = []

            self.model.eval()
            with torch.inference_mode():
                for inputs, targets in test_loader:
                    inputs = inputs.to(self.device)
                    outputs = self.model(inputs)
                    probs = torch.softmax(outputs, dim=1)
                    preds = outputs.argmax(1)

                    all_probs.extend(probs.cpu().numpy())
                    all_preds.extend(preds.cpu().numpy())
                    all_targets.extend(targets.numpy())

            all_probs = np.array(all_probs)
            all_preds = np.array(all_preds)
            all_targets = np.array(all_targets)

            # Compute ECE (Expected Calibration Error)
            n_bins = 10
            max_probs = np.max(all_probs, axis=1)
            correctness = (all_preds == all_targets).astype(float)

            bin_boundaries = np.linspace(0, 1, n_bins + 1)
            bin_accuracies = []
            bin_confidences = []
            bin_counts = []

            # Bucket predictions by confidence: bins are (lower, upper].
            for i in range(n_bins):
                lower = bin_boundaries[i]
                upper = bin_boundaries[i + 1]
                mask = (max_probs > lower) & (max_probs <= upper)
                count = mask.sum()
                bin_counts.append(int(count))

                if count > 0:
                    bin_acc = correctness[mask].mean()
                    bin_conf = max_probs[mask].mean()
                else:
                    bin_acc = 0.0
                    bin_conf = 0.0

                bin_accuracies.append(float(bin_acc))
                bin_confidences.append(float(bin_conf))

            # Compute ECE: weighted |accuracy - confidence| gap per bin.
            total = len(all_preds)
            ece = sum(
                (count / total) * abs(acc - conf)
                for count, acc, conf in zip(bin_counts, bin_accuracies, bin_confidences)
            )

            # Create reliability diagram
            fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))
            fig.patch.set_facecolor('#0d1117')

            # Reliability Diagram
            ax1.set_facecolor('#161b22')
            bin_centers = [(bin_boundaries[i] + bin_boundaries[i + 1]) / 2 for i in range(n_bins)]
            width = 0.08

            bars1 = ax1.bar(
                [c - width/2 for c in bin_centers], bin_accuracies, width,
                label='Accuracy', color='#58a6ff', alpha=0.9, edgecolor='#58a6ff'
            )
            bars2 = ax1.bar(
                [c + width/2 for c in bin_centers], bin_confidences, width,
                label='Avg Confidence', color='#f97583', alpha=0.9, edgecolor='#f97583'
            )

            # Diagonal = perfectly calibrated model.
            ax1.plot([0, 1], [0, 1], '--', color='#8b949e', linewidth=2,
                     label='Perfect Calibration')
            ax1.set_xlim(0, 1)
            ax1.set_ylim(0, 1)
            ax1.set_xlabel('Confidence', color='white', fontsize=12)
            ax1.set_ylabel('Accuracy / Confidence', color='white', fontsize=12)
            ax1.set_title(
                f'Reliability Diagram (ECE: {ece:.4f})',
                color='white', fontsize=14, fontweight='bold', pad=15
            )
            ax1.legend(facecolor='#161b22', edgecolor='#30363d',
                       labelcolor='white', fontsize=10)
            ax1.tick_params(colors='white')
            for spine in ax1.spines.values():
                spine.set_edgecolor('#30363d')
            ax1.grid(True, alpha=0.1, color='white')

            # Confidence histogram
            ax2.set_facecolor('#161b22')
            ax2.bar(
                bin_centers, [c / total for c in bin_counts], 0.08,
                color='#56d364', alpha=0.9, edgecolor='#56d364'
            )
            ax2.set_xlim(0, 1)
            ax2.set_xlabel('Confidence', color='white', fontsize=12)
            ax2.set_ylabel('Fraction of Samples', color='white', fontsize=12)
            ax2.set_title(
                'Confidence Distribution',
                color='white', fontsize=14, fontweight='bold', pad=15
            )
            ax2.tick_params(colors='white')
            for spine in ax2.spines.values():
                spine.set_edgecolor('#30363d')
            ax2.grid(True, alpha=0.1, color='white')

            plt.tight_layout(pad=3)

            # Convert to numpy
            fig.canvas.draw()
            rgba_buffer = fig.canvas.buffer_rgba()
            diagram = np.array(rgba_buffer)[:, :, :3]  # Strip alpha channel
            plt.close(fig)

            self._calibration_cache[cache_key] = CalibrationResult(
                ece=ece,
                bin_accuracies=bin_accuracies,
                bin_confidences=bin_confidences,
                bin_counts=bin_counts,
                reliability_diagram=diagram,
                source="Live computation",
            )
            return self._calibration_cache[cache_key]

        except Exception as e:
            # Best-effort: the UI shows "unavailable" on None rather than
            # crashing; the broad catch is deliberate here.
            print(f"Error computing calibration: {e}")
            return None
assignments/assignment-1/app/image/vit_b16.py ADDED
@@ -0,0 +1,440 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ CIFAR-10 ViT-B/16 Model Handler
3
+
4
+ Handles prediction, Grad-CAM visualization, and calibration
5
+ for the ViT-B/16 model trained on CIFAR-10.
6
+ """
7
+
8
+ import os
9
+ import types
10
+ import torch
11
+ import torch.nn as nn
12
+ import numpy as np
13
+ from PIL import Image
14
+ from typing import Dict, List, Optional, Any
15
+ import torchvision.transforms as transforms
16
+ from torchvision.models import vit_b_16
17
+
18
+ from app.shared.model_registry import (
19
+ BaseModelHandler,
20
+ PredictionResult,
21
+ CalibrationResult,
22
+ )
23
+ from app.shared.artifact_utils import (
24
+ get_best_accuracy_from_history,
25
+ load_precomputed_calibration_result,
26
+ )
27
+ from app.image.data import create_cifar10_test_dataset
28
+
29
+ # CIFAR-10 class labels
30
+ CIFAR10_LABELS = [
31
+ 'airplane', 'automobile', 'bird', 'cat', 'deer',
32
+ 'dog', 'frog', 'horse', 'ship', 'truck'
33
+ ]
34
+
35
+ # CIFAR-10 normalization values
36
+ CIFAR10_MEAN = (0.4914, 0.4822, 0.4465)
37
+ CIFAR10_STD = (0.2470, 0.2435, 0.2616)
38
+
39
+ # Image size ViT expects
40
+ IMAGE_SIZE = 224
41
+
42
+
43
+ def create_vit_model(num_classes=10):
44
+ """Create ViT-B/16 with modified classifier for CIFAR-10."""
45
+ model = vit_b_16(weights=None)
46
+ # Replace classifier head
47
+ model.heads.head = nn.Linear(model.heads.head.in_features, num_classes)
48
+ return model
49
+
50
+
51
class ViTAttentionVisualizer:
    """
    Attention visualization for ViT.
    Shows which patches the model attends to.

    Works by monkey-patching the last encoder block so that its attention
    weights are captured on every forward pass; ``generate_attention_map``
    then reads the latest captured weights.
    """

    def __init__(self, model):
        self.model = model
        # Latest attention weights, shape (batch, heads, seq_len, seq_len).
        self.attentions = None
        self._patch_last_encoder_block()

    def _patch_last_encoder_block(self):
        """
        Torchvision's ViT encoder block calls MultiheadAttention with
        need_weights=False, so a normal forward hook never receives attention
        maps. We patch only the last block to request weights during inference.
        """
        last_block = self.model.encoder.layers[-1]
        # Close over the visualizer so the patched forward can stash weights.
        visualizer = self

        def forward_with_attention(block, input_tensor):
            torch._assert(
                input_tensor.dim() == 3,
                f"Expected (batch_size, seq_length, hidden_dim) got {input_tensor.shape}",
            )

            # Mirrors torchvision's EncoderBlock.forward, but asks the
            # attention module for per-head weights.
            x = block.ln_1(input_tensor)
            attn_output, attn_weights = block.self_attention(
                x,
                x,
                x,
                need_weights=True,
                average_attn_weights=False,
            )
            visualizer.attentions = attn_weights.detach()

            x = block.dropout(attn_output)
            x = x + input_tensor

            y = block.ln_2(x)
            y = block.mlp(y)
            return x + y

        last_block.forward = types.MethodType(forward_with_attention, last_block)

    def generate_attention_map(self, input_tensor):
        """Generate attention map from input tensor.

        Returns a normalized 2-D patch-grid array, a raw 1-D array when the
        sequence length is not a perfect square, or None if no attention
        was captured.
        """
        self.model.eval()

        # Forward pass
        with torch.no_grad():
            _ = self.model(input_tensor)

        if self.attentions is None:
            return None

        # Get the [CLS] token attention across all heads
        # Shape: (batch, heads, seq_len, seq_len) -> take cls token row
        cls_attention = self.attentions[0, :, 0, 1:].mean(dim=0)  # Average over heads

        # Reshape to patch grid (assuming 16x16 patches for 224x224 image)
        num_patches = int(cls_attention.shape[0] ** 0.5)

        if num_patches * num_patches != cls_attention.shape[0]:
            # Fallback: just return raw attention
            return cls_attention.cpu().numpy()

        # Reshape to 2D grid
        attention_map = cls_attention.reshape(num_patches, num_patches).cpu().numpy()

        # Normalize to [0, 1] (guard against an all-zero map)
        attention_map = attention_map - attention_map.min()
        if attention_map.max() > 0:
            attention_map = attention_map / attention_map.max()

        return attention_map
127
+
128
+
129
def create_attention_overlay(image_np, attention_map, alpha=0.5):
    """Render the input image, its ViT attention map, and their blend.

    Returns ``image_np`` unchanged when ``attention_map`` is None,
    otherwise an RGB uint8 array of the composed three-panel figure.
    """
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    import matplotlib.cm as cm

    if attention_map is None:
        return image_np

    from PIL import Image as PILImage

    # Upsample the patch-level attention grid to the input resolution.
    att_img = PILImage.fromarray((attention_map * 255).astype(np.uint8))
    att_img = att_img.resize((IMAGE_SIZE, IMAGE_SIZE), PILImage.BILINEAR)
    att = np.array(att_img).astype(np.float32) / 255.0

    if image_np.shape[:2] != (IMAGE_SIZE, IMAGE_SIZE):
        image_np = np.array(
            PILImage.fromarray(image_np).resize((IMAGE_SIZE, IMAGE_SIZE), PILImage.BILINEAR)
        )

    # Colorize the attention and blend it over the image.
    heat_rgb = (cm.jet(att)[:, :, :3] * 255).astype(np.uint8)
    blended = (alpha * heat_rgb + (1 - alpha) * image_np).astype(np.uint8)

    # Three panels: source, attention, blend — on the app's dark background.
    fig, axes = plt.subplots(1, 3, figsize=(15, 5))
    fig.patch.set_facecolor('#0d1117')

    for ax, panel, title in zip(
        axes,
        [image_np, heat_rgb, blended],
        ['Original Image', 'Attention Map', 'Overlay'],
    ):
        ax.imshow(panel)
        ax.set_title(title, color='white', fontsize=14, fontweight='bold', pad=10)
        ax.axis('off')
        ax.set_facecolor('#0d1117')

    plt.tight_layout(pad=2)

    # Rasterize; buffer_rgba() is robust across matplotlib versions.
    fig.canvas.draw()
    rendered = np.array(fig.canvas.buffer_rgba())[:, :, :3]
    plt.close(fig)
    return rendered
180
+
181
+
182
+ class Cifar10ViTHandler(BaseModelHandler):
183
+ """Model handler for CIFAR-10 ViT-B/16."""
184
+
185
+ def __init__(self, model_path: str):
186
+ self.model_path = model_path
187
+ self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
188
+ self.model = None
189
+ self.attention_viz = None
190
+ self.history = {}
191
+ self.best_accuracy = None
192
+ self._calibration_cache = {}
193
+ self.transform = transforms.Compose([
194
+ transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
195
+ transforms.ToTensor(),
196
+ transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD),
197
+ ])
198
+ self._load_model()
199
+
200
+ def _load_model(self):
201
+ """Load the trained model."""
202
+ self.model = create_vit_model(num_classes=10)
203
+
204
+ if os.path.exists(self.model_path):
205
+ checkpoint = torch.load(self.model_path, map_location=self.device,
206
+ weights_only=True)
207
+ if isinstance(checkpoint, dict):
208
+ self.history = checkpoint.get('history', {}) or {}
209
+ self.best_accuracy = get_best_accuracy_from_history(self.history)
210
+ if isinstance(checkpoint, dict) and 'model_state_dict' in checkpoint:
211
+ self.model.load_state_dict(checkpoint['model_state_dict'])
212
+ else:
213
+ self.model.load_state_dict(checkpoint)
214
+
215
+ self.model = self.model.to(self.device)
216
+ self.model.eval()
217
+
218
+ # Initialize attention visualizer
219
+ self.attention_viz = ViTAttentionVisualizer(self.model)
220
+
221
+ precomputed_full = load_precomputed_calibration_result("vit_b16")
222
+ if precomputed_full is not None:
223
+ self._calibration_cache["full"] = precomputed_full
224
+
225
+ def get_model_name(self) -> str:
226
+ return "ViT-B/16"
227
+
228
+ def get_dataset_name(self) -> str:
229
+ return "CIFAR-10"
230
+
231
+ def get_data_type(self) -> str:
232
+ return "image"
233
+
234
+ def get_class_labels(self) -> List[str]:
235
+ return CIFAR10_LABELS
236
+
237
+ def get_model_info(self) -> Dict[str, str]:
238
+ total_params = sum(p.numel() for p in self.model.parameters())
239
+ best_accuracy = (
240
+ f"{self.best_accuracy:.2f}%"
241
+ if self.best_accuracy is not None
242
+ else "N/A"
243
+ )
244
+ info = {
245
+ "Architecture": "ViT-B/16 (Transfer Learning from ImageNet)",
246
+ "Dataset": "CIFAR-10 (10 classes, 60,000 images)",
247
+ "Parameters": f"{total_params:,}",
248
+ "Input Size": f"{IMAGE_SIZE}×{IMAGE_SIZE}×3",
249
+ "Training": "Full fine-tune, AdamW, Cosine Annealing LR",
250
+ "Best Accuracy": best_accuracy,
251
+ "Device": str(self.device),
252
+ }
253
+ if self.history:
254
+ info["Epochs"] = str(len(self.history.get("val_acc", [])))
255
+ full_result = self._calibration_cache.get("full")
256
+ if full_result is not None:
257
+ info["Full-Test ECE"] = f"{full_result.ece:.6f}"
258
+ return info
259
+
260
+ def predict(self, input_data) -> PredictionResult:
261
+ """Run prediction with attention visualization."""
262
+ if input_data is None:
263
+ raise ValueError("No input image provided")
264
+
265
+ # Convert to PIL Image if numpy array
266
+ if isinstance(input_data, np.ndarray):
267
+ original_image = input_data.copy()
268
+ pil_image = Image.fromarray(input_data).convert('RGB')
269
+ else:
270
+ pil_image = input_data.convert('RGB')
271
+ original_image = np.array(pil_image)
272
+
273
+ # Preprocess
274
+ input_tensor = self.transform(pil_image).unsqueeze(0).to(self.device)
275
+
276
+ # Forward pass
277
+ with torch.no_grad():
278
+ output = self.model(input_tensor)
279
+ probabilities = torch.softmax(output, dim=1)[0]
280
+
281
+ probs = probabilities.cpu().numpy()
282
+ pred_idx = probs.argmax()
283
+ pred_label = CIFAR10_LABELS[pred_idx]
284
+ pred_conf = float(probs[pred_idx])
285
+
286
+ # Generate attention visualization
287
+ attention_map = self.attention_viz.generate_attention_map(input_tensor)
288
+ explanation_image = create_attention_overlay(original_image, attention_map)
289
+
290
+ return PredictionResult(
291
+ label=pred_label,
292
+ confidence=pred_conf,
293
+ all_labels=CIFAR10_LABELS,
294
+ all_confidences=probs.tolist(),
295
+ explanation_image=explanation_image,
296
+ )
297
+
298
+ def get_example_inputs(self) -> List[Any]:
299
+ return []
300
+
301
+ def get_calibration_data(
302
+ self, max_samples: Optional[int] = None
303
+ ) -> Optional[CalibrationResult]:
304
+ """Compute calibration metrics on test set."""
305
+ cache_key = "full" if max_samples is None else f"subset:{max_samples}"
306
+ if cache_key in self._calibration_cache:
307
+ return self._calibration_cache[cache_key]
308
+
309
+ try:
310
+ import matplotlib
311
+ matplotlib.use('Agg')
312
+ import matplotlib.pyplot as plt
313
+
314
+ test_dataset = create_cifar10_test_dataset(transform=self.transform)
315
+ if max_samples is not None and 0 < max_samples < len(test_dataset):
316
+ indices = np.linspace(
317
+ 0, len(test_dataset) - 1, num=max_samples, dtype=int
318
+ ).tolist()
319
+ test_dataset = torch.utils.data.Subset(test_dataset, indices)
320
+
321
+ test_loader = torch.utils.data.DataLoader(
322
+ test_dataset, batch_size=128, shuffle=False, num_workers=0
323
+ )
324
+
325
+ all_probs = []
326
+ all_preds = []
327
+ all_targets = []
328
+
329
+ self.model.eval()
330
+ with torch.inference_mode():
331
+ for inputs, targets in test_loader:
332
+ inputs = inputs.to(self.device)
333
+ outputs = self.model(inputs)
334
+ probs = torch.softmax(outputs, dim=1)
335
+ preds = outputs.argmax(1)
336
+
337
+ all_probs.extend(probs.cpu().numpy())
338
+ all_preds.extend(preds.cpu().numpy())
339
+ all_targets.extend(targets.numpy())
340
+
341
+ all_probs = np.array(all_probs)
342
+ all_preds = np.array(all_preds)
343
+ all_targets = np.array(all_targets)
344
+
345
+ # Compute ECE
346
+ n_bins = 10
347
+ max_probs = np.max(all_probs, axis=1)
348
+ correctness = (all_preds == all_targets).astype(float)
349
+
350
+ bin_boundaries = np.linspace(0, 1, n_bins + 1)
351
+ bin_accuracies = []
352
+ bin_confidences = []
353
+ bin_counts = []
354
+
355
+ for i in range(n_bins):
356
+ lower = bin_boundaries[i]
357
+ upper = bin_boundaries[i + 1]
358
+ mask = (max_probs > lower) & (max_probs <= upper)
359
+ count = mask.sum()
360
+ bin_counts.append(int(count))
361
+
362
+ if count > 0:
363
+ bin_acc = correctness[mask].mean()
364
+ bin_conf = max_probs[mask].mean()
365
+ else:
366
+ bin_acc = 0.0
367
+ bin_conf = 0.0
368
+
369
+ bin_accuracies.append(float(bin_acc))
370
+ bin_confidences.append(float(bin_conf))
371
+
372
+ # Compute ECE
373
+ total = len(all_preds)
374
+ ece = sum(
375
+ (count / total) * abs(acc - conf)
376
+ for count, acc, conf in zip(bin_counts, bin_accuracies, bin_confidences)
377
+ )
378
+
379
+ # Create reliability diagram
380
+ fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))
381
+ fig.patch.set_facecolor('#0d1117')
382
+
383
+ # Reliability Diagram
384
+ ax1.set_facecolor('#161b22')
385
+ bin_centers = [(bin_boundaries[i] + bin_boundaries[i + 1]) / 2 for i in range(n_bins)]
386
+ width = 0.08
387
+
388
+ ax1.bar([c - width/2 for c in bin_centers], bin_accuracies, width,
389
+ label='Accuracy', color='#58a6ff', alpha=0.9, edgecolor='#58a6ff')
390
+ ax1.bar([c + width/2 for c in bin_centers], bin_confidences, width,
391
+ label='Avg Confidence', color='#f97583', alpha=0.9, edgecolor='#f97583')
392
+
393
+ ax1.plot([0, 1], [0, 1], '--', color='#8b949e', linewidth=2,
394
+ label='Perfect Calibration')
395
+ ax1.set_xlim(0, 1)
396
+ ax1.set_ylim(0, 1)
397
+ ax1.set_xlabel('Confidence', color='white', fontsize=12)
398
+ ax1.set_ylabel('Accuracy / Confidence', color='white', fontsize=12)
399
+ ax1.set_title(f'Reliability Diagram (ECE: {ece:.4f})',
400
+ color='white', fontsize=14, fontweight='bold', pad=15)
401
+ ax1.legend(facecolor='#161b22', edgecolor='#30363d', labelcolor='white', fontsize=10)
402
+ ax1.tick_params(colors='white')
403
+ for spine in ax1.spines.values():
404
+ spine.set_edgecolor('#30363d')
405
+ ax1.grid(True, alpha=0.1, color='white')
406
+
407
+ # Confidence histogram
408
+ ax2.set_facecolor('#161b22')
409
+ ax2.bar(bin_centers, [c / total for c in bin_counts], 0.08,
410
+ color='#56d364', alpha=0.9, edgecolor='#56d364')
411
+ ax2.set_xlim(0, 1)
412
+ ax2.set_xlabel('Confidence', color='white', fontsize=12)
413
+ ax2.set_ylabel('Fraction of Samples', color='white', fontsize=12)
414
+ ax2.set_title('Confidence Distribution',
415
+ color='white', fontsize=14, fontweight='bold', pad=15)
416
+ ax2.tick_params(colors='white')
417
+ for spine in ax2.spines.values():
418
+ spine.set_edgecolor('#30363d')
419
+ ax2.grid(True, alpha=0.1, color='white')
420
+
421
+ plt.tight_layout(pad=3)
422
+
423
+ fig.canvas.draw()
424
+ rgba_buffer = fig.canvas.buffer_rgba()
425
+ diagram = np.array(rgba_buffer)[:, :, :3]
426
+ plt.close(fig)
427
+
428
+ self._calibration_cache[cache_key] = CalibrationResult(
429
+ ece=ece,
430
+ bin_accuracies=bin_accuracies,
431
+ bin_confidences=bin_confidences,
432
+ bin_counts=bin_counts,
433
+ reliability_diagram=diagram,
434
+ source="Live computation",
435
+ )
436
+ return self._calibration_cache[cache_key]
437
+
438
+ except Exception as e:
439
+ print(f"Error computing calibration: {e}")
440
+ return None
assignments/assignment-1/app/main.py ADDED
@@ -0,0 +1,403 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Deep Learning Assignment 1 - Application Demo
3
+ ===============================================
4
+ A modular Gradio application for demonstrating
5
+ trained models on Image, Text, and Multimodal datasets.
6
+
7
+ Features:
8
+ - Image classification with Grad-CAM / attention visualization
9
+ - Model Calibration analysis (ECE + Reliability Diagram)
10
+ - Easy to extend with new models/datasets
11
+
12
+ Usage:
13
+ python assignments/assignment-1/app/main.py
14
+ """
15
+
16
+ import sys
17
+ import os
18
+
19
+ # Add assignment root to path so `app.*` imports keep working.
20
+ ASSIGNMENT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
21
+ sys.path.insert(0, ASSIGNMENT_ROOT)
22
+
23
+ import gradio as gr
24
+ from typing import Dict
25
+
26
+ from app.shared.model_registry import (
27
+ register_model,
28
+ get_all_model_keys,
29
+ get_models_by_type,
30
+ BaseModelHandler,
31
+ )
32
+ from app.image.resnet18 import Cifar10ResNet18Handler
33
+ from app.image.vit_b16 import Cifar10ViTHandler
34
+
35
+
36
+ # ============================================================================
37
+ # CONFIGURATION
38
+ # ============================================================================
39
+
40
+ APP_TITLE = "🧠 Deep Learning Assignment 1 - Demo"
41
+ APP_DESCRIPTION = """
42
+ <div style="text-align: center; padding: 10px 0;">
43
+ <p style="font-size: 16px; color: #8b949e; margin: 5px 0;">
44
+ Classification on Images, Text, and Multimodal Data
45
+ </p>
46
+ <p style="font-size: 14px; color: #58a6ff; margin: 5px 0;">
47
+ CO3091 · HCM University of Technology · 2025-2026 Semester 2
48
+ </p>
49
+ </div>
50
+ """
51
+
52
+ # Load custom CSS from external file
53
+ CSS_PATH = os.path.join(os.path.dirname(__file__), "assets", "style.css")
54
+ if os.path.exists(CSS_PATH):
55
+ with open(CSS_PATH, "r", encoding="utf-8") as f:
56
+ CUSTOM_CSS = f.read()
57
+ else:
58
+ CUSTOM_CSS = ""
59
+
60
+
61
+ CUSTOM_THEME = gr.themes.Base(
62
+ primary_hue=gr.themes.colors.blue,
63
+ secondary_hue=gr.themes.colors.green,
64
+ neutral_hue=gr.themes.colors.gray,
65
+ font=[gr.themes.GoogleFont("Inter"), "system-ui", "sans-serif"],
66
+ ).set(
67
+ body_background_fill="#0d1117",
68
+ body_background_fill_dark="#0d1117",
69
+ block_background_fill="#161b22",
70
+ block_background_fill_dark="#161b22",
71
+ block_border_color="#30363d",
72
+ block_border_color_dark="#30363d",
73
+ block_label_text_color="#c9d1d9",
74
+ block_label_text_color_dark="#c9d1d9",
75
+ block_title_text_color="#f0f6fc",
76
+ block_title_text_color_dark="#f0f6fc",
77
+ body_text_color="#c9d1d9",
78
+ body_text_color_dark="#c9d1d9",
79
+ body_text_color_subdued="#8b949e",
80
+ body_text_color_subdued_dark="#8b949e",
81
+ button_primary_background_fill="#238636",
82
+ button_primary_background_fill_dark="#238636",
83
+ button_primary_background_fill_hover="#2ea043",
84
+ button_primary_background_fill_hover_dark="#2ea043",
85
+ button_primary_text_color="white",
86
+ button_primary_text_color_dark="white",
87
+ input_background_fill="#0d1117",
88
+ input_background_fill_dark="#0d1117",
89
+ input_border_color="#30363d",
90
+ input_border_color_dark="#30363d",
91
+ shadow_drop="none",
92
+ shadow_drop_lg="none",
93
+ )
94
+
95
+
96
+ # ============================================================================
97
+ # MODEL INITIALIZATION
98
+ # ============================================================================
99
+
100
+ def init_models():
101
+ """Initialize and register all available models."""
102
+ model_dir = os.path.join(ASSIGNMENT_ROOT, "image", "models")
103
+
104
+ # CIFAR-10 ResNet-18
105
+ resnet18_path = os.path.join(model_dir, "resnet18_cifar10.pth")
106
+ if os.path.exists(resnet18_path):
107
+ try:
108
+ handler = Cifar10ResNet18Handler(resnet18_path)
109
+ register_model("cifar10_resnet18", handler)
110
+ print(f"✅ Loaded: CIFAR-10 ResNet-18 from {resnet18_path}")
111
+ except Exception as e:
112
+ print(f"❌ Failed to load CIFAR-10 ResNet-18: {e}")
113
+ else:
114
+ print(f"⚠️ Model file not found: {resnet18_path}")
115
+
116
+ # CIFAR-10 ViT-B/16
117
+ vit_path = os.path.join(model_dir, "vit_b16_cifar10.pth")
118
+ if os.path.exists(vit_path):
119
+ try:
120
+ handler = Cifar10ViTHandler(vit_path)
121
+ register_model("cifar10_vit", handler)
122
+ print(f"✅ Loaded: CIFAR-10 ViT-B/16 from {vit_path}")
123
+ except Exception as e:
124
+ print(f"❌ Failed to load CIFAR-10 ViT-B/16: {e}")
125
+ else:
126
+ print(f"⚠️ Model file not found: {vit_path}")
127
+
128
+
129
+ # ============================================================================
130
+ # UI BUILDER FUNCTIONS
131
+ # ============================================================================
132
+
133
+ def format_confidence_label(labels, confidences, top_k=5):
134
+ """Format top-k predictions as a dictionary for gr.Label."""
135
+ paired = sorted(zip(labels, confidences), key=lambda x: x[1], reverse=True)
136
+ return {label: float(conf) for label, conf in paired[:top_k]}
137
+
138
+
139
+ def build_model_info_markdown(handler: BaseModelHandler) -> str:
140
+ """Build formatted model info markdown."""
141
+ info = handler.get_model_info()
142
+ lines = ["### 📋 Model Information\n"]
143
+ for key, val in info.items():
144
+ lines.append(f"| **{key}** | {val} |")
145
+
146
+ header = "| Property | Value |\n|:---|:---|\n"
147
+ table_lines = [line for line in lines[1:]]
148
+ return lines[0] + header + "\n".join(table_lines)
149
+
150
+
151
+ def build_image_prediction_tab(model_key: str, handler: BaseModelHandler):
152
+ """Build the prediction tab UI for image models."""
153
+ with gr.Row(equal_height=True):
154
+ with gr.Column(scale=1):
155
+ input_image = gr.Image(
156
+ label="📸 Upload Image",
157
+ type="numpy",
158
+ height=300,
159
+ sources=["upload", "clipboard"],
160
+ )
161
+ predict_btn = gr.Button(
162
+ "🔍 Predict & Explain",
163
+ variant="primary",
164
+ size="lg",
165
+ )
166
+ gr.Markdown(
167
+ f"*Classes: {', '.join(handler.get_class_labels())}*",
168
+ elem_classes=["text-sm"],
169
+ )
170
+
171
+ with gr.Column(scale=1):
172
+ output_label = gr.Label(
173
+ label="📊 Prediction Results (Top-5)",
174
+ num_top_classes=5,
175
+ )
176
+
177
+ with gr.Row():
178
+ explanation_image = gr.Image(
179
+ label="🔥 Model Explanation (Interpretability)",
180
+ interactive=False,
181
+ height=350,
182
+ )
183
+
184
+ def do_predict(image):
185
+ if image is None:
186
+ return None, None
187
+ try:
188
+ result = handler.predict(image)
189
+ conf_dict = format_confidence_label(
190
+ result.all_labels, result.all_confidences
191
+ )
192
+ return conf_dict, result.explanation_image
193
+ except Exception as e:
194
+ raise gr.Error(f"Prediction failed: {str(e)}")
195
+
196
+ predict_btn.click(
197
+ fn=do_predict,
198
+ inputs=[input_image],
199
+ outputs=[output_label, explanation_image],
200
+ )
201
+
202
+
203
+ def build_calibration_tab(model_key: str, handler: BaseModelHandler):
204
+ """Build the calibration analysis tab."""
205
+ gr.Markdown("""
206
+ ### 📐 Model Calibration Analysis
207
+
208
+ Calibration measures how well the model's confidence matches its actual accuracy.
209
+ A perfectly calibrated model has **confidence = accuracy** for all predictions.
210
+
211
+ - **ECE (Expected Calibration Error)**: Lower is better (0 = perfect calibration)
212
+ - **Reliability Diagram**: Compares predicted confidence vs actual accuracy per bin
213
+ - **Quick Preview**: Uses a very small subset for fast CPU demos
214
+ - **Full Test Set**: Uses notebook artifacts instantly when available
215
+ """)
216
+
217
+ calibration_mode = gr.Radio(
218
+ choices=[
219
+ "Quick Preview (64 samples)",
220
+ "Full Test Set (10,000 samples)",
221
+ ],
222
+ value="Quick Preview (64 samples)",
223
+ label="Calibration Mode",
224
+ )
225
+
226
+ compute_btn = gr.Button(
227
+ "📊 Compute Calibration",
228
+ variant="primary",
229
+ size="lg",
230
+ )
231
+
232
+ ece_display = gr.Markdown(visible=False)
233
+ calibration_plot = gr.Image(
234
+ label="📈 Calibration Analysis",
235
+ interactive=False,
236
+ visible=False,
237
+ height=450,
238
+ )
239
+
240
+ def compute_calibration(mode):
241
+ try:
242
+ max_samples = 64 if mode.startswith("Quick Preview") else None
243
+ result = handler.get_calibration_data(max_samples=max_samples)
244
+ if result is None:
245
+ raise gr.Error("Could not compute calibration data")
246
+
247
+ sample_note = (
248
+ "Approximate preview on 64 evenly spaced test images"
249
+ if max_samples is not None
250
+ else "Full CIFAR-10 test set"
251
+ )
252
+ source_note = result.source or "Live computation"
253
+ ece_md = f"""
254
+ ### Calibration Metrics
255
+
256
+ | Metric | Value |
257
+ |:---|:---|
258
+ | **Mode** | {sample_note} |
259
+ | **Source** | {source_note} |
260
+ | **Expected Calibration Error (ECE)** | `{result.ece:.6f}` |
261
+ | **Interpretation** | {'✅ Well calibrated' if result.ece < 0.05 else '⚠️ Moderately calibrated' if result.ece < 0.15 else '❌ Poorly calibrated'} |
262
+ | **Total evaluated samples** | {sum(result.bin_counts):,} |
263
+ """
264
+ return (
265
+ gr.update(value=ece_md, visible=True),
266
+ gr.update(value=result.reliability_diagram, visible=True),
267
+ )
268
+ except Exception as e:
269
+ raise gr.Error(f"Calibration computation failed: {str(e)}")
270
+
271
+ compute_btn.click(
272
+ fn=compute_calibration,
273
+ inputs=[calibration_mode],
274
+ outputs=[ece_display, calibration_plot],
275
+ )
276
+
277
+
278
+ def build_model_tabs(model_key: str, handler: BaseModelHandler):
279
+ """Build all tabs for a specific model."""
280
+ gr.Markdown(build_model_info_markdown(handler))
281
+
282
+ with gr.Tabs():
283
+ with gr.Tab("🎯 Predict & Explain", id="predict"):
284
+ data_type = handler.get_data_type()
285
+ if data_type == "image":
286
+ build_image_prediction_tab(model_key, handler)
287
+ elif data_type == "text":
288
+ gr.Markdown("### 📝 Text Classification\n*Coming soon...*")
289
+ elif data_type == "multimodal":
290
+ gr.Markdown("### 🖼️+📝 Multimodal Classification\n*Coming soon...*")
291
+
292
+ with gr.Tab("📐 Calibration", id="calibration"):
293
+ build_calibration_tab(model_key, handler)
294
+
295
+
296
+ # ============================================================================
297
+ # MAIN APPLICATION
298
+ # ============================================================================
299
+
300
+ def create_app() -> gr.Blocks:
301
+ """Create the main Gradio application."""
302
+ init_models()
303
+
304
+ with gr.Blocks(
305
+ title="DL Assignment 1 - Demo",
306
+ ) as app:
307
+ gr.Markdown(f"# {APP_TITLE}")
308
+ gr.Markdown(APP_DESCRIPTION)
309
+
310
+ model_keys = get_all_model_keys()
311
+
312
+ if not model_keys:
313
+ gr.Markdown("""
314
+ ## ⚠️ No Models Loaded
315
+
316
+ Please ensure model files are in the `image/models/` directory.
317
+ See the README for instructions on adding models.
318
+ """)
319
+ else:
320
+ image_models = get_models_by_type("image")
321
+ text_models = get_models_by_type("text")
322
+ multimodal_models = get_models_by_type("multimodal")
323
+
324
+ with gr.Tabs():
325
+ if image_models:
326
+ with gr.Tab("🖼️ Image Classification", id="image_tab"):
327
+ if len(image_models) > 1:
328
+ with gr.Tabs():
329
+ for key, handler in image_models.items():
330
+ tab_name = f"{handler.get_model_name()} ({handler.get_dataset_name()})"
331
+ with gr.Tab(tab_name):
332
+ build_model_tabs(key, handler)
333
+ else:
334
+ key, handler = next(iter(image_models.items()))
335
+ build_model_tabs(key, handler)
336
+
337
+ if text_models:
338
+ with gr.Tab("📝 Text Classification", id="text_tab"):
339
+ if len(text_models) > 1:
340
+ with gr.Tabs():
341
+ for key, handler in text_models.items():
342
+ tab_name = f"{handler.get_model_name()} ({handler.get_dataset_name()})"
343
+ with gr.Tab(tab_name):
344
+ build_model_tabs(key, handler)
345
+ else:
346
+ key, handler = next(iter(text_models.items()))
347
+ build_model_tabs(key, handler)
348
+
349
+ if multimodal_models:
350
+ with gr.Tab("🔀 Multimodal Classification", id="mm_tab"):
351
+ if len(multimodal_models) > 1:
352
+ with gr.Tabs():
353
+ for key, handler in multimodal_models.items():
354
+ tab_name = f"{handler.get_model_name()} ({handler.get_dataset_name()})"
355
+ with gr.Tab(tab_name):
356
+ build_model_tabs(key, handler)
357
+ else:
358
+ key, handler = next(iter(multimodal_models.items()))
359
+ build_model_tabs(key, handler)
360
+
361
+ if not text_models:
362
+ with gr.Tab("📝 Text Classification", id="text_tab"):
363
+ gr.Markdown("""
364
+ ### 📝 Text Classification Models
365
+
366
+ *No text models loaded yet. Add your text model handler
367
+ and register it in `app/main.py`.*
368
+ """)
369
+
370
+ if not multimodal_models:
371
+ with gr.Tab("🔀 Multimodal Classification", id="mm_tab"):
372
+ gr.Markdown("""
373
+ ### 🔀 Multimodal Classification Models
374
+
375
+ *No multimodal models loaded yet. Add your multimodal
376
+ model handler and register it in `app/main.py`.*
377
+ """)
378
+
379
+ gr.Markdown("""
380
+ <div class="app-footer">
381
+ <p>Deep Learning and Its Applications · Assignment 1</p>
382
+ <p>HCM University of Technology (HCMUT) · VNUHCM</p>
383
+ </div>
384
+ """)
385
+
386
+ return app
387
+
388
+
389
+ # ============================================================================
390
+ # ENTRY POINT
391
+ # ============================================================================
392
+
393
+ if __name__ == "__main__":
394
+ app = create_app()
395
+ app.launch(
396
+ server_name="127.0.0.1",
397
+ server_port=5555,
398
+ share=False,
399
+ show_error=True,
400
+ theme=CUSTOM_THEME,
401
+ css=CUSTOM_CSS,
402
+ allowed_paths=[os.path.join(ASSIGNMENT_ROOT, "image", "artifacts")],
403
+ )
assignments/assignment-1/app/multimodal/README.md ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Multimodal App Modules
2
+
3
+ Place multimodal-specific inference handlers here.
4
+
5
+ Suggested additions:
6
+
7
+ - multimodal model wrapper classes
8
+ - joint preprocessing helpers
9
+ - prediction utilities
10
+ - demo-specific visualization helpers
11
+
12
+ After adding a handler, register it in `assignments/assignment-1/app/main.py`.
assignments/assignment-1/app/multimodal/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Multimodal model handlers for Assignment 1."""
assignments/assignment-1/app/requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ # Deep Learning Assignment 1 - Application Demo Dependencies
2
+ torch>=2.0.0
3
+ torchvision>=0.15.0
4
+ gradio>=5.0.0
5
+ numpy>=1.24.0
6
+ Pillow>=9.0.0
7
+ matplotlib>=3.7.0
assignments/assignment-1/app/shared/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Shared app utilities for Assignment 1."""
assignments/assignment-1/app/shared/artifact_utils.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Helpers for reading notebook-generated artifacts and training metadata.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ import json
8
+ import os
9
+ from pathlib import Path
10
+ from typing import Any, Dict, Optional
11
+
12
+ from .model_registry import CalibrationResult
13
+
14
+
15
+ ASSIGNMENT_ROOT = Path(
16
+ os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
17
+ )
18
+ ARTIFACTS_DIR = ASSIGNMENT_ROOT / "image" / "artifacts"
19
+
20
+
21
+ def get_best_accuracy_from_history(history: Optional[Dict[str, Any]]) -> Optional[float]:
22
+ """Return the best validation accuracy found in a checkpoint history."""
23
+ if not history:
24
+ return None
25
+
26
+ val_acc = history.get("val_acc")
27
+ if isinstance(val_acc, list) and val_acc:
28
+ return float(max(val_acc))
29
+
30
+ return None
31
+
32
+
33
+ def load_precomputed_calibration_result(
34
+ model_tag: str,
35
+ sample_tag: str = "full",
36
+ ) -> Optional[CalibrationResult]:
37
+ """
38
+ Load notebook-generated calibration metrics and figure from image/artifacts/.
39
+
40
+ The function searches recursively so nested folders like artifacts/cnn and
41
+ artifacts/vit are both supported.
42
+ """
43
+ if not ARTIFACTS_DIR.exists():
44
+ return None
45
+
46
+ metrics_name = f"{model_tag}_calibration_metrics_{sample_tag}.json"
47
+ image_name = f"{model_tag}_calibration_{sample_tag}.png"
48
+
49
+ metrics_path = next(ARTIFACTS_DIR.rglob(metrics_name), None)
50
+ image_path = next(ARTIFACTS_DIR.rglob(image_name), None)
51
+
52
+ if metrics_path is None or image_path is None:
53
+ return None
54
+
55
+ metrics = json.loads(metrics_path.read_text(encoding="utf-8"))
56
+ return CalibrationResult(
57
+ ece=float(metrics["ece"]),
58
+ bin_accuracies=[float(x) for x in metrics["bin_accuracies"]],
59
+ bin_confidences=[float(x) for x in metrics["bin_confidences"]],
60
+ bin_counts=[int(x) for x in metrics["bin_counts"]],
61
+ reliability_diagram=str(image_path),
62
+ source=f"Notebook artifact ({metrics_path.parent.name})",
63
+ )
assignments/assignment-1/app/shared/model_registry.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Model Registry - Central place to register and manage all models.
3
+
4
+ This module makes it easy to add new models for different datasets.
5
+ Each model handler should implement the BaseModelHandler interface.
6
+ """
7
+
8
+ from abc import ABC, abstractmethod
9
+ from typing import Dict, List, Optional, Tuple, Any
10
+ import numpy as np
11
+ from PIL import Image
12
+
13
+
14
+ class PredictionResult:
15
+ """Container for prediction results from a model."""
16
+
17
+ def __init__(
18
+ self,
19
+ label: str,
20
+ confidence: float,
21
+ all_labels: List[str],
22
+ all_confidences: List[float],
23
+ explanation_image: Optional[np.ndarray] = None,
24
+ ):
25
+ self.label = label
26
+ self.confidence = confidence
27
+ self.all_labels = all_labels
28
+ self.all_confidences = all_confidences
29
+ self.explanation_image = explanation_image # Grad-CAM or attention map
30
+
31
+
32
+ class CalibrationResult:
33
+ """Container for model calibration analysis results."""
34
+
35
+ def __init__(
36
+ self,
37
+ ece: float,
38
+ bin_accuracies: List[float],
39
+ bin_confidences: List[float],
40
+ bin_counts: List[int],
41
+ reliability_diagram: Optional[Any] = None,
42
+ source: Optional[str] = None,
43
+ ):
44
+ self.ece = ece
45
+ self.bin_accuracies = bin_accuracies
46
+ self.bin_confidences = bin_confidences
47
+ self.bin_counts = bin_counts
48
+ self.reliability_diagram = reliability_diagram
49
+ self.source = source
50
+
51
+
52
+ class BaseModelHandler(ABC):
53
+ """
54
+ Abstract base class for model handlers.
55
+
56
+ To add a new model, create a subclass and implement all abstract methods.
57
+ Then register it in the MODEL_REGISTRY dictionary below.
58
+ """
59
+
60
+ @abstractmethod
61
+ def get_model_name(self) -> str:
62
+ """Return human-readable model name."""
63
+ pass
64
+
65
+ @abstractmethod
66
+ def get_dataset_name(self) -> str:
67
+ """Return the dataset name this model was trained on."""
68
+ pass
69
+
70
+ @abstractmethod
71
+ def get_data_type(self) -> str:
72
+ """Return data type: 'image', 'text', or 'multimodal'."""
73
+ pass
74
+
75
+ @abstractmethod
76
+ def get_class_labels(self) -> List[str]:
77
+ """Return list of class labels."""
78
+ pass
79
+
80
+ @abstractmethod
81
+ def get_model_info(self) -> Dict[str, str]:
82
+ """Return dict of model info for display (architecture, params, etc.)."""
83
+ pass
84
+
85
+ @abstractmethod
86
+ def predict(self, input_data) -> PredictionResult:
87
+ """
88
+ Run prediction on input data.
89
+
90
+ For image models: input_data is a PIL Image or numpy array
91
+ For text models: input_data is a string
92
+ For multimodal: input_data is a tuple (image, text)
93
+
94
+ Returns: PredictionResult
95
+ """
96
+ pass
97
+
98
+ @abstractmethod
99
+ def get_example_inputs(self) -> List[Any]:
100
+ """Return list of example inputs for the demo."""
101
+ pass
102
+
103
+ def get_calibration_data(
104
+ self, max_samples: Optional[int] = None
105
+ ) -> Optional[CalibrationResult]:
106
+ """
107
+ Optionally return calibration analysis result.
108
+ Override this in subclass if you want calibration display.
109
+ """
110
+ return None
111
+
112
+
113
+ # Global model registry - add new models here
114
+ MODEL_REGISTRY: Dict[str, BaseModelHandler] = {}
115
+
116
+
117
+ def register_model(key: str, handler: BaseModelHandler):
118
+ """Register a model handler in the global registry."""
119
+ MODEL_REGISTRY[key] = handler
120
+
121
+
122
+ def get_model_handler(key: str) -> Optional[BaseModelHandler]:
123
+ """Get a model handler by key."""
124
+ return MODEL_REGISTRY.get(key)
125
+
126
+
127
+ def get_all_model_keys() -> List[str]:
128
+ """Get all registered model keys."""
129
+ return list(MODEL_REGISTRY.keys())
130
+
131
+
132
+ def get_models_by_type(data_type: str) -> Dict[str, BaseModelHandler]:
133
+ """Get all models of a specific data type."""
134
+ return {k: v for k, v in MODEL_REGISTRY.items() if v.get_data_type() == data_type}
assignments/assignment-1/app/text/README.md ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Text App Modules
2
+
3
+ Place text-specific inference handlers here.
4
+
5
+ Suggested additions:
6
+
7
+ - model wrapper classes
8
+ - preprocessing helpers
9
+ - prediction utilities
10
+ - calibration or explanation helpers if needed
11
+
12
+ After adding a handler, register it in `assignments/assignment-1/app/main.py`.
assignments/assignment-1/app/text/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Text model handlers for Assignment 1."""
assignments/assignment-1/image/artifacts/cnn/resnet18_calibration_full.png ADDED

Git LFS Details

  • SHA256: 1cb171cadaa51bc9800aeae468a823cb6e30799714c5d6c7cc39d6cbc32acc42
  • Pointer size: 131 Bytes
  • Size of remote file: 104 kB
assignments/assignment-1/image/artifacts/cnn/resnet18_calibration_metrics_full.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_tag": "resnet18",
3
+ "sample_tag": "full",
4
+ "ece": 0.020006245681643487,
5
+ "num_bins": 10,
6
+ "total_evaluated_samples": 10000,
7
+ "bin_accuracies": [
8
+ 0.0,
9
+ 0.0,
10
+ 1.0,
11
+ 0.0,
12
+ 0.6153846383094788,
13
+ 0.5058823823928833,
14
+ 0.5416666865348816,
15
+ 0.6666666865348816,
16
+ 0.6967741847038269,
17
+ 0.9816811680793762
18
+ ],
19
+ "bin_confidences": [
20
+ 0.0,
21
+ 0.0,
22
+ 0.2935279607772827,
23
+ 0.36219507455825806,
24
+ 0.46814653277397156,
25
+ 0.5451725721359253,
26
+ 0.6489876508712769,
27
+ 0.752896785736084,
28
+ 0.8511331677436829,
29
+ 0.9974784851074219
30
+ ],
31
+ "bin_counts": [
32
+ 0,
33
+ 0,
34
+ 1,
35
+ 4,
36
+ 13,
37
+ 85,
38
+ 72,
39
+ 117,
40
+ 155,
41
+ 9553
42
+ ]
43
+ }
assignments/assignment-1/image/artifacts/vit/vit_b16_calibration_full.png ADDED

Git LFS Details

  • SHA256: 9ec24a90630f7df2784bd3706eac25a4711021ed65c2dc58ec7a1ebccf6bd314
  • Pointer size: 131 Bytes
  • Size of remote file: 109 kB
assignments/assignment-1/image/artifacts/vit/vit_b16_calibration_metrics_full.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_tag": "vit_b16",
3
+ "sample_tag": "full",
4
+ "ece": 0.006916732695698738,
5
+ "num_bins": 10,
6
+ "total_evaluated_samples": 10000,
7
+ "bin_accuracies": [
8
+ 0.0,
9
+ 0.0,
10
+ 0.0,
11
+ 0.0,
12
+ 0.5,
13
+ 0.5714285969734192,
14
+ 0.6034482717514038,
15
+ 0.6901408433914185,
16
+ 0.7037037014961243,
17
+ 0.9934116005897522
18
+ ],
19
+ "bin_confidences": [
20
+ 0.0,
21
+ 0.0,
22
+ 0.2842116951942444,
23
+ 0.37363073229789734,
24
+ 0.46517834067344666,
25
+ 0.5469092130661011,
26
+ 0.6531615853309631,
27
+ 0.7513611912727356,
28
+ 0.8554656505584717,
29
+ 0.9979013204574585
30
+ ],
31
+ "bin_counts": [
32
+ 0,
33
+ 0,
34
+ 1,
35
+ 1,
36
+ 12,
37
+ 35,
38
+ 58,
39
+ 71,
40
+ 108,
41
+ 9714
42
+ ]
43
+ }
assignments/assignment-1/image/data/cifar-10-python.tar.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d958be074577803d12ecdefd02955f39262c83c16fe9348329d7fe0b5c001ce
3
+ size 170498071
assignments/assignment-1/image/models/resnet18_cifar10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0076300593993e9e6e09a358c254f24b8ffda12f66ce566e50a289ee462cb10
3
+ size 44808651
assignments/assignment-1/image/models/vit_b16_cifar10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e4d76e9dcb5b3eb907a00782e9f8af05b9ee46e9f2d3e0e16484d351e63f382
3
+ size 343288191
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ torch>=2.0.0
2
+ torchvision>=0.15.0
3
+ gradio>=5.0.0
4
+ numpy>=1.24.0
5
+ Pillow>=9.0.0
6
+ matplotlib>=3.7.0