Amanuel-Ni commited on
Commit
d797315
·
verified ·
1 Parent(s): 6bfa1cb

Upload 12 files

Browse files
Files changed (13) hide show
  1. .gitattributes +3 -0
  2. Convolutional-Neural-Network.jpg +3 -0
  3. README.md +19 -12
  4. Training.py +0 -0
  5. app.py +74 -0
  6. data_viz.py +117 -0
  7. emo.jpg +0 -0
  8. introduction.py +121 -0
  9. models.py +420 -0
  10. new41.jpg +3 -0
  11. prediction.py +166 -0
  12. requirements.txt +8 -0
  13. vit.jpg +3 -0
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ Convolutional-Neural-Network.jpg filter=lfs diff=lfs merge=lfs -text
37
+ new41.jpg filter=lfs diff=lfs merge=lfs -text
38
+ vit.jpg filter=lfs diff=lfs merge=lfs -text
Convolutional-Neural-Network.jpg ADDED

Git LFS Details

  • SHA256: 43cd61b6f706b6c9749240ee8ae75ec1b4ec34ab85beefedd0a1e5adca4e811a
  • Pointer size: 131 Bytes
  • Size of remote file: 126 kB
README.md CHANGED
@@ -1,12 +1,19 @@
1
- ---
2
- title: Face
3
- emoji: 🏃
4
- colorFrom: pink
5
- colorTo: purple
6
- sdk: streamlit
7
- sdk_version: 1.44.1
8
- app_file: app.py
9
- pinned: false
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
1
+ # :earth_americas: GDP dashboard template
2
+
3
+ A simple Streamlit app showing the GDP of different countries in the world.
4
+
5
+ [![Open in Streamlit](https://static.streamlit.io/badges/streamlit_badge_black_white.svg)](https://gdp-dashboard-template.streamlit.app/)
6
+
7
+ ### How to run it on your own machine
8
+
9
+ 1. Install the requirements
10
+
11
+ ```
12
+ $ pip install -r requirements.txt
13
+ ```
14
+
15
+ 2. Run the app
16
+
17
+ ```
18
+ $ streamlit run streamlit_app.py
19
+ ```
Training.py ADDED
File without changes
app.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Streamlit entry point for the facial-expression-recognition demo.

Wires together three pages: introduction, training-metric visualization,
and live webcam prediction. The shared label mapping lives here and is
passed into the prediction page.
"""

import streamlit as st
import torch
import torch.nn as nn
import torch.nn.functional as F

from PIL import Image
import numpy as np

from pytorch_grad_cam.utils.image import show_cam_on_image

import random
import matplotlib.pyplot as plt
import torchvision.transforms as transforms
import cv2

# Import modules from this repository.
import models
import prediction
import data_viz
import introduction

# -------------------------------
# Label Dictionary (1-indexed)
# -------------------------------
# Dataset folders are named '1'..'6'; model logits are 0-based, so
# prediction.py shifts model output by +1 before looking up this dict.
label_dict = {
    1: 'Surprise',
    2: 'Disgust',
    3: 'Happiness',
    4: 'Sadness',
    5: 'Anger',
    6: 'Neutral'
}

# -------------------------------
# Streamlit App UI
# -------------------------------
st.set_page_config(page_title="Emotion Classification With Computer Vision", layout="centered")

st.title("🎭 Facial Expression Recognition")

# Model selection — consumed by the data-viz and prediction pages.
model_choice = st.selectbox("Choose a model", ["CNN", "VGG16", "ViT"])

# NOTE: the original code eagerly called models.load_cnn_model() here even
# when another model or page was selected, and never used the result
# (prediction.py loads whichever model it needs). The dead eager load has
# been removed to avoid paying the checkpoint-load cost on every rerun.

app_mode = st.sidebar.selectbox('Contents ', ['01 Introduction', '02 Data visualization', '03 Prediction'])

if app_mode == '01 Introduction':
    introduction.Show_introduction()
elif app_mode == '02 Data visualization':
    data_viz.data_visualization(model_choice)
else:
    prediction.Display_prediction(model_choice, label_dict)
74
+
data_viz.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import streamlit as st
import matplotlib.pyplot as plt

# NOTE: the original file had `from torchinfo import summary`, which was
# never used AND is not listed in requirements.txt — importing this module
# on the deployed Space would raise ImportError. The import was removed.


def data_visualization(model_choice):
    """Render training curves and an architecture summary for *model_choice*.

    Parameters
    ----------
    model_choice : str
        One of "CNN", "VGG16" or "ViT". Any other value shows an error.
    """

    def plot_model_metrics(model_type):
        # Hard-coded training histories recorded offline for each model.
        # The epoch axis is derived from the history length below, so the
        # three models may have differently long histories.
        if model_type == 'CNN':
            loss = [
                1.2389, 0.9501, 0.8562, 0.7818, 0.7231, 0.6737, 0.6299, 0.5995, 0.5672, 0.5388,
                0.4650, 0.4469, 0.4263, 0.4116, 0.3860, 0.3775, 0.3621, 0.3455, 0.3250, 0.3098,
                0.2803, 0.2633, 0.2520, 0.2465, 0.2436, 0.2339, 0.2197, 0.2168, 0.2097, 0.2021
            ]

            accuracy = [
                53.36, 65.29, 69.07, 71.31, 73.55, 75.41, 77.23, 78.31, 79.55, 80.71,
                83.26, 84.22, 85.00, 85.37, 86.30, 86.78, 87.03, 87.60, 88.68, 89.18,
                90.26, 91.04, 91.20, 91.32, 91.74, 92.02, 92.82, 92.50, 93.00, 93.25
            ]

        elif model_type == 'VGG16':
            loss = [1.2832, 0.8841, 0.7730, 0.7002, 0.6222, 0.5854, 0.5632, 0.5135, 0.4946, 0.4537]
            accuracy = [56.87, 68.18, 72.45, 75.38, 78.16, 79.32, 80.08, 82.19, 82.61, 84.26]

        elif model_type == 'ViT':
            loss = [186.7186, 176.4275, 116.8164, 159.8890, 151.8824, 151.6594, 146.9743, 143.7478, 140.8833, 138.7943]
            accuracy = [63.57, 65.16, 66.85, 68.92, 70.29, 71.09, 71.87, 72.54, 73.11, 73.92]

        else:
            st.error("Model type must be one of: CNN, VGG16, ViT")
            return

        # Set epochs to match the loss list length.
        epochs = list(range(1, len(loss) + 1))

        # Plot both Loss and Accuracy side by side.
        fig, axs = plt.subplots(1, 2, figsize=(12, 4))

        axs[0].plot(epochs, loss, marker='o', color='tomato')
        axs[0].set_title(f"{model_type} - Loss")
        axs[0].set_xlabel("Epoch")
        axs[0].set_ylabel("Loss")
        axs[0].grid(True)

        axs[1].plot(epochs, accuracy, marker='o', color='seagreen')
        axs[1].set_title(f"{model_type} - Accuracy")
        axs[1].set_xlabel("Epoch")
        axs[1].set_ylabel("Accuracy (%)")
        axs[1].grid(True)

        st.pyplot(fig)

    # Streamlit UI
    st.title("Model Training Metrics Viewer")
    plot_model_metrics(model_choice)

    st.subheader("📊 Model Architecture Summary")
    st.markdown("This section provides a detailed breakdown of the model architecture, including the number of parameters, trainability, and estimated model size.")

    if model_choice == "CNN":
        st.markdown("""
    #### 🤖 CNN Architecture (`FacialReaction`)
    | Layer | Input Shape | Output Shape | Params | Trainable |
    |-------------------|-------------------|-------------------|------------|-----------|
    | Conv2d (conv1) | [1, 3, 100, 100] | [1, 64, 99, 99] | 3,136 | ✅ |
    | MaxPool2d | [1, 64, 99, 99] | [1, 64, 49, 49] | - | ❌ |
    | Conv2d (conv2) | [1, 64, 49, 49] | [1, 64, 48, 48] | 65,600 | ✅ |
    | MaxPool2d | [1, 64, 48, 48] | [1, 64, 24, 24] | - | ❌ |
    | Linear (fc1) | [1, 36864] | [1, 128] | 4,718,720 | ✅ |
    | Linear (fc2) | [1, 128] | [1, 6] | 774 | ✅ |

    **Total Parameters**: `4,788,230`
    **Trainable Parameters**: `4,788,230`
    **Non-trainable Parameters**: `0`
    **Estimated Model Size**: `~25.5 MB`
    """)

    elif model_choice == "ViT":
        st.markdown("""
    #### 🧠 Vision Transformer (ViT) Architecture

    | Component | Input Shape | Output Shape | Params | Trainable |
    |--------------------------------|--------------------|--------------------|------------|-----------|
    | Patch Embedding (Conv2d) | [32, 3, 224, 224] | [32, 192, 14, 14] | 147,648 | ✅ |
    | Transformer Blocks (12x) | [32, 197, 192] | [32, 197, 192] | ~5.3M | ✅ |
    | Classification Head (fc_out) | [32, 192] | [32, 6] | 1,158 | ✅ |

    **Total Parameters**: `5,526,348`
    **Trainable Parameters**: `5,526,348`
    **Non-trainable Parameters**: `0`
    **Estimated Model Size**: `~1.3 GB`
    """)

    elif model_choice == "VGG16":
        st.markdown("""
    #### 📦 VGG16 Model Summary

    VGG16 is a deep convolutional neural network known for its uniform architecture of `3x3` conv filters and `2x2` max-pooling layers. It ends with 3 fully connected layers.

    Since it's quite large (138 million parameters), we’re showing a high-level overview here:

    | Component | Description |
    |---------------------|------------------------------------|
    | Convolutional Blocks| 13 Conv layers + ReLU + MaxPooling |
    | Fully Connected | FC1 → FC2 → Output layer (6 units) |
    | Pretrained Base | Yes (ImageNet, fine-tuned) |

    **Total Parameters**: ~`138 million`
    **Trainable Parameters**: ~`138 million`
    **Estimated Model Size**: ~`500+ MB`
    """)
emo.jpg ADDED
introduction.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import streamlit as st
from PIL import Image


def Show_introduction():
    """Render the static introduction page of the Streamlit app.

    Displays a hero image, project motivation/objective sections, and a
    dropdown that shows background material on each of the three models.
    Reads image files ("emo.jpg", "Convolutional-Neural-Network.jpg",
    "vit.jpg", "new41.jpg") from the working directory.
    """
    # Load and display the hero image.
    image = Image.open("emo.jpg")

    # Center the image using a 1:2:1 column layout.
    col1, col2, col3 = st.columns([1, 2, 1])
    with col2:
        st.image(image, use_container_width=True)

    # Title and subtitle
    st.title("🎭 Facial Emotion Recognition")
    st.subheader("Detecting Emotions from Facial Expressions Using Deep Learning")

    # Introduction text
    st.markdown("""
    Welcome to the **Face Emotion Recognition** app!
    This project demonstrates the use of deep learning to recognize human emotions from facial expressions in real time.

    Using a convolutional neural network (CNN) trained on facial image datasets, the model can classify emotions such as **Happy**, **Sad**, **Angry**, **Surprised**, and more.
    """)

    # Add a separator
    st.markdown("---")

    # Motivation section
    st.header("💡 Motivation")
    st.markdown("""
    Facial expressions are a fundamental mode of non-verbal communication.
    With the rise of AI and human-computer interaction, emotion recognition has gained importance in applications such as:
    - Mental health monitoring
    - Customer feedback analysis
    - Security and surveillance systems
    - Interactive gaming and virtual assistants
    """)

    # Objective section
    st.header("🎯 Objective")
    st.markdown("""
    The goal of this project is to:
    - Build a robust deep learning model that can accurately classify emotions from facial images.
    - Deploy the model in a user-friendly interface for real-time predictions.
    - Explore how AI can understand human affect through facial features.
    """)

    # How it works section
    st.header("⚙️ How It Works")
    st.markdown("""
    1. Upload an image or use your webcam to capture a face.
    2. The model detects the face and analyzes facial features.
    3. It then predicts the most likely emotion and displays the result.

    This app was built with **Streamlit**, and **PyTorch**.
    """)

    # Model Overview section
    st.header("🧠 Models Used")

    # Dropdown for model selection — purely informational; this choice is
    # independent of the model used by the prediction page.
    model_choice = st.selectbox(
        "Select a model to learn more about it:",
        ["Convolutional Neural Network (CNN)", "Vision Transformer (ViT)", "VGG"]
    )

    if model_choice == "Convolutional Neural Network (CNN)":
        st.subheader("🌀 Convolutional Neural Network (CNN)")
        cnn_image = Image.open("Convolutional-Neural-Network.jpg")  # Illustration image shipped with the repo
        st.image(cnn_image, caption="Typical CNN architecture", use_container_width=True)
        st.markdown("""
    CNNs are specialized deep learning models for image processing.
    They consist of layers that automatically learn to detect features like edges, textures, and patterns in images.

    ### 📍 Where It's Used:
    - **Face recognition systems** (e.g., in mobile phones)
    - **Medical imaging** (e.g., detecting tumors)
    - **Autonomous vehicles** (e.g., recognizing road signs and pedestrians)

    In our project, CNNs serve as a baseline for detecting emotions from faces due to their efficiency and interpretability. They are especially good when dealing with relatively smaller datasets.
    """)

    elif model_choice == "Vision Transformer (ViT)":
        st.subheader("🧠 Vision Transformer (ViT)")
        vit_image = Image.open("vit.jpg")  # Illustration image shipped with the repo
        st.image(vit_image, caption="Vision Transformer concept", use_container_width=True)
        st.markdown("""
    ViTs bring the power of transformer models to the vision domain by splitting images into patches and processing them using self-attention — a technique originally used in NLP.

    ### 📍 Where It's Used:
    - **Large-scale image classification** (e.g., ImageNet tasks)
    - **Fine-grained object detection**
    - **Art analysis and medical diagnosis**

    In our app, ViT is used for capturing global relationships in facial features that might not be easily detected by CNNs. It's especially effective with high-resolution images and large training sets.
    """)

    elif model_choice == "VGG":
        st.subheader("🏗️ VGG Network")
        vgg_image = Image.open("new41.jpg")  # Illustration image shipped with the repo
        st.image(vgg_image, caption="VGG architecture overview", use_container_width=True)
        st.markdown("""
    The VGG model, introduced by the Visual Geometry Group at Oxford, is known for its deep yet simple architecture using small (3x3) convolution filters.

    ### 📍 Where It's Used:
    - **Facial recognition systems**
    - **Emotion detection**
    - **Transfer learning tasks**, where VGG is pre-trained on large datasets like ImageNet and fine-tuned for specific applications.

    We use VGG as a benchmark in our system. While it's more computationally intensive than CNN, it performs well when high accuracy is prioritized over speed.
    """)

    # Footer / next step
    st.markdown("---")
    st.info("👉 Use the sidebar to get started and test the model with your own images or webcam.")
models.py ADDED
@@ -0,0 +1,420 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Model definitions, dataset plumbing and Grad-CAM helpers for the app.

Module-level names consumed elsewhere (keep stable): label_dict,
batch_size, img_size, transform_train, transform_test, transform,
train_dataset_cnn, test_dataset_cnn, train_dataset_v, test_dataset_v,
train_loader_cnn, test_loader_cnn, train_loader_v, test_loader_v.
"""

import os
import zipfile

import streamlit as st
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, models, transforms
from PIL import Image

from pytorch_grad_cam import GradCAM
from pytorch_grad_cam.utils.image import show_cam_on_image

import numpy as np

# -------------------------------
# Label Dictionary (1-indexed)
# -------------------------------
# Dataset folders are named '1'..'6'; model logits are 0-based.
label_dict = {
    1: 'Surprise',
    2: 'Disgust',
    3: 'Happiness',
    4: 'Sadness',
    5: 'Anger',
    6: 'Neutral'
}

# Parameters
batch_size = 64
img_size = 100  # CNN input resolution (updated from 48 to 100)

# Transforms for CNN
transform_train = transforms.Compose([
    transforms.Resize((img_size, img_size)),                 # Resize to 100x100
    transforms.RandomHorizontalFlip(),                       # Data augmentation
    transforms.RandomRotation(degrees=10),                   # Data augmentation
    transforms.ToTensor(),                                   # Convert to tensor
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))   # Normalize RGB channels to [-1, 1]
])

transform_test = transforms.Compose([
    transforms.Resize((img_size, img_size)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Transforms for VGG and ViT
transform = transforms.Compose([
    transforms.Resize((224, 224)),       # Resize to 224x224
    transforms.ToTensor(),               # Convert to tensor [0,1]
    transforms.Normalize(                # Normalize using ImageNet stats
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])

# Unzip the dataset once. The original code re-extracted the archive on
# every import of this module, which is slow and pointless after the
# first run; the guard skips extraction when the target directory exists.
if not os.path.isdir('Dataset_final'):
    with zipfile.ZipFile('dataset_final.zip', 'r') as zip_ref:
        zip_ref.extractall('Dataset_final')

# Datasets (same images, two different preprocessing pipelines).
train_dataset_cnn = datasets.ImageFolder(root='Dataset_final/train', transform=transform_train)
test_dataset_cnn = datasets.ImageFolder(root='Dataset_final/test', transform=transform_test)

train_dataset_v = datasets.ImageFolder(root='Dataset_final/train', transform=transform)
test_dataset_v = datasets.ImageFolder(root='Dataset_final/test', transform=transform)

# DataLoaders
train_loader_cnn = DataLoader(train_dataset_cnn, batch_size=batch_size, shuffle=True, num_workers=2)
test_loader_cnn = DataLoader(test_dataset_cnn, batch_size=batch_size, shuffle=False, num_workers=2)

train_loader_v = DataLoader(train_dataset_v, batch_size=batch_size, shuffle=True, num_workers=2)
test_loader_v = DataLoader(test_dataset_v, batch_size=batch_size, shuffle=False, num_workers=2)
111
+
112
+
113
+
114
+
115
+
116
+
117
+
118
+
119
+
120
+
121
+
122
+
123
+
124
+
125
+
126
+
127
+
128
+
129
+
130
+
131
+
132
+
133
+
134
+
135
+
136
+
137
+
138
+
139
+
140
+
141
+ # -------------------------------
142
+ # Model: CNN (your custom model)
143
+ # -------------------------------
144
class FacialReaction(nn.Module):
    """Small two-conv CNN for 100x100 RGB facial-expression images.

    Each stage is conv(4x4, pad 1) -> ReLU -> 2x2 max-pool, shrinking the
    spatial size 100 -> 49 -> 24, so the flattened feature vector fed to
    the classifier head is 64 * 24 * 24 values.
    """

    def __init__(self, num_classes=7):
        super(FacialReaction, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=4, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(64, 64, kernel_size=4, padding=1)
        self.fc1 = nn.Linear(64 * 24 * 24, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        # Two conv/pool stages, then a two-layer fully connected head.
        features = self.pool(F.relu(self.conv1(x)))
        features = self.pool(F.relu(self.conv2(features)))
        flat = torch.flatten(features, start_dim=1)
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)
159
+
160
+
161
+
162
+
163
+
164
+
165
+
166
+
167
+
168
+
169
+
170
+
171
+
172
class PatchEmbed(nn.Module):
    """Split an image into non-overlapping patches and linearly embed them.

    A convolution whose kernel size equals its stride is exactly a
    per-patch linear projection, so one Conv2d does both steps at once.
    Output shape: (B, num_patches, embed_dim).
    """

    def __init__(self, img_size=224, patch_size=16, in_channels=3, embed_dim=192):
        super(PatchEmbed, self).__init__()
        self.img_size = img_size
        self.patch_size = patch_size
        self.embed_dim = embed_dim
        # Patch extraction + projection in a single strided convolution.
        self.proj = nn.Conv2d(in_channels, embed_dim, kernel_size=patch_size, stride=patch_size)

    def forward(self, x):
        # (B, C, H, W) -> (B, E, H/p, W/p) -> (B, E, N) -> (B, N, E)
        return self.proj(x).flatten(2).transpose(1, 2)
187
+
188
class MultiHeadSelfAttention(nn.Module):
    """Standard multi-head self-attention over a (B, N, E) token sequence.

    NOTE(review): the original forward used einsum subscripts written for a
    (B, N, heads, head_dim) layout ("nqhd,nkhd->nhqk") while the tensors had
    actually been permuted to (B, heads, N, head_dim), so attention weights
    were computed across the wrong axes and the final reshape interleaved
    heads incorrectly; it also scaled by sqrt(embed_dim) instead of the
    conventional sqrt(head_dim). This version uses the correct axes and
    scaling. Parameter shapes are unchanged, so old checkpoints still load,
    but activations will differ — re-validate any model trained with the
    old forward before deploying.
    """

    def __init__(self, embed_dim, num_heads):
        super(MultiHeadSelfAttention, self).__init__()
        self.num_heads = num_heads
        self.embed_dim = embed_dim
        self.head_dim = embed_dim // num_heads

        assert self.head_dim * num_heads == embed_dim, "Embedding dimension must be divisible by num_heads"

        # One linear layer produces queries, keys and values together.
        self.qkv = nn.Linear(embed_dim, embed_dim * 3)
        self.fc_out = nn.Linear(embed_dim, embed_dim)

    def forward(self, x):
        B, N, E = x.shape

        # Project and split into per-head q, k, v: each (B, heads, N, head_dim).
        qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, self.head_dim).permute(2, 0, 3, 1, 4)
        q, k, v = qkv[0], qkv[1], qkv[2]

        # Scaled dot-product attention over the sequence axis.
        scores = torch.einsum("bhqd,bhkd->bhqk", q, k) / (self.head_dim ** 0.5)
        attention = torch.softmax(scores, dim=-1)

        # Weighted sum of values, then merge heads back into embed_dim.
        out = torch.einsum("bhqk,bhkd->bhqd", attention, v)
        out = out.transpose(1, 2).reshape(B, N, E)
        return self.fc_out(out)
216
+
217
class FeedForward(nn.Module):
    """Position-wise MLP: Linear -> GELU -> Dropout(0.1) -> Linear.

    Expands embed_dim to hidden_dim, applies the non-linearity and
    dropout, then projects back to embed_dim.
    """

    def __init__(self, embed_dim, hidden_dim=768):
        super(FeedForward, self).__init__()
        self.fc1 = nn.Linear(embed_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, embed_dim)
        self.dropout = nn.Dropout(0.1)

    def forward(self, x):
        # Same pipeline as the original, expressed as one chained call.
        return self.fc2(self.dropout(F.gelu(self.fc1(x))))
230
+
231
class TransformerBlock(nn.Module):
    """One post-norm encoder block: attention then MLP, each wrapped in
    a residual connection followed by LayerNorm (Add & Norm).
    """

    def __init__(self, embed_dim, num_heads, hidden_dim=768):
        super(TransformerBlock, self).__init__()
        self.attn = MultiHeadSelfAttention(embed_dim, num_heads)
        self.ffn = FeedForward(embed_dim, hidden_dim)
        self.layernorm1 = nn.LayerNorm(embed_dim)
        self.layernorm2 = nn.LayerNorm(embed_dim)

    def forward(self, x):
        # Residual around attention, then residual around the feed-forward.
        x = self.layernorm1(x + self.attn(x))
        return self.layernorm2(x + self.ffn(x))
245
+
246
class VisionTransformer(nn.Module):
    """Minimal ViT classifier.

    Pipeline: patch embedding -> prepend learnable CLS token -> add learned
    positional encoding -> stack of encoder blocks -> linear head applied
    to the CLS token only.
    """

    def __init__(self, img_size=224, patch_size=16, in_channels=3, embed_dim=192, num_heads=3, num_layers=12, num_classes=6):
        super(VisionTransformer, self).__init__()
        self.embed_dim = embed_dim

        # Patch embedding.
        self.patch_embed = PatchEmbed(img_size, patch_size, in_channels, embed_dim)

        # Learned positional encoding for all patches plus the CLS token.
        num_tokens = (img_size // patch_size) ** 2 + 1
        self.pos_embed = nn.Parameter(torch.randn(1, num_tokens, embed_dim))
        self.cls_token = nn.Parameter(torch.randn(1, 1, embed_dim))

        # Encoder stack.
        self.blocks = nn.ModuleList([
            TransformerBlock(embed_dim, num_heads) for _ in range(num_layers)
        ])

        # Classification head.
        self.fc_out = nn.Linear(embed_dim, num_classes)

    def forward(self, x):
        # Tokenize the image into patch embeddings.
        tokens = self.patch_embed(x)

        # Prepend the CLS token, then add positional information.
        cls = self.cls_token.expand(tokens.size(0), -1, -1)
        tokens = torch.cat((cls, tokens), dim=1)  # (B, N_patches+1, embed_dim)
        tokens = tokens + self.pos_embed

        # Run the encoder stack.
        for block in self.blocks:
            tokens = block(tokens)

        # Classify from the CLS token alone.
        return self.fc_out(tokens[:, 0])
287
+
288
+
289
+
290
+
291
+
292
+
293
+
294
+
295
+ # -------------------------------
296
+ # Load Model Functions (correct filenames)
297
+ # -------------------------------
298
@st.cache_resource
def load_cnn_model():
    """Load the trained FacialReaction CNN from 'CNN_facial_reaction.pth'.

    Cached by Streamlit so the checkpoint is read once per process.
    The checkpoint is a dict; only its 'model_state_dict' entry is used.
    Returns the model in eval mode on CPU.
    """
    model = FacialReaction(num_classes=6)

    # Load full checkpoint
    checkpoint = torch.load('CNN_facial_reaction.pth',map_location='cpu')

    # Load only the model weights
    model.load_state_dict(checkpoint['model_state_dict'])

    model.eval()
    return model
310
+
311
@st.cache_resource
def load_vgg_model():
    """Load the fine-tuned VGG16 from 'vgg_dataset2_84_74.pth'.

    The final classifier layer is replaced with a 6-way head before the
    state dict (saved directly, not wrapped in a checkpoint dict) is
    loaded. Returns the model in eval mode on CPU.
    """
    # NOTE(review): `pretrained=` is deprecated in recent torchvision in
    # favour of `weights=None` — confirm the pinned torchvision version.
    model = models.vgg16(pretrained=False)
    model.classifier[6] = nn.Linear(4096, 6)
    model.load_state_dict(torch.load("vgg_dataset2_84_74.pth", map_location='cpu'))
    model.eval()
    return model
318
+
319
@st.cache_resource
def load_vit_model():
    """Load the trained VisionTransformer from 'vit_70_67.pth'.

    The checkpoint is a dict; only its 'model_state_dict' entry is used.
    Returns the model in eval mode on CPU.
    """
    model = VisionTransformer()

    # NOTE(review): `model.heads` is never used by VisionTransformer.forward
    # (classification goes through `fc_out`), and adding it before
    # load_state_dict means a strict load requires matching 'heads.*' keys
    # in the checkpoint — verify this against the saved file.
    model.heads = nn.Sequential(nn.Linear(192,6))

    checkpoint = torch.load('vit_70_67.pth', map_location='cpu')
    model.load_state_dict(checkpoint['model_state_dict'])

    model.eval()
    return model
333
+
334
+ # -------------------------------
335
+ # Preprocess Webcam Image
336
+ # -------------------------------
337
def preprocess_image(img: Image.Image, model_type='CNN'):
    """Convert a PIL image into a normalized 4-D tensor for the chosen model.

    'CNN' produces a 100x100 input scaled to roughly [-1, 1]; any other
    model type produces a 224x224 input normalized with ImageNet
    statistics. Returns a tensor of shape [1, 3, H, W].
    """
    if model_type == 'CNN':
        target_size = (100, 100)
        normalize = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    else:
        target_size = (224, 224)
        normalize = transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225]
        )

    pipeline = transforms.Compose([
        transforms.Resize(target_size),
        transforms.ToTensor(),
        normalize,
    ])
    # unsqueeze adds the batch dimension expected by the models.
    return pipeline(img).unsqueeze(0)
358
+
359
+
360
+
361
+
362
+
363
+
364
+
365
+
366
+
367
+
368
+
369
+
370
+
371
+
372
+
373
+
374
+
375
+
376
def apply_gradcam_streamlit(model, input_tensor, target_layer, class_names=None, true_label=None):
    """
    Applies Grad-CAM on a given image tensor and returns:
    - Original image
    - Original + Grad-CAM overlay

    Args:
    - model: Trained CNN/VGG/ViT model.
    - input_tensor: A single image tensor (1, 3, H, W).
    - target_layer: Target layer for Grad-CAM.
    - class_names: Optional dict mapping class indices to names.
      NOTE(review): accepted but currently unused in this function.
    - true_label: Optional integer ground-truth label (1-indexed).
      NOTE(review): accepted but currently unused in this function.

    Returns:
    - Tuple of (original image, Grad-CAM overlay, predicted 1-indexed label);
      the two images are NumPy arrays.
    """

    model.eval()
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
    input_tensor = input_tensor.to(device)

    # GradCAM setup
    cam = GradCAM(model=model, target_layers=[target_layer])
    # NOTE(review): `targets` is assigned but never passed to cam(...) below,
    # so Grad-CAM defaults to the highest-scoring class.
    targets = None

    # Run inference
    outputs = model(input_tensor)
    _, predicted = outputs.max(1)
    predicted_label = predicted.item() + 1  # shift from 0–5 to 1–6

    # Grad-CAM computation
    grayscale_cam = cam(input_tensor=input_tensor)[0, :]  # (H, W)

    # Unnormalize and prepare original image.
    # NOTE(review): this assumes the (0.5, 0.5) normalization used for the
    # CNN pipeline; for ImageNet-normalized (VGG/ViT) inputs the displayed
    # image is only approximately unnormalized — confirm intended behavior.
    img_disp = input_tensor.squeeze(0).cpu()
    img_disp = img_disp * 0.5 + 0.5  # Assuming normalization was [-1, 1]
    img_disp = img_disp.permute(1, 2, 0).numpy()  # (H, W, C)

    # Create heatmap image
    heatmap_image = show_cam_on_image(img_disp, grayscale_cam, use_rgb=True)

    # Return both images for display
    return img_disp, heatmap_image, predicted_label
420
+
new41.jpg ADDED

Git LFS Details

  • SHA256: 4886bad3ea1ab441f5bccb1855e5ca4bdb1d70e6fd4aa040a67824c70ebf540e
  • Pointer size: 131 Bytes
  • Size of remote file: 246 kB
prediction.py ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import streamlit as st
import torch
import torch.nn as nn
import torch.nn.functional as F

from PIL import Image
import numpy as np

from pytorch_grad_cam.utils.image import show_cam_on_image

import random
import matplotlib.pyplot as plt
import torchvision.transforms as transforms
import cv2

import models


def Display_prediction(model_choice, label_dict,):
    """Render the prediction page: webcam classification plus a random
    test-set prediction, each with a Grad-CAM overlay for the CNN.

    Parameters:
    - model_choice: "CNN", "VGG16" or anything else (treated as ViT).
    - label_dict: mapping from 1-indexed class id to emotion name.
    """
    # Camera input
    img_file = st.camera_input("📸 Take a photo to classify")
    # Load the selected model (cached inside models.py via st.cache_resource).
    if model_choice == "CNN":
        model = models.load_cnn_model()
    elif model_choice == "VGG16":
        model = models.load_vgg_model()
    else:
        model = models.load_vit_model()

    if img_file is not None:
        image = Image.open(img_file)
        st.image(image, caption="Captured Image", use_container_width=True)

        # Preprocess with the pipeline matching the selected model.
        input_tensor = models.preprocess_image(image, model_type=model_choice)

        # Inference
        with torch.no_grad():
            outputs = model(input_tensor)
            _, predicted = torch.max(outputs, 1)
            # Shift 0-based logits index to the 1-based label_dict keys.
            predicted_label = predicted.item() + 1

        st.success(f"🧠 Predicted Emotion: **{label_dict[predicted_label]}**")

        # Grad-CAM is only wired up for the custom CNN's conv2 layer.
        if model_choice == "CNN":
            target_layer = model.conv2  # Adjust to your CNN

            # Grad-CAM
            orig, gradcam_img, pred_label = models.apply_gradcam_streamlit(
                model=model,
                input_tensor=input_tensor,
                target_layer=target_layer,
                class_names=label_dict,
                true_label=None
            )

            st.subheader("🧠 Grad-CAM Visualization")

            # NOTE(review): use_column_width is deprecated in recent
            # Streamlit (use_container_width is used elsewhere) — confirm
            # against the pinned Streamlit version.
            st.image(gradcam_img, caption=f"Grad-CAM Heatmap: {label_dict[pred_label]}", use_column_width=True)

            # Convert both images to PIL
            orig_img_pil = Image.fromarray((orig * 255).astype(np.uint8))
            heatmap_img_pil = Image.fromarray(gradcam_img)

            # Side-by-side view
            st.image([orig_img_pil, heatmap_img_pil], caption=["Original", "Grad-CAM"], width=300)

    st.title("🧠 Random Test Image Prediction")

    if st.button("🎲 Show Random Prediction"):
        model.eval()

        # Pick the test set whose preprocessing matches the model.
        test_dataset = models.test_dataset_cnn

        if model_choice != "CNN":
            test_dataset = models.test_dataset_v

        # NOTE(review): index_to_label is built but never used below —
        # the classes list is indexed directly instead.
        index_to_label = {i: int(cls) for i, cls in enumerate(test_dataset.classes)}  # test_dataset.classes should be strings like ['1', '2', ..., '6']

        # Pick a truly random image from the whole dataset.
        total_samples = len(test_dataset)
        rand_index = random.randint(0, total_samples - 1)

        # Load image and label directly
        image, label = test_dataset[rand_index]
        input_tensor = image.unsqueeze(0)  # Add batch dimension

        # Run prediction
        model.eval()
        with torch.no_grad():
            output = model(input_tensor)
            _, predicted = torch.max(output, 1)

        # Convert class index (0-based) to folder label (1-based)
        true_label = int(test_dataset.classes[label])
        predicted_label = int(test_dataset.classes[predicted.item()])

        # Convert image for display.
        # NOTE(review): the 0.5/0.5 unnormalization matches the CNN
        # transform; VGG/ViT images use ImageNet stats, so their display
        # colors are only approximate — confirm intended behavior.
        image_disp = image.permute(1, 2, 0).cpu().numpy()
        image_disp = image_disp * 0.5 + 0.5  # unnormalize
        image_disp = np.clip(image_disp, 0, 1)

        # Display image using Matplotlib
        fig, ax = plt.subplots(figsize=(6, 6))
        ax.imshow(image_disp)
        ax.set_title(f"✅ True: {label_dict[true_label]}\n🤖 Predicted: {label_dict[predicted_label]}")
        ax.axis("off")
        st.pyplot(fig)

        if model_choice == "CNN":
            # ----------------------------
            # 🧠 Apply Grad-CAM on Selected Random Image
            # ----------------------------
            target_layer = model.conv2

            # Prepare the single image tensor for Grad-CAM
            input_tensor = image.unsqueeze(0)

            # Grad-CAM
            img_disp, gradcam_overlay, _ = models.apply_gradcam_streamlit(
                model=model,
                input_tensor=input_tensor,
                target_layer=target_layer,
                class_names=label_dict,
                true_label=true_label
            )

            st.subheader("🔥 Grad-CAM on Random Test Image")

            # Convert both to displayable format
            orig_pil = Image.fromarray((img_disp * 255).astype(np.uint8))
            heatmap_pil = Image.fromarray(gradcam_overlay)

            # Side-by-side in Streamlit
            st.image([orig_pil, heatmap_pil], caption=["Original", "Grad-CAM"], width=300)
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ torch
3
+ torchvision
4
+ matplotlib
5
+ pytorch-grad-cam
6
+ numpy
7
+ opencv-python-headless
8
+ Pillow
9
+ torchinfo
vit.jpg ADDED

Git LFS Details

  • SHA256: ded00c17f00ead2c17811e3a396a43d76beca9290f30c5aa6c40d52030f7cd3a
  • Pointer size: 131 Bytes
  • Size of remote file: 171 kB