Update src/streamlit_app.py
src/streamlit_app.py · CHANGED · +679 −36

@@ -1,40 +1,683 @@
-import altair as alt
-import numpy as np
-import pandas as pd
-import streamlit as st
-
-"""
-# Welcome to Streamlit!
-
-Edit `/src/streamlit_app.py` to customize this app to your heart's desire :heart:
-
-If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
-forums](https://discuss.streamlit.io).
-
-In the meantime, below is an example of what you can do with just a few lines of code:
-"""
-num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
-num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
-
-indices = np.linspace(0, 1, num_points)
-theta = 2 * np.pi * num_turns * indices
-radius = indices
-
-x = radius * np.cos(theta)
-y = radius * np.sin(theta)
-
-df = pd.DataFrame({
-    "x": x,
-    "y": y,
-    "idx": indices,
-    "rand": np.random.randn(num_points),
-})
-
-st.altair_chart(alt.Chart(df, height=700, width=700)
-    .mark_point(filled=True)
-    .encode(
-        x=alt.X("x", axis=None),
-        y=alt.Y("y", axis=None),
-        color=alt.Color("idx", legend=None, scale=alt.Scale()),
-        size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
-    ))
+import streamlit as st
+import pandas as pd
+
+import torch
+import torchvision.transforms as transforms
+from PIL import Image
+import numpy as np
+import torchvision.models as models
+import torchvision
+
+
+st.set_page_config(page_title="SmartVision AI - Intelligent Multi-Class Object Recognition System", layout="wide")
+st.sidebar.title("SmartVision AI")
+
+st.sidebar.markdown("---")
+
+st.title("SmartVision AI - Intelligent Multi-Class Object Recognition System")
+st.markdown("---")
+
+page = st.sidebar.radio("Go to", ["Home", "Image Classification", "Object Detection", "Model Performance", "Live Webcam Detection", "About"])
+
+
+# ------------------------------------------------ Home Page ------------------------------------------------
+
+if page == "Home":
+    st.subheader("Project Overview")
+    st.markdown("""
+    **SmartVision AI** is an intelligent computer vision system that performs real-time object detection
+    using a custom-trained **YOLO model**.
+    The system lets users upload images and automatically identifies objects by drawing bounding boxes,
+    class labels, and confidence scores.
+
+    The goal of this project is to demonstrate an **end-to-end AI pipeline**, from model training
+    to optimized inference and visualization.
+    """)
+
+    st.info("This project showcases practical skills in Deep Learning, Computer Vision, and Model Deployment, with a focus on performance optimization and clean output presentation.")
+    st.markdown("---")
+    st.subheader("Key Features")
+    st.markdown("""
+    - **Accurate Object Detection** using a trained YOLO model
+    - **Bounding Boxes & Labels** on detected objects
+    - **Confidence Scores** for every prediction
+    - **Optional CNN-based verification**
+    - **Optimized CNNs** (VGG16, ResNet50, MobileNetV2, EfficientNet-B0)
+    """)
+
+    st.markdown("---")
+    st.subheader("Instructions for Users")
+    st.text("""
+    1. Navigate to the Object Detection page
+    2. Upload an image (JPG / PNG format)
+    3. Wait for the model to process the image
+    4. View the output image with bounding boxes and labels
+    5. Check the confidence score for each detected object
+    """)
+    st.info("For best results, use clear images with good lighting and visible objects.")
+
+    st.markdown("---")
+    st.subheader("Sample Demo Images")
+    col1, col2 = st.columns(2)
+    with col1:
+        st.image("img.png", caption="YOLO Detection Example 1")
+    with col2:
+        st.image("img1.png", caption="YOLO Detection Example 2")
+
+
+# ----------------------------------------------------------------------------------------------------------
+
+# The CNN classifiers were trained in Colab and saved with state_dict();
+# here we only rebuild the architectures and load the saved weights.
+
+Classes = ['airplane', 'banana', 'bear', 'bicycle', 'bird', 'bowl', 'bus', 'cake', 'car', 'cat', 'dog', 'elephant', 'horse', 'laptop', 'motorcycle', 'mouse', 'parking meter', 'person', 'potted plant', 'sheep', 'toilet', 'traffic light', 'truck', 'tv', 'wine glass']
+NUM_CLASSES = len(Classes)  # 25
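+
+# Each loader below follows the same pattern: rebuild the torchvision
+# architecture, attach the custom classification head used during training
+# (load_state_dict only succeeds if the layer shapes match), load the saved
+# weights onto CPU, and switch the model to eval mode for inference.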
+
+import torch
+import torch.nn as nn
+import torchvision.models as models
+
+
+# VGG16
+@st.cache_resource
+def load_custom_vgg16():
+    model = models.vgg16(pretrained=False)
+
+    # custom classification head (must match the training-time head)
+    model.classifier = nn.Sequential(
+        nn.Linear(25088, 1024),
+        nn.ReLU(inplace=True),
+        nn.Dropout(0.5),
+
+        nn.Linear(1024, 512),
+        nn.ReLU(inplace=True),
+        nn.Dropout(0.5),
+
+        nn.Linear(512, NUM_CLASSES)
+    )
+
+    model.load_state_dict(
+        torch.load(
+            "models/vgg16_smartvision.pth",
+            map_location=torch.device("cpu")
+        )
+    )
+
+    model.eval()
+    return model
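+
+# Note: 25088 = 512 * 7 * 7, the flattened output of VGG16's convolutional
+# backbone for a 224x224 input, so the custom head has to start from there.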
+
+# ResNet50
+@st.cache_resource
+def load_custom_restnet50():
+    model = models.resnet50(pretrained=False)
+
+    # custom classification head
+    model.fc = nn.Sequential(
+        nn.Linear(model.fc.in_features, 512),
+        nn.BatchNorm1d(512),
+        nn.ReLU(),
+        nn.Dropout(0.5),
+        nn.Linear(512, NUM_CLASSES)
+    )
+
+    model.load_state_dict(
+        torch.load(
+            "models/smartvision_resnet50.pth",
+            map_location=torch.device("cpu")
+        )
+    )
+
+    model.eval()
+    return model
+
+# MobileNetV2
+@st.cache_resource
+def load_custom_mobilenetv2():
+    model = models.mobilenet_v2(pretrained=False)
+
+    # custom classification head
+    model.classifier = nn.Sequential(
+        nn.Linear(1280, 512),
+        nn.ReLU(),
+        nn.Dropout(0.4),
+        nn.Linear(512, NUM_CLASSES)
+    )
+
+    model.load_state_dict(
+        torch.load(
+            "models/mobilenetv2_smartvision.pth",
+            map_location=torch.device("cpu")
+        )
+    )
+
+    model.eval()
+    return model
+
+
+# EfficientNet-B0
+@st.cache_resource
+def load_custom_EffcientNet():
+    model = models.efficientnet_b0(pretrained=False)
+
+    # custom classification head
+    model.classifier = nn.Sequential(
+        nn.Dropout(0.4),
+        nn.Linear(1280, 512),
+        nn.ReLU(),
+        nn.Dropout(0.3),
+        nn.Linear(512, NUM_CLASSES)
+    )
+
+    model.load_state_dict(
+        torch.load(
+            "models/EfficientNetB0_smartvision.pth",
+            map_location=torch.device("cpu")
+        )
+    )
+
+    model.eval()
+    return model
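+
+# torchvision exposes the classification head under different attribute
+# names: `fc` on ResNet, `classifier` on VGG / MobileNetV2 / EfficientNet.
+# 1280 is the final feature width of both MobileNetV2 and EfficientNet-B0.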
+
+# Image preprocessing (shared by all four classifiers)
+
+preprocess = transforms.Compose([
+    transforms.Resize((224, 224)),
+    transforms.ToTensor(),
+    transforms.Normalize(
+        mean=[0.485, 0.456, 0.406],
+        std=[0.229, 0.224, 0.225]
+    )
+])
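+# The mean/std above are the standard ImageNet statistics; inference inputs
+# must be normalized the same way the training images were.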
+# ------------------------------------------------------------------------------------------------------------------------------------
+
+if page == "Image Classification":
+    st.subheader("Image Classification (Custom Trained CNN Models)")
+    st.markdown("""
+    This page performs **single-object image classification** using multiple
+    **custom-trained CNN models**.
+    Predictions from each model are shown **side by side** for comparison.
+    """)
+
+    uploaded_file = st.file_uploader(
+        "Upload an Image",
+        type=["jpg", "jpeg", "png"]
+    )
+
+    if uploaded_file:
+        image = Image.open(uploaded_file).convert("RGB")
+
+        st.markdown("### Uploaded Image")
+        st.image(image, width=300)
+
+        input_tensor = preprocess(image).unsqueeze(0)
+
+        # Load all models (cached after the first run)
+        models_dict = {
+            "VGG16": load_custom_vgg16(),
+            "ResNet50": load_custom_restnet50(),
+            "MobileNetV2": load_custom_mobilenetv2(),
+            "EfficientNet-B0": load_custom_EffcientNet()
+        }
+
+        st.markdown("---")
+        st.markdown("### Model Predictions (Top-5)")
+
+        cols = st.columns(4)
+
+        for col, (model_name, model) in zip(cols, models_dict.items()):
+            with col:
+                st.markdown(f"#### {model_name}")
+
+                with torch.no_grad():
+                    outputs = model(input_tensor)
+                    probs = torch.nn.functional.softmax(outputs[0], dim=0)
+
+                top_probs, top_idxs = torch.topk(
+                    probs, min(5, len(Classes))
+                )
+
+                for i in range(len(top_idxs)):
+                    class_name = Classes[top_idxs[i].item()]
+                    confidence = top_probs[i].item()
+
+                    st.write(
+                        f"**{i+1}. {class_name}** - {confidence*100:.2f}%"
+                    )
+                    st.progress(float(confidence))
+
+    else:
+        st.info("Upload an image to classify.")
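+
+# Note: softmax turns each model's raw logits into a probability distribution
+# over the 25 classes, and torch.topk then picks the 5 most likely ones.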
+
+
+import cv2
+import numpy as np
+from ultralytics import YOLO
+from PIL import Image
+import streamlit as st
+
+
+@st.cache_resource
+def load_yolo_model():
+    return YOLO("best (1).pt")  # path to the already trained custom model
+
+yolo_model = load_yolo_model()
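+
+# st.cache_resource keeps a single YOLO instance alive across Streamlit
+# reruns, so the weights are not reloaded on every widget interaction.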
+
+# -------------------------------------------------------------------------------------------------------------------------------------
+
+if page == "Object Detection":
+    st.subheader("Object Detection using YOLO")
+    st.markdown("""
+    Upload an image to detect **multiple objects** using a custom-trained YOLO model.
+    Bounding boxes, class labels, and confidence scores will be displayed.
+    """)
+
+    st.markdown("---")
+
+    # Confidence threshold slider
+    conf_threshold = st.slider(
+        "Confidence Threshold",
+        min_value=0.1,
+        max_value=1.0,
+        value=0.5,
+        step=0.05
+    )
+
+    uploaded_file = st.file_uploader(
+        "Upload an Image (JPG / PNG)",
+        type=["jpg", "jpeg", "png"]
+    )
+
+    if uploaded_file:
+        image = Image.open(uploaded_file).convert("RGB")
+        img_array = np.array(image)
+
+        st.markdown("### Uploaded Image")
+        st.image(image, width=350)
+
+        st.markdown("---")
+        st.markdown("### Detection Results")
+
+        # YOLO inference
+        results = yolo_model.predict(
+            source=img_array,
+            conf=conf_threshold,
+            save=False
+        )
+
+        annotated_img = img_array.copy()
+
+        detections_found = False
+
+        for r in results:
+            boxes = r.boxes
+
+            if boxes is not None:
+                for box in boxes:
+                    detections_found = True
+
+                    x1, y1, x2, y2 = map(int, box.xyxy[0])
+                    conf = float(box.conf[0])
+                    cls_id = int(box.cls[0])
+                    label = yolo_model.names[cls_id]
+
+                    # Draw bounding box
+                    cv2.rectangle(
+                        annotated_img,
+                        (x1, y1),
+                        (x2, y2),
+                        (0, 255, 0),
+                        2
+                    )
+
+                    # Label text
+                    text = f"{label} {conf*100:.2f}%"
+                    cv2.putText(
+                        annotated_img,
+                        text,
+                        (x1, y1 - 10),
+                        cv2.FONT_HERSHEY_SIMPLEX,
+                        0.6,
+                        (0, 255, 0),
+                        2
+                    )
+
+        if detections_found:
+            st.image(
+                annotated_img,
+                caption="YOLO Detection Output",
+                use_column_width=True
+            )
+        else:
+            st.warning("No objects detected. Try lowering the confidence threshold.")
+
+    else:
+        st.info("Upload an image to start object detection.")
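+
+# Each Ultralytics result exposes its boxes with .xyxy (corner coordinates),
+# .conf (confidence) and .cls (class id), which is all the drawing loop needs.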
+
+
+# This dashboard compares multiple CNN architectures on accuracy and inference speed.
+# While deeper models like VGG16 perform well during training, lightweight models such as
+# MobileNetV2 and EfficientNetB0 offer faster inference, making them suitable for real-time applications.
+
+
+# ---------------------------------- Model Performance ---------------------------------------------------------------
+import matplotlib.pyplot as plt
+import pandas as pd
+import seaborn as sns
+import streamlit as st
+
+# ---------------- MODEL METRICS DATA ----------------
+data = {
+    "Model": ["VGG16", "ResNet50", "MobileNetV2", "EfficientNetB0"],
+    "Train Accuracy": [0.877, 0.6815, 0.51, 0.5297],
+    "Val Accuracy": [0.6345, 0.6855, 0.54, 0.56],
+    "Test Accuracy": [0.633, 0.593, 0.579, 0.543],
+    "Speed": [8.9, 0.5, 13.0, 12.6]  # relative score; higher = faster
+}
+
+df = pd.DataFrame(data)
+
+# ---------------- PAGE 4: MODEL PERFORMANCE ----------------
+if page == "Model Performance":
+    st.subheader("Model Performance Dashboard")
+    st.markdown("""
+    This section presents a **comparative analysis** of the CNN models used in SmartVision AI.
+    It highlights **training, validation, and test accuracy**, along with **relative inference speed**.
+    """)
+
+    st.markdown("---")
+
+    # ---------------- MODEL METRICS TABLE ----------------
+    st.markdown("### Model Comparison Table")
+    st.dataframe(df, use_container_width=True)
+
+    st.markdown("---")
+
+    # ---------------- ACCURACY COMPARISON ----------------
+    st.markdown("### Accuracy Comparison (Train / Validation / Test)")
+
+    acc_df = df.melt(
+        id_vars="Model",
+        value_vars=["Train Accuracy", "Val Accuracy", "Test Accuracy"],
+        var_name="Dataset",
+        value_name="Accuracy"
+    )
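+    # melt() reshapes the wide table into long form (Model, Dataset, Accuracy)
+    # so seaborn can draw one grouped bar per dataset split.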
+
+    fig1, ax1 = plt.subplots()
+    sns.barplot(
+        data=acc_df,
+        x="Model",
+        y="Accuracy",
+        hue="Dataset",
+        ax=ax1
+    )
+    ax1.set_ylim(0, 1)
+    ax1.set_title("Accuracy Comparison Across Models")
+    ax1.set_ylabel("Accuracy")
+    ax1.set_xlabel("Model")
+
+    st.pyplot(fig1)
+
+    st.markdown("---")
+
+    # ---------------- INFERENCE SPEED COMPARISON ----------------
+    st.markdown("### Inference Speed Comparison")
+
+    fig2, ax2 = plt.subplots()
+    sns.barplot(
+        data=df,
+        x="Model",
+        y="Speed",
+        ax=ax2
+    )
+    ax2.set_title("Relative Inference Speed (Higher is Faster)")
+    ax2.set_ylabel("Speed Score")
+    ax2.set_xlabel("Model")
+
+    st.pyplot(fig2)
+
+    st.markdown("---")
+
+    # ---------------- PERFORMANCE INSIGHTS ----------------
+    st.markdown("### Key Observations")
+    st.markdown("""
+    - **VGG16** shows strong training accuracy but a noticeable generalization gap
+    - **ResNet50** provides better validation stability
+    - **MobileNetV2** and **EfficientNetB0** trade accuracy for faster inference
+    - Lightweight models are suitable for **real-time or edge deployment**
+    """)
+
+
+# ---------------------------------------------------- Live Camera Detection ----------------------------------------------------------
+import cv2
+import time
+import numpy as np
+import streamlit as st
+from ultralytics import YOLO
+
+# Load a pretrained YOLO model for the live demo
+@st.cache_resource
+def load_pretrained_yolo():
+    return YOLO("yolov8n.pt")  # pretrained model
+
+yolo_model_live = load_pretrained_yolo()
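+
+# yolov8n is the "nano" variant, the smallest and fastest YOLOv8 model,
+# which is why it is used for CPU-bound live webcam inference.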
+
+
+if page == "Live Webcam Detection":
+    st.subheader("Live Camera Detection (Lightweight Mode)")
+
+    # e.g. 0.5 shows only detections above 50% confidence; 0.1 shows those above 10%
+    conf_thres = st.slider("Confidence Threshold", 0.1, 1.0, 0.5, 0.05)
+    run = st.checkbox("Start Camera")
+
+    FRAME_WINDOW = st.image([])
+    fps_text = st.empty()
+
+    if run:
+        cap = cv2.VideoCapture(0)
+        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
+        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
+
+        # frame skipping keeps CPU inference responsive
+        frame_skip = 3  # process 1 frame out of every 3
+        frame_count = 0
+        prev_time = time.time()
+
+        while run:
+            ret, frame = cap.read()
+            if not ret:
+                break
+
+            frame_count += 1
+
+            # Skip frames
+            if frame_count % frame_skip != 0:
+                continue
+
+            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+
+            results = yolo_model_live.predict(
+                frame_rgb,
+                conf=conf_thres,
+                imgsz=416,  # smaller input image for speed
+                verbose=False
+            )
+
+            annotated_frame = results[0].plot()
+
+            # FPS, measured between processed frames
+            curr_time = time.time()
+            fps = 1 / (curr_time - prev_time)
+            prev_time = curr_time
+
+            fps_text.markdown(f"FPS: {fps:.1f}")
+
+            FRAME_WINDOW.image(
+                annotated_frame,
+                channels="RGB",
+                use_column_width=True
+            )
+
+            time.sleep(0.03)  # brief pause to reduce CPU load
+
+        cap.release()
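+
+# Frame skipping (1 of every 3 frames) plus the reduced 416px input size
+# trades a little detection accuracy for a usable frame rate on CPU.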
+
+
+if page == "About":
+    st.subheader("About SmartVision AI")
+    st.markdown("---")
+
+    # ---------------- PROJECT OVERVIEW ----------------
+    st.markdown("## Project Overview")
+    st.markdown("""
+    **SmartVision AI** is an end-to-end **computer vision system** designed to perform
+    **image classification**, **object detection**, and **real-time inference** using
+    state-of-the-art deep learning models.
+
+    The project demonstrates the complete AI lifecycle:
+    **dataset preparation → model training → optimized inference → deployment using Streamlit**.
+    """)
+
+    # ---------------- DATASET INFO ----------------
+    st.markdown("## Dataset Information")
+    st.markdown("""
+    - **Image Classification Dataset**
+        - Domain-specific dataset with **25 object classes**
+        - Preprocessed and augmented for robustness
+        - Split into **Train / Validation / Test** sets
+
+    - **Object Detection Dataset**
+        - General object detection using the **COCO dataset**
+        - 80 commonly occurring object classes
+        - Bounding-box annotated images
+    """)
+
+    # ---------------- MODEL ARCHITECTURES ----------------
+    st.markdown("## Model Architectures Used")
+    st.markdown("""
+    ### Image Classification Models
+    - **VGG16 (Custom Trained)**
+        - Modified fully connected layers
+        - High accuracy on domain-specific data
+
+    - **ResNet50**
+        - Residual connections for deeper learning
+        - Strong generalization capability
+
+    - **MobileNetV2**
+        - Lightweight architecture
+        - Optimized for speed and mobile devices
+
+    - **EfficientNet-B0**
+        - Balanced accuracy and efficiency
+        - Compound scaling technique
+
+    ### Object Detection Model
+    - **YOLOv8 (Pretrained)**
+        - Real-time object detection
+        - Single-stage detector
+        - Optimized for speed and accuracy
+    """)
+
+    # ---------------- TECH STACK ----------------
+    st.markdown("## Technical Stack")
+    st.markdown("""
+    **Programming Language**
+    - Python
+
+    **Deep Learning & Vision**
+    - PyTorch
+    - Torchvision
+    - Ultralytics YOLOv8
+    - OpenCV
+
+    **Data Processing & Visualization**
+    - NumPy
+    - Pandas
+    - Matplotlib
+    - Seaborn
+
+    **Web & Deployment**
+    - Streamlit
+    - VS Code
+    - Git & GitHub
+    """)
+
+    # ---------------- OPTIMIZATION ----------------
+    st.markdown("## Performance Optimization Techniques")
+    st.markdown("""
+    - Model quantization (where applicable)
+    - Frame skipping for real-time inference
+    - Resolution scaling for faster detection
+    - CPU-optimized inference pipeline
+    - Streamlit resource caching
+    """)
+
+    # ---------------- DEVELOPER INFO ----------------
+    st.markdown("## Developer Information")
+    st.markdown("""
+    **Developer:** Rahul Kumar
+    **Degree:** B.Tech in Information Technology
+    **Institution:** IIEST Shibpur
+
+    **Core Interests:**
+    - Computer Vision
+    - Deep Learning
+    - Full Stack Development
+    - AI Model Deployment
+
+    **Project Goal:**
+    To build scalable, efficient, and production-ready AI systems
+    with real-world deployment considerations.
+    """)
+
+    # --------------------------------------- FOOTER ---------------------------------------
+    st.markdown("---")
+    st.info("SmartVision AI: Bridging Deep Learning Research with Real-World Applications")
+
+
+# ------------------- Footer in sidebar -------------------
+
+import streamlit as st
+
+st.sidebar.markdown("---")
+
+st.sidebar.markdown("### SmartVision AI")
+
+col1, col2, col3 = st.sidebar.columns(3)
+
+# render each link inside its own sidebar column
+with col1:
+    st.markdown(
+        "[GitHub](https://github.com/rahul-tech-kumar/SmartVision-AI---Intelligent-Multi-Class-Object-Recognition-System)",
+        unsafe_allow_html=True
+    )
+
+with col2:
+    st.markdown(
+        "[LinkedIn](https://www.linkedin.com/in/rahul-kumar-173546228/)",
+        unsafe_allow_html=True
+    )
+
+with col3:
+    st.markdown(
+        "[Email](mailto:rahulkumar11062003@gmail.com)",
+        unsafe_allow_html=True
+    )
+
+st.sidebar.markdown("---")
+
+st.sidebar.markdown(
+    """
+    <div style="text-align:center; font-size:12px; color:gray;">
+    Built with Streamlit & PyTorch<br>
+    © 2025 SmartVision AI
+    </div>
+    """,
+    unsafe_allow_html=True
+)