import streamlit as st
import pandas as pd

import torch
import torchvision.transforms as transforms
from PIL import Image
import numpy as np
import torchvision.models as models
import torchvision


# ---- Global page chrome: browser-tab config, sidebar header, title, navigation ----
_APP_NAME = "SmartVision AI - Intelligent Multi-Class Object Recognition System"

# Labels offered by the sidebar radio. The page sections further down compare
# `page` against these exact strings to decide what to render.
_NAV_PAGES = [
    "🏠 Home",
    "🖼️ Image Classification",
    "📦 Object Detection",
    "📊 Model Performance",
    "📸 Live Webcam Detection",
    "ℹ️ About",
]

# Issued before any other Streamlit call in this script.
st.set_page_config(page_title=_APP_NAME, layout="wide")

_sidebar = st.sidebar
_sidebar.title("📘 SmartVision AI")
_sidebar.markdown("---")

st.title(f"🤖 {_APP_NAME}")
st.markdown("---")

# Label of the page currently selected in the sidebar.
page = _sidebar.radio("Go to", _NAV_PAGES)


#------------------------------------------------Home Page----------------------------------------------------------------------------------------

if page == "🏠 Home":
    # Landing page: project overview, feature list, usage steps, demo images.
    st.subheader("📌 Project Overview")
    st.markdown("""
        **SmartVision AI** is an intelligent computer vision system that performs real-time object detection 
        using a custom-trained **YOLO model**.  
        The system allows users to upload images and automatically identifies objects by drawing bounding boxes,
        class labels, and confidence scores.

        The goal of this project is to demonstrate an **end-to-end AI pipeline** — from model training 
        to optimized inference and visualization.
        """)

    showcase_note = "✨ This project is designed to showcase practical skills in Deep Learning, Computer Vision, and Model Deployment, with a focus on performance optimization and clean output presentation."
    st.info(showcase_note)
    st.markdown("---")

    st.subheader("🚀 Key Features")
    st.markdown("""
            ➤ 🔍 **Accurate Object Detection** using a trained YOLO model  
            ➤ 📦 **Bounding Boxes & Labels** on detected objects  
            ➤ 📊 **Confidence Scores** for every prediction  
            ➤ 🧠 **Optional CNN-based verification**  
            ➤ ⚡ **Optimized CNNs** (VGG16, ResNet50, MobileNetV2, EfficientNet-B0)
            """)
    st.markdown("---")

    # Rendered with st.text (not st.markdown), so the steps appear as plain text.
    st.subheader("📝 Instructions for Users")
    st.text("""
                    ➤  🔍 Navigate to the Detection page
                    ➤  📦 Upload an image (JPG / PNG format)
                    ➤  📊 Wait for the model to process the image
                    ➤  🧠 View the output image with bounding boxes and labels
                    ➤  ⚡ Check confidence scores for each detected object
                """)
    st.info("⚠️ For best results, use clear images with good lighting and visible objects.")
    st.markdown("---")

    # Two demo images shown side by side; the paths are relative to the
    # working directory the app is started from.
    st.subheader("🖼️ Sample Demo Images")
    demo_images = [
        ("img.png", "YOLO Detection Example 1"),
        ("img1.png", "YOLO Detection Example 2"),
    ]
    for column, (image_path, image_caption) in zip(st.columns(2), demo_images):
        with column:
            st.image(image_path, caption=image_caption)

        
#----------------------------------------------------------------------------------------------------------------------------------
# The models below were trained beforehand in Colab and saved with state_dict(); the loaders rebuild each architecture and load the saved weights from the models/ directory.

# Class labels for the classification models. The Image Classification page
# indexes this list with the models' top-k output indices, so the order matters.
Classes = [
    "airplane", "banana", "bear", "bicycle", "bird",
    "bowl", "bus", "cake", "car", "cat",
    "dog", "elephant", "horse", "laptop", "motorcycle",
    "mouse", "parking meter", "person", "potted plant", "sheep",
    "toilet", "traffic light", "truck", "tv", "wine glass",
]
NUM_CLASSES = len(Classes)  # 25

import torch
import torch.nn as nn
import torchvision.models as models


# vgg16
@st.cache_resource
def load_custom_vgg16():
    """Build the custom VGG16 classifier and load its saved weights.

    The stock VGG16 classifier is replaced by a
    25088 -> 1024 -> 512 -> NUM_CLASSES head, after which the state dict
    stored in ``models/vgg16_smartvision.pth`` is loaded onto the CPU.

    Returns:
        The VGG16 model in evaluation mode (cached via ``st.cache_resource``).

    Raises:
        Whatever ``torch.load`` / ``load_state_dict`` raise, e.g. when the
        checkpoint file is missing or its keys do not match this architecture.
    """
    # No arguments -> randomly initialised network. This avoids the
    # `pretrained=` keyword, which newer torchvision releases deprecate, while
    # still working on older releases where it defaulted to False.
    model = models.vgg16()

    # Custom classification head; the output size follows NUM_CLASSES like
    # the other loaders in this file instead of a hard-coded 25.
    model.classifier = nn.Sequential(
        nn.Linear(25088, 1024),
        nn.ReLU(inplace=True),
        nn.Dropout(0.5),

        nn.Linear(1024, 512),
        nn.ReLU(inplace=True),
        nn.Dropout(0.5),

        nn.Linear(512, NUM_CLASSES),
    )

    # NOTE(review): torch.load unpickles the file, so only load checkpoints
    # that come from a trusted source.
    state_dict = torch.load(
        "models/vgg16_smartvision.pth",
        map_location=torch.device("cpu"),
    )
    model.load_state_dict(state_dict)

    # Inference only: switch dropout to evaluation behaviour.
    model.eval()
    return model

# ResNet50
@st.cache_resource
def load_custom_restnet50():
    """Build the custom ResNet50 classifier and load its saved weights.

    The final ``fc`` layer is replaced by a
    Linear -> BatchNorm1d -> ReLU -> Dropout -> Linear(NUM_CLASSES) head, after
    which the state dict stored in ``models/smartvision_resnet50.pth`` is
    loaded onto the CPU.

    Returns:
        The ResNet50 model in evaluation mode (cached via
        ``st.cache_resource``).

    Raises:
        Whatever ``torch.load`` / ``load_state_dict`` raise, e.g. when the
        checkpoint file is missing or its keys do not match this architecture.
    """
    # No arguments -> randomly initialised network. This avoids the
    # `pretrained=` keyword, which newer torchvision releases deprecate, while
    # still working on older releases where it defaulted to False.
    model = models.resnet50()

    # Custom classification head; its input width is read from the layer it
    # replaces.
    in_features = model.fc.in_features
    model.fc = nn.Sequential(
        nn.Linear(in_features, 512),
        nn.BatchNorm1d(512),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(512, NUM_CLASSES),
    )

    # NOTE(review): torch.load unpickles the file, so only load checkpoints
    # that come from a trusted source.
    state_dict = torch.load(
        "models/smartvision_resnet50.pth",
        map_location=torch.device("cpu"),
    )
    model.load_state_dict(state_dict)

    # Inference only: dropout and batch-norm switch to evaluation behaviour.
    model.eval()
    return model

# Mobilenet_v2
@st.cache_resource
def load_custom_mobilenetv2():
    """Build the custom MobileNetV2 classifier and load its saved weights.

    The stock classifier is replaced by a
    1280 -> 512 -> NUM_CLASSES head, after which the state dict stored in
    ``models/mobilenetv2_smartvision.pth`` is loaded onto the CPU.

    Returns:
        The MobileNetV2 model in evaluation mode (cached via
        ``st.cache_resource``).

    Raises:
        Whatever ``torch.load`` / ``load_state_dict`` raise, e.g. when the
        checkpoint file is missing or its keys do not match this architecture.
    """
    # No arguments -> randomly initialised network. This avoids the
    # `pretrained=` keyword, which newer torchvision releases deprecate, while
    # still working on older releases where it defaulted to False.
    model = models.mobilenet_v2()

    # Custom classification head.
    model.classifier = nn.Sequential(
        nn.Linear(1280, 512),
        nn.ReLU(),
        nn.Dropout(0.4),
        nn.Linear(512, NUM_CLASSES),
    )

    # NOTE(review): torch.load unpickles the file, so only load checkpoints
    # that come from a trusted source.
    state_dict = torch.load(
        "models/mobilenetv2_smartvision.pth",
        map_location=torch.device("cpu"),
    )
    model.load_state_dict(state_dict)

    # Inference only: switch dropout to evaluation behaviour.
    model.eval()
    return model


# EfficientNetB0
@st.cache_resource
def load_custom_EffcientNet():
    """Build the custom EfficientNet-B0 classifier and load its saved weights.

    The stock classifier is replaced by a
    Dropout -> 1280 -> 512 -> NUM_CLASSES head, after which the state dict
    stored in ``models/EfficientNetB0_smartvision.pth`` is loaded onto the CPU.

    Returns:
        The EfficientNet-B0 model in evaluation mode (cached via
        ``st.cache_resource``).

    Raises:
        Whatever ``torch.load`` / ``load_state_dict`` raise, e.g. when the
        checkpoint file is missing or its keys do not match this architecture.
    """
    # No arguments -> randomly initialised network. This avoids the
    # `pretrained=` keyword, which newer torchvision releases deprecate, while
    # still working on older releases where it defaulted to False.
    model = models.efficientnet_b0()

    # Custom classification head.
    model.classifier = nn.Sequential(
        nn.Dropout(0.4),
        nn.Linear(1280, 512),
        nn.ReLU(),
        nn.Dropout(0.3),
        nn.Linear(512, NUM_CLASSES),
    )

    # NOTE(review): torch.load unpickles the file, so only load checkpoints
    # that come from a trusted source.
    state_dict = torch.load(
        "models/EfficientNetB0_smartvision.pth",
        map_location=torch.device("cpu"),
    )
    model.load_state_dict(state_dict)

    # Inference only: switch dropout to evaluation behaviour.
    model.eval()
    return model

# Image preprocessing

# Input pipeline applied to an uploaded image before classification:
# resize to 224x224, convert to a tensor, then normalise each channel.
# NOTE(review): the mean/std values are presumably the ImageNet statistics
# used during training; confirm against the training notebook.
_NORMALIZE_MEAN = [0.485, 0.456, 0.406]
_NORMALIZE_STD = [0.229, 0.224, 0.225]

_PREPROCESS_STEPS = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=_NORMALIZE_MEAN, std=_NORMALIZE_STD),
]
preprocess = transforms.Compose(_PREPROCESS_STEPS)
# ------------------------------------------------------------------------------------------------------------------------------------

if page == "🖼️ Image Classification":
    # Classification page: run every custom CNN on one uploaded image and show
    # each model's top predictions in its own column.
    st.subheader("🖼️ Image Classification (Custom Trained CNN Models)")
    st.markdown("""
    This page performs **single-object image classification** using multiple
    **custom-trained CNN models**.  
    Predictions from each model are shown **side-by-side** for comparison.
    """)

    uploaded_file = st.file_uploader("📤 Upload an Image", type=["jpg", "jpeg", "png"])

    if not uploaded_file:
        st.info("⬆️ Upload an image to classify.")
    else:
        image = Image.open(uploaded_file).convert("RGB")

        st.markdown("### 📷 Uploaded Image")
        st.image(image, width=300)

        # unsqueeze(0) adds the leading batch dimension before the forward pass.
        input_tensor = preprocess(image).unsqueeze(0)

        # Display title -> loaded model, in the order the columns are filled.
        model_loaders = [
            ("🧠 VGG16", load_custom_vgg16),
            ("🧠 ResNet50", load_custom_restnet50),
            ("🧠 MobileNetV2", load_custom_mobilenetv2),
            ("🧠 EfficientNet-B0", load_custom_EffcientNet),
        ]
        models_dict = {title: loader() for title, loader in model_loaders}

        st.markdown("---")
        st.markdown("### 🔍 Model Predictions (Top-5)")

        cols = st.columns(4)
        top_k = min(5, len(Classes))

        for col, (model_name, model) in zip(cols, models_dict.items()):
            with col:
                st.markdown(f"#### {model_name}")

                # Forward pass without gradient tracking; softmax turns the
                # first (only) row of outputs into probabilities.
                with torch.no_grad():
                    logits = model(input_tensor)
                    probs = torch.nn.functional.softmax(logits[0], dim=0)

                top_probs, top_idxs = torch.topk(probs, top_k)

                ranked = zip(top_idxs.tolist(), top_probs.tolist())
                for rank, (class_idx, confidence) in enumerate(ranked, start=1):
                    class_name = Classes[class_idx]
                    st.write(f"**{rank}. {class_name}** — {confidence*100:.2f}%")
                    st.progress(float(confidence))


import cv2
import numpy as np
from ultralytics import YOLO
from PIL import Image
import streamlit as st


@st.cache_resource
def load_yolo_model():
    """Return a YOLO model built from the custom-trained weights file.

    The weights are read from ``best (1).pt``, a path relative to the working
    directory. The result is cached via ``st.cache_resource``.
    """
    weights_path = "best (1).pt"   # path to my already trained model
    detector = YOLO(weights_path)
    return detector

#-------------------------------------------------------------------------------------------------------------------------------------
# Object Detection page: run the custom YOLO model on an uploaded image and
# draw a box plus a "label confidence%" caption for every detection.

if page == "📦 Object Detection":
    st.subheader("🎯 Object Detection using YOLO")
    st.markdown("""
    Upload an image to detect **multiple objects** using a custom-trained YOLO model.
    Bounding boxes, class labels, and confidence scores will be displayed.
    """)

    st.markdown("---")

    # Confidence threshold slider: passed to YOLO as the `conf` argument.
    conf_threshold = st.slider(
        "🔧 Confidence Threshold",
        min_value=0.1,
        max_value=1.0,
        value=0.5,
        step=0.05
    )

    uploaded_file = st.file_uploader(
        "📤 Upload an Image (JPG / PNG)",
        type=["jpg", "jpeg", "png"]
    )

    if uploaded_file:
        # Load the detector only when it is actually needed, so the other
        # pages do not depend on the weights file being present.
        yolo_model = load_yolo_model()

        image = Image.open(uploaded_file).convert("RGB")
        img_array = np.array(image)

        st.markdown("### 📷 Uploaded Image")
        st.image(image, width=350)

        st.markdown("---")
        st.markdown("### 🔍 Detection Results")

        # YOLO inference. The PIL image is passed instead of the RGB numpy
        # array: Ultralytics interprets numpy input as BGR, so handing it an
        # RGB array would swap the red and blue channels seen by the model.
        results = yolo_model.predict(
            source=image,
            conf=conf_threshold,
            save=False
        )

        # Drawing happens on an RGB copy, which st.image displays as-is.
        annotated_img = img_array.copy()
        box_color = (0, 255, 0)

        detections_found = False

        for r in results:
            boxes = r.boxes
            if boxes is None:
                continue

            for box in boxes:
                detections_found = True

                x1, y1, x2, y2 = (int(v) for v in box.xyxy[0].tolist())
                conf = float(box.conf[0])
                cls_id = int(box.cls[0])
                label = yolo_model.names[cls_id]

                # Draw bounding box
                cv2.rectangle(annotated_img, (x1, y1), (x2, y2), box_color, 2)

                # Label text: placed above the box, or just inside it when
                # the box is too close to the top edge for the text to fit.
                text = f"{label} {conf*100:.2f}%"
                text_y = y1 - 10 if y1 - 10 >= 15 else y1 + 20
                cv2.putText(
                    annotated_img,
                    text,
                    (x1, text_y),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.6,
                    box_color,
                    2
                )

        if detections_found:
            st.image(
                annotated_img,
                caption="YOLO Detection Output",
                use_column_width=True
            )
        else:
            st.warning("⚠️ No objects detected. Try lowering the confidence threshold.")

    else:
        st.info("⬆️ Upload an image to start object detection.")


# This dashboard compares multiple CNN architectures based on accuracy and inference speed.
# While deeper models like VGG16 perform well during training, lightweight models such as
# MobileNetV2 and EfficientNetB0 offer faster inference, making them suitable for real-time applications.


#----------------------------------Model Performance---------------------------------------------------------------
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import streamlit as st

# ---------------- MODEL METRICS DATA ----------------
# Hard-coded evaluation results, one row per model.
_METRIC_COLUMNS = ["Model", "Train Accuracy", "Val Accuracy", "Test Accuracy", "Speed"]
_METRIC_ROWS = [
    # model,            train,  val,    test,  speed (higher = faster)
    ("VGG16",           0.877,  0.6345, 0.633, 8.9),
    ("ResNet50",        0.6815, 0.6855, 0.593, 0.5),
    ("MobileNetV2",     0.51,   0.54,   0.579, 13.0),
    ("EfficientNetB0",  0.5297, 0.56,   0.543, 12.6),
]

# Transpose the rows into the column-oriented mapping used to build the frame.
data = {
    column: list(values)
    for column, values in zip(_METRIC_COLUMNS, zip(*_METRIC_ROWS))
}

df = pd.DataFrame(data)

# ---------------- PAGE 4: MODEL PERFORMANCE ----------------
if page == "📊 Model Performance":
    # Dashboard page: metrics table, grouped accuracy bars, speed bars, notes.
    st.subheader("📊 Model Performance Dashboard")
    st.markdown("""
    This section presents a **comparative analysis** of different CNN models used in SmartVision AI.
    It highlights **training, validation, and test accuracy**, along with **relative inference speed**.
    """)

    st.markdown("---")

    # ---------------- MODEL METRICS TABLE ----------------
    st.markdown("### 📋 Model Comparison Table")
    st.dataframe(df, use_container_width=True)

    st.markdown("---")

    # ---------------- ACCURACY COMPARISON ----------------
    st.markdown("### 📈 Accuracy Comparison (Train / Validation / Test)")

    # Long format (one row per model/dataset pair) so seaborn can group the
    # bars by dataset via `hue`.
    acc_df = df.melt(
        id_vars="Model",
        value_vars=["Train Accuracy", "Val Accuracy", "Test Accuracy"],
        var_name="Dataset",
        value_name="Accuracy"
    )

    fig1, ax1 = plt.subplots()
    sns.barplot(
        data=acc_df,
        x="Model",
        y="Accuracy",
        hue="Dataset",
        ax=ax1
    )
    ax1.set_ylim(0, 1)
    ax1.set_title("Accuracy Comparison Across Models")
    ax1.set_ylabel("Accuracy")
    ax1.set_xlabel("Model")

    st.pyplot(fig1)
    # pyplot keeps every figure it creates alive until it is closed; without
    # this, each rerun of the script would leave another figure in memory.
    plt.close(fig1)

    st.markdown("---")

    # ---------------- INFERENCE SPEED COMPARISON ----------------
    st.markdown("### ⚡ Inference Speed Comparison")

    fig2, ax2 = plt.subplots()
    sns.barplot(
        data=df,
        x="Model",
        y="Speed",
        ax=ax2
    )
    ax2.set_title("Relative Inference Speed (Higher is Faster)")
    ax2.set_ylabel("Speed Score")
    ax2.set_xlabel("Model")

    st.pyplot(fig2)
    plt.close(fig2)  # same reason as for fig1

    st.markdown("---")

    # ---------------- PERFORMANCE INSIGHTS ----------------
    st.markdown("### 🧠 Key Observations")
    st.markdown("""
    - **VGG16** shows strong training accuracy but noticeable generalization gap  
    - **ResNet50** provides better validation stability  
    - **MobileNetV2** and **EfficientNetB0** trade accuracy for faster inference  
    - Lightweight models are suitable for **real-time or edge deployment**
    """)


#----------------------------------------------------Live Camera Detection----------------------------------------------------------------------------------
import cv2
import time
import numpy as np
import streamlit as st
from ultralytics import YOLO

#Loading the pretrained model from YOLO

@st.cache_resource
def load_pretrained_yolo():
    """Return a YOLO model built from the ``yolov8n.pt`` weights.

    Used by the live webcam page. The result is cached via
    ``st.cache_resource``.
    """
    weights_name = "yolov8n.pt"   # pretrained model
    detector = YOLO(weights_name)
    return detector

# Live Webcam page: grab frames from camera 0, run YOLO on every third frame,
# and stream the annotated result together with an FPS read-out.

if page == "📸 Live Webcam Detection":
    st.subheader("📸 Live Camera Detection (Lightweight Mode)")

    # 0.5 → show only detections above 50%; 0.1 → show only detections above 10%
    conf_thres = st.slider("Confidence Threshold", 0.1, 1.0, 0.5, 0.05)
    run = st.checkbox("▶ Start Camera")

    # Placeholders that are overwritten in place on every processed frame.
    FRAME_WINDOW = st.image([])
    fps_text = st.empty()

    if run:
        # Load the detector only when the camera is started, so the other
        # pages do not depend on the pretrained weights being available.
        yolo_model_live = load_pretrained_yolo()

        cap = cv2.VideoCapture(0)
        try:
            if not cap.isOpened():
                st.error("❌ Could not open the camera. Check that a webcam is connected and not used by another application.")
            else:
                cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
                cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

                # using this for faster optimization
                frame_skip = 3   # 🔥 process 1 frame out of 3
                frame_count = 0
                prev_time = time.time()

                while True:
                    ret, frame = cap.read()
                    if not ret:
                        st.warning("⚠️ Could not read a frame from the camera. Stopping.")
                        break

                    frame_count += 1

                    # Skip frames
                    if frame_count % frame_skip != 0:
                        continue

                    # OpenCV frames are BGR, which is what Ultralytics expects
                    # for numpy input, so the frame is passed unconverted.
                    results = yolo_model_live.predict(
                        frame,
                        conf=conf_thres,
                        imgsz=416,       # 🔥 smaller image
                        verbose=False
                    )

                    # plot() returns a BGR array; convert once for display.
                    annotated_bgr = results[0].plot()
                    annotated_frame = cv2.cvtColor(annotated_bgr, cv2.COLOR_BGR2RGB)

                    # FPS of processed (non-skipped) frames; guarded against
                    # a zero time delta.
                    curr_time = time.time()
                    elapsed = curr_time - prev_time
                    fps = 1 / elapsed if elapsed > 0 else 0.0
                    prev_time = curr_time

                    fps_text.markdown(f"⚡ FPS: {fps:.1f}")

                    FRAME_WINDOW.image(
                        annotated_frame,
                        channels="RGB",
                        use_column_width=True
                    )

                    time.sleep(0.03)   # 🔥 CPU cooldown
        finally:
            # Release the camera on every exit path, including exceptions
            # raised inside the loop, so the device is not left locked.
            cap.release()


if page == "ℹ️ About":
    # About page: a fixed list of (heading, markdown body) sections rendered
    # in order, followed by a footer banner.
    st.subheader("📘 About SmartVision AI")
    st.markdown("---")

    about_sections = [
        # ---------------- PROJECT OVERVIEW ----------------
        ("## 🧠 Project Overview", """
    **SmartVision AI** is an end-to-end **computer vision system** designed to perform  
    **image classification**, **object detection**, and **real-time inference** using
    state-of-the-art deep learning models.

    The project demonstrates the complete AI lifecycle:
    **dataset preparation → model training → optimized inference → deployment using Streamlit**.
    """),
        # ---------------- DATASET INFO ----------------
        ("## 📂 Dataset Information", """
    - **Image Classification Dataset**
        - Domain-specific dataset with **25 object classes**
        - Preprocessed and augmented for robustness
        - Split into **Train / Validation / Test** sets

    - **Object Detection Dataset**
        - General object detection using **COCO dataset**
        - 80 commonly occurring object classes
        - Bounding-box annotated images
    """),
        # ---------------- MODEL ARCHITECTURES ----------------
        ("## 🏗️ Model Architectures Used", """
    ### 🔹 Image Classification Models
    - **VGG16 (Custom Trained)**
        - Modified fully connected layers
        - High accuracy on domain-specific data

    - **ResNet50**
        - Residual connections for deeper learning
        - Strong generalization capability

    - **MobileNetV2**
        - Lightweight architecture
        - Optimized for speed and mobile devices

    - **EfficientNet-B0**
        - Balanced accuracy and efficiency
        - Compound scaling technique

    ### 🔹 Object Detection Model
    - **YOLOv8 (Pretrained)**
        - Real-time object detection
        - Single-stage detector
        - Optimized for speed and accuracy
    """),
        # ---------------- TECH STACK ----------------
        ("## 🛠️ Technical Stack", """
    **Programming Language**
    - Python 🐍

    **Deep Learning & Vision**
    - PyTorch
    - Torchvision
    - Ultralytics YOLOv8
    - OpenCV

    **Data Processing & Visualization**
    - NumPy
    - Pandas
    - Matplotlib
    - Seaborn

    **Web & Deployment**
    - Streamlit
    - VS Code
    - Git & GitHub
    """),
        # ---------------- OPTIMIZATION ----------------
        ("## ⚡ Performance Optimization Techniques", """
    - Model quantization (where applicable)
    - Frame skipping for real-time inference
    - Resolution scaling for faster detection
    - CPU-optimized inference pipeline
    - Streamlit resource caching
    """),
        # ---------------- DEVELOPER INFO ----------------
        ("## 👨‍💻 Developer Information", """
    **Developer:** Rahul Kumar  
    **Degree:** B.Tech in Information Technology  
    **Institution:** IIEST Shibpur  

    **Core Interests:**
    - Computer Vision
    - Deep Learning
    - Full Stack Development
    - AI Model Deployment

    **Project Goal:**
    To build scalable, efficient, and production-ready AI systems
    with real-world deployment considerations.
    """),
    ]

    for section_heading, section_body in about_sections:
        st.markdown(section_heading)
        st.markdown(section_body)

    # --------------------------------------- FOOTER ---------------------------------------
    st.markdown("---")
    st.info("🚀 SmartVision AI — Bridging Deep Learning Research with Real-World Applications")


#-------------------Footer Part in sidebar----------------------------------------------------------------------------------------------

import streamlit as st

# Sidebar footer: project links in three columns, then a small credit line.
st.sidebar.markdown("---")

st.sidebar.markdown("### 📌 SmartVision AI")

col1, col2, col3 = st.sidebar.columns(3)

# Each link is written through its own column object. Calling
# `st.sidebar.markdown` inside `with colN:` targets the sidebar itself rather
# than the column, which left the columns empty and stacked the links
# vertically. The links are plain Markdown, so no unsafe HTML flag is needed.
col1.markdown(
    "[🌐 GitHub](https://github.com/rahul-tech-kumar/SmartVision-AI---Intelligent-Multi-Class-Object-Recognition-System)"
)
col2.markdown(
    "[💼 LinkedIn](https://www.linkedin.com/in/rahul-kumar-173546228/)"
)
col3.markdown(
    "[✉️ Email](mailto:rahulkumar11062003@gmail.com)"
)

st.sidebar.markdown("---")

# Raw HTML is required here for the centred, grey, small-print styling.
st.sidebar.markdown(
    """
    <div style="text-align:center; font-size:12px; color:gray;">
        🚀 Built with Streamlit & PyTorch<br>
        © 2025 SmartVision AI
    </div>
    """,
    unsafe_allow_html=True
)