import torch
import torchvision.transforms as transforms
from torchvision.models.detection.ssd import ssd300_vgg16
import numpy as np
import cv2
import streamlit as st
from PIL import Image
from huggingface_hub import hf_hub_download

# Display names for detections. Label 0 is reserved for background, so a
# model label ``i`` corresponds to ``classes[i - 1]``.
classes = ['dengue-regions', 'wet_surface']
# NOTE(review): num_classes is not passed to the model constructor below, so
# the network is built with torchvision's default class count. Confirm that the
# checkpoint's detection head matches that shape.
num_classes = len(classes) + 1  # Including background

# Load Model
st.title("Real-Time SSD Object Detection")
# Streamlit re-executes this script on every interaction; keeping the model in
# session_state means it is downloaded and built once per session.
if 'model' not in st.session_state:
    model_path = hf_hub_download(repo_id="DhominickJ/MosqScope", filename="mosquito_model.pth")
    # Build the architecture only. load_state_dict() below is strict, so it
    # replaces every parameter and buffer; fetching pretrained COCO/ImageNet
    # weights first would only add a large download that is thrown away.
    model = ssd300_vgg16(pretrained=False, pretrained_backbone=False)  # Multi-box Algorithm
    # SECURITY: torch.load unpickles the downloaded file. Consider passing
    # weights_only=True when the installed torch version supports it.
    state_dict = torch.load(model_path, map_location=torch.device("cpu"))
    model.load_state_dict(state_dict)
    model.eval()
    st.session_state.model = model

# Placeholder the annotated image is rendered into.
# Frames come from the browser through st.camera_input, so no OpenCV
# VideoCapture handle is opened: it was never read, and holding the device can
# prevent the browser widget from accessing the camera.
stframe = st.empty()

# Side length in pixels of the square image handed to the SSD300 network.
MODEL_INPUT_SIZE = 300
# Detections scoring below this are not drawn.
# NOTE(review): 0.5 is a starting point; tune it against real captures.
SCORE_THRESHOLD = 0.5

transform = transforms.Compose([
    transforms.Resize((MODEL_INPUT_SIZE, MODEL_INPUT_SIZE)),
    transforms.ToTensor(),
])


def _label_name(label):
    """Return the display name for a model label, where label 0 is background.

    Labels without an entry in ``classes`` get a generic ``class_<n>`` name
    instead of raising IndexError or wrapping around to the last class.
    """
    index = int(label) - 1
    if 0 <= index < len(classes):
        return classes[index]
    return f"class_{int(label)}"


captured_image = st.camera_input("Take a picture")
if captured_image is not None:
    # Load image from Streamlit capture
    image = Image.open(captured_image).convert("RGB")

    # Full-resolution RGB copy that the detections are drawn onto.
    image_np = np.array(image)

    # Apply transformation for model input (adds the batch dimension).
    image_tensor = transform(image).unsqueeze(0)

    # Run inference
    with torch.no_grad():
        output = st.session_state.model(image_tensor)[0]

    # The model reports boxes in the coordinate space of the tensor it was
    # given (MODEL_INPUT_SIZE x MODEL_INPUT_SIZE), so scale them back to the
    # captured image's resolution before drawing.
    height, width = image_np.shape[:2]
    scale = np.array([width, height, width, height], dtype=np.float32) / MODEL_INPUT_SIZE
    boxes = output["boxes"].cpu().numpy() * scale
    labels = output["labels"].cpu().numpy()
    scores = output["scores"].cpu().numpy()

    # Draw detections
    for box, label, score in zip(boxes, labels, scores):
        if score < SCORE_THRESHOLD:
            continue
        # .tolist() yields plain Python ints, which OpenCV point arguments expect.
        x_min, y_min, x_max, y_max = np.rint(box).astype(int).tolist()
        cv2.rectangle(image_np, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)
        # Keep the caption inside the image when the box touches the top edge.
        text_y = max(y_min - 5, 12)
        cv2.putText(image_np, _label_name(label), (x_min, text_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)

    # Display frame in Streamlit
    stframe.image(image_np, channels="RGB")

# No OpenCV capture or window is created, so there is nothing to release.
# cv2.destroyAllWindows() is deliberately not called: it can raise on OpenCV
# builds without GUI support.