"""Streamlit app that runs an SSD300-VGG16 detector on a camera snapshot.

The user takes a picture with ``st.camera_input``; the picture is resized to
the detector's input size, passed through the model, and the detections that
clear a user-selected confidence threshold are drawn on the original image.
"""

import torch
import torchvision.transforms as transforms
from torchvision.models.detection.ssd import ssd300_vgg16
import numpy as np
import cv2
import streamlit as st
from PIL import Image
from huggingface_hub import hf_hub_download

# Define dataset classes
classes = ['dengue-regions', 'wet_surface']
num_classes = len(classes) + 1  # Including background

# Side length (pixels) of the square image handed to the detector.
INPUT_SIZE = 300


def _load_model():
    """Download the checkpoint and return the detector in eval mode on CPU."""
    model_path = hf_hub_download(repo_id="DhominickJ/MosqScope", filename="mosquito_model.pth")
    # NOTE(review): `num_classes` is not passed here, so the detection head
    # keeps torchvision's default size and the checkpoint must match it.
    # If the checkpoint was trained with a 3-class head, build the model with
    # `num_classes=num_classes` instead -- confirm against the training code.
    model = ssd300_vgg16(pretrained=True)  # Multi-box Algorithm
    # NOTE(review): torch.load unpickles the downloaded file; only point this
    # at a repository you trust (or pass weights_only=True if your torch
    # version supports it).
    model.load_state_dict(torch.load(model_path, map_location=torch.device("cpu")))
    model.eval()
    return model


def _class_name(label):
    """Return the display name for a 1-based label id (0 is background)."""
    index = int(label) - 1
    if 0 <= index < len(classes):
        return classes[index]
    # The head may emit ids outside the two known classes; never index blindly.
    return f"class {int(label)}"


def _draw_detections(image_np, boxes, labels, scores, score_threshold):
    """Draw boxes and captions onto ``image_np`` in place.

    ``boxes`` are expected in the coordinate system of the INPUT_SIZE x
    INPUT_SIZE model input; they are rescaled to the size of ``image_np``
    before drawing. Detections scoring below ``score_threshold`` are skipped.
    """
    height, width = image_np.shape[:2]
    scale_x = width / INPUT_SIZE
    scale_y = height / INPUT_SIZE
    for box, label, score in zip(boxes, labels, scores):
        if score < score_threshold:
            continue
        # Map from model-input coordinates back to the captured image and
        # clamp so the rectangle stays inside the frame.
        x_min = min(max(int(round(box[0] * scale_x)), 0), width - 1)
        y_min = min(max(int(round(box[1] * scale_y)), 0), height - 1)
        x_max = min(max(int(round(box[2] * scale_x)), 0), width - 1)
        y_max = min(max(int(round(box[3] * scale_y)), 0), height - 1)
        cv2.rectangle(image_np, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)
        # Keep the caption on-screen when the box touches the top edge.
        text_y = max(y_min - 5, 12)
        cv2.putText(image_np, f"{_class_name(label)} {float(score):.2f}", (x_min, text_y),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)


# Load Model
st.title("Real-Time SSD Object Detection")
if 'model' not in st.session_state:
    # Cached per session so Streamlit reruns do not reload the weights.
    st.session_state.model = _load_model()

# Detections scoring below this value are not drawn.
score_threshold = st.slider("Confidence threshold", 0.0, 1.0, 0.5, 0.05)

# Placeholder for the annotated image. The frame itself comes from the
# browser via st.camera_input, so no server-side cv2.VideoCapture is opened.
stframe = st.empty()

transform = transforms.Compose([
    transforms.Resize((INPUT_SIZE, INPUT_SIZE)),
    transforms.ToTensor()
])

captured_image = st.camera_input("Take a picture")
if captured_image is not None:
    # Load image from Streamlit capture
    image = Image.open(captured_image)
    image = image.convert("RGB")

    # Convert image to NumPy for OpenCV processing (np.array copies, so the
    # buffer is writable for the drawing calls).
    image_np = np.array(image)

    # Apply transformation for model input
    image_tensor = transform(image).unsqueeze(0)

    # Run inference
    with torch.no_grad():
        output = st.session_state.model(image_tensor)[0]

    # Draw detections
    _draw_detections(
        image_np,
        output["boxes"].cpu().numpy(),
        output["labels"].cpu().numpy(),
        output["scores"].cpu().numpy(),
        score_threshold,
    )

    # Display frame in Streamlit
    stframe.image(image_np, channels="RGB")