PedroM2626 committed on
Commit
a2b5362
·
1 Parent(s): 58ae82a

feat(yolo): add object detection app with YOLOv3-tiny integration

Browse files
.env.example ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ YOLO_CFG_PATH=darknet/cfg/yolov3-tiny-obj.cfg
2
+ YOLO_WEIGHTS_PATH=darknet/backup/yolov3-tiny-obj_final.weights
3
+ YOLO_NAMES_PATH=darknet/cfg/obj.names
4
+ YOLO_CONF_THRESHOLD=0.5
5
+ YOLO_NMS_THRESHOLD=0.4
6
+ YOLO_USE_GPU=false
Dockerfile CHANGED
@@ -1,20 +1,26 @@
1
- FROM python:3.13.5-slim
2
 
3
  WORKDIR /app
4
 
 
5
  RUN apt-get update && apt-get install -y \
6
  build-essential \
7
  curl \
8
  git \
 
 
9
  && rm -rf /var/lib/apt/lists/*
10
 
 
11
  COPY requirements.txt ./
12
- COPY src/ ./src/
13
 
14
- RUN pip3 install -r requirements.txt
 
15
 
16
  EXPOSE 8501
17
 
18
  HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
19
 
20
- ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]
 
 
1
+ FROM python:3.11-slim
2
 
3
  WORKDIR /app
4
 
5
+ # Instalar dependências de sistema
6
  RUN apt-get update && apt-get install -y \
7
  build-essential \
8
  curl \
9
  git \
10
+ libgl1 \
11
+ libglib2.0-0 \
12
  && rm -rf /var/lib/apt/lists/*
13
 
14
+ # Copiar apenas requirements primeiro para aproveitar o cache do Docker
15
  COPY requirements.txt ./
16
+ RUN pip3 install --no-cache-dir -r requirements.txt
17
 
18
+ # Copiar o restante dos arquivos (isso invalidará o cache se algum arquivo mudar)
19
+ COPY . .
20
 
21
  EXPOSE 8501
22
 
23
  HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
24
 
25
+ # Flags para evitar erro 403 e garantir funcionamento em proxies
26
+ ENTRYPOINT ["streamlit", "run", "streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0", "--server.enableCORS=false", "--server.enableXsrfProtection=false"]
models/coco.names ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ person
2
+ bicycle
3
+ car
4
+ motorbike
5
+ aeroplane
6
+ bus
7
+ train
8
+ truck
9
+ boat
10
+ traffic light
11
+ fire hydrant
12
+ stop sign
13
+ parking meter
14
+ bench
15
+ bird
16
+ cat
17
+ dog
18
+ horse
19
+ sheep
20
+ cow
21
+ elephant
22
+ bear
23
+ zebra
24
+ giraffe
25
+ backpack
26
+ umbrella
27
+ handbag
28
+ tie
29
+ suitcase
30
+ frisbee
31
+ skis
32
+ snowboard
33
+ sports ball
34
+ kite
35
+ baseball bat
36
+ baseball glove
37
+ skateboard
38
+ surfboard
39
+ tennis racket
40
+ bottle
41
+ wine glass
42
+ cup
43
+ fork
44
+ knife
45
+ spoon
46
+ bowl
47
+ banana
48
+ apple
49
+ sandwich
50
+ orange
51
+ broccoli
52
+ carrot
53
+ hot dog
54
+ pizza
55
+ donut
56
+ cake
57
+ chair
58
+ sofa
59
+ pottedplant
60
+ bed
61
+ diningtable
62
+ toilet
63
+ tvmonitor
64
+ laptop
65
+ mouse
66
+ remote
67
+ keyboard
68
+ cell phone
69
+ microwave
70
+ oven
71
+ toaster
72
+ sink
73
+ refrigerator
74
+ book
75
+ clock
76
+ vase
77
+ scissors
78
+ teddy bear
79
+ hair drier
80
+ toothbrush
models/yolov3-tiny.cfg ADDED
@@ -0,0 +1,182 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [net]
2
+ # Testing
3
+ batch=1
4
+ subdivisions=1
5
+ # Training
6
+ # batch=64
7
+ # subdivisions=2
8
+ width=416
9
+ height=416
10
+ channels=3
11
+ momentum=0.9
12
+ decay=0.0005
13
+ angle=0
14
+ saturation = 1.5
15
+ exposure = 1.5
16
+ hue=.1
17
+
18
+ learning_rate=0.001
19
+ burn_in=1000
20
+ max_batches = 500200
21
+ policy=steps
22
+ steps=400000,450000
23
+ scales=.1,.1
24
+
25
+ [convolutional]
26
+ batch_normalize=1
27
+ filters=16
28
+ size=3
29
+ stride=1
30
+ pad=1
31
+ activation=leaky
32
+
33
+ [maxpool]
34
+ size=2
35
+ stride=2
36
+
37
+ [convolutional]
38
+ batch_normalize=1
39
+ filters=32
40
+ size=3
41
+ stride=1
42
+ pad=1
43
+ activation=leaky
44
+
45
+ [maxpool]
46
+ size=2
47
+ stride=2
48
+
49
+ [convolutional]
50
+ batch_normalize=1
51
+ filters=64
52
+ size=3
53
+ stride=1
54
+ pad=1
55
+ activation=leaky
56
+
57
+ [maxpool]
58
+ size=2
59
+ stride=2
60
+
61
+ [convolutional]
62
+ batch_normalize=1
63
+ filters=128
64
+ size=3
65
+ stride=1
66
+ pad=1
67
+ activation=leaky
68
+
69
+ [maxpool]
70
+ size=2
71
+ stride=2
72
+
73
+ [convolutional]
74
+ batch_normalize=1
75
+ filters=256
76
+ size=3
77
+ stride=1
78
+ pad=1
79
+ activation=leaky
80
+
81
+ [maxpool]
82
+ size=2
83
+ stride=2
84
+
85
+ [convolutional]
86
+ batch_normalize=1
87
+ filters=512
88
+ size=3
89
+ stride=1
90
+ pad=1
91
+ activation=leaky
92
+
93
+ [maxpool]
94
+ size=2
95
+ stride=1
96
+
97
+ [convolutional]
98
+ batch_normalize=1
99
+ filters=1024
100
+ size=3
101
+ stride=1
102
+ pad=1
103
+ activation=leaky
104
+
105
+ ###########
106
+
107
+ [convolutional]
108
+ batch_normalize=1
109
+ filters=256
110
+ size=1
111
+ stride=1
112
+ pad=1
113
+ activation=leaky
114
+
115
+ [convolutional]
116
+ batch_normalize=1
117
+ filters=512
118
+ size=3
119
+ stride=1
120
+ pad=1
121
+ activation=leaky
122
+
123
+ [convolutional]
124
+ size=1
125
+ stride=1
126
+ pad=1
127
+ filters=255
128
+ activation=linear
129
+
130
+
131
+
132
+ [yolo]
133
+ mask = 3,4,5
134
+ anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
135
+ classes=80
136
+ num=6
137
+ jitter=.3
138
+ ignore_thresh = .7
139
+ truth_thresh = 1
140
+ random=1
141
+
142
+ [route]
143
+ layers = -4
144
+
145
+ [convolutional]
146
+ batch_normalize=1
147
+ filters=128
148
+ size=1
149
+ stride=1
150
+ pad=1
151
+ activation=leaky
152
+
153
+ [upsample]
154
+ stride=2
155
+
156
+ [route]
157
+ layers = -1, 8
158
+
159
+ [convolutional]
160
+ batch_normalize=1
161
+ filters=256
162
+ size=3
163
+ stride=1
164
+ pad=1
165
+ activation=leaky
166
+
167
+ [convolutional]
168
+ size=1
169
+ stride=1
170
+ pad=1
171
+ filters=255
172
+ activation=linear
173
+
174
+ [yolo]
175
+ mask = 0,1,2
176
+ anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
177
+ classes=80
178
+ num=6
179
+ jitter=.3
180
+ ignore_thresh = .7
181
+ truth_thresh = 1
182
+ random=1
models/yolov3-tiny.weights ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dccea06f59b781ec1234ddf8d1e94b9519a97f4245748a7d4db75d5b7080a42c
3
+ size 35434956
requirements.txt CHANGED
@@ -1,3 +1,13 @@
1
- altair
2
- pandas
3
- streamlit
 
 
 
 
 
 
 
 
 
 
 
1
+ opencv-python>=4.8.0
2
+ numpy>=1.24.0
3
+ python-dotenv>=1.0.0
4
+ jupyter>=1.0.0
5
+ ipywidgets>=8.0.0
6
+ matplotlib>=3.7.0
7
+ pytest>=7.0.0
8
+ ultralytics>=8.0.0
9
+ PyYAML>=6.0.0
10
+ requests>=2.31.0
11
+ tqdm>=4.66.0
12
+ streamlit>=1.25.0
13
+ pillow>=10.0.0
src/streamlit_app.py DELETED
@@ -1,40 +0,0 @@
1
- import altair as alt
2
- import numpy as np
3
- import pandas as pd
4
- import streamlit as st
5
-
6
- """
7
- # Welcome to Streamlit!
8
-
9
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
10
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
11
- forums](https://discuss.streamlit.io).
12
-
13
- In the meantime, below is an example of what you can do with just a few lines of code:
14
- """
15
-
16
- num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
17
- num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
18
-
19
- indices = np.linspace(0, 1, num_points)
20
- theta = 2 * np.pi * num_turns * indices
21
- radius = indices
22
-
23
- x = radius * np.cos(theta)
24
- y = radius * np.sin(theta)
25
-
26
- df = pd.DataFrame({
27
- "x": x,
28
- "y": y,
29
- "idx": indices,
30
- "rand": np.random.randn(num_points),
31
- })
32
-
33
- st.altair_chart(alt.Chart(df, height=700, width=700)
34
- .mark_point(filled=True)
35
- .encode(
36
- x=alt.X("x", axis=None),
37
- y=alt.Y("y", axis=None),
38
- color=alt.Color("idx", legend=None, scale=alt.Scale()),
39
- size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
40
- ))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
streamlit_app.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Importações necessárias para Streamlit, OpenCV e processamento de imagem
2
+ import streamlit as st
3
+ import cv2
4
+ import numpy as np
5
+ from PIL import Image
6
+ from yolo_inference import build_detector_from_env
7
+
8
+ # Configuração inicial da página do Streamlit (Título e Layout)
9
+ st.set_page_config(page_title="YOLO Detection - Streamlit", layout="wide", page_icon="🚗")
10
+
11
+ def main():
12
+ """
13
+ Função principal que gerencia a interface Streamlit.
14
+ Permite alternar entre detecção em imagens estáticas e vídeo em tempo real via webcam.
15
+ """
16
+ st.title("🚀 YOLO Object Detection")
17
+ st.markdown("---")
18
+ st.markdown("### Interface interativa para detecção de objetos usando YOLOv3-tiny.")
19
+
20
+ # Sidebar: Painel lateral para controle de parâmetros e seleção de modo
21
+ st.sidebar.header("🛠️ Configurações do Modelo")
22
+
23
+ # Sliders para ajuste dinâmico dos limiares de detecção
24
+ conf_threshold = st.sidebar.slider("Confiança Mínima (Threshold)", 0.0, 1.0, 0.5, 0.05,
25
+ help="Nível mínimo de certeza para exibir uma detecção.")
26
+ nms_threshold = st.sidebar.slider("NMS Threshold", 0.0, 1.0, 0.4, 0.05,
27
+ help="Limiar para supressão de não-máximos (remove bboxes sobrepostas).")
28
+
29
+ st.sidebar.markdown("---")
30
+ # Seleção do modo de operação
31
+ mode = st.sidebar.radio("📡 Escolha o Modo de Entrada", ["Imagem", "Câmera (Real-time)"])
32
+
33
+ # Inicializa o detector YOLO
34
+ # A função build_detector_from_env gerencia o download automático dos pesos se necessário.
35
+ try:
36
+ detector = build_detector_from_env(conf_threshold=conf_threshold, nms_threshold=nms_threshold)
37
+ except Exception as e:
38
+ st.error(f"❌ Erro ao inicializar detector: {e}")
39
+ return
40
+
41
+ # Lista de classes do dataset personalizado para monitoramento especial
42
+ CUSTOM_CLASSES = {"car", "truck", "bus", "motorbike", "bicycle", "van", "threewheel"}
43
+
44
+ if mode == "Imagem":
45
+ st.subheader("📁 Upload e Detecção em Imagem")
46
+ uploaded_file = st.file_uploader("Arraste ou selecione uma imagem...", type=["jpg", "jpeg", "png"])
47
+
48
+ if uploaded_file is not None:
49
+ # Converte o arquivo carregado (BytesIO) para uma imagem PIL e depois para array numpy
50
+ image = Image.open(uploaded_file)
51
+ image_np = np.array(image)
52
+
53
+ # Streamlit/PIL trabalham em RGB, mas o detector OpenCV espera BGR
54
+ frame_bgr = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)
55
+
56
+ # Realiza a detecção de objetos
57
+ with st.spinner('Processando imagem...'):
58
+ detections = detector.detect(frame_bgr)
59
+
60
+ # Filtra e exibe classes encontradas que pertencem ao dataset customizado
61
+ hits = sorted({d['class_name'] for d in detections if d['class_name'] in CUSTOM_CLASSES})
62
+
63
+ # Layout em duas colunas: Imagem original vs Resultado
64
+ col1, col2 = st.columns(2)
65
+
66
+ with col1:
67
+ st.image(image, caption="Imagem Original", use_column_width=True)
68
+
69
+ with col2:
70
+ # Desenha os retângulos e labels no frame BGR
71
+ result_bgr = detector.draw(frame_bgr, detections)
72
+ # Converte de volta para RGB para exibição correta no Streamlit
73
+ result_rgb = cv2.cvtColor(result_bgr, cv2.COLOR_BGR2RGB)
74
+ st.image(result_rgb, caption="Detecções Encontradas", use_column_width=True)
75
+
76
+ # Exibe alertas baseados nas classes detectadas
77
+ if hits:
78
+ st.success(f"✅ Objetos do dataset detectados: **{', '.join(hits)}**")
79
+ else:
80
+ st.info("ℹ️ Nenhuma classe do dataset específico foi detectada nesta imagem.")
81
+
82
+ elif mode == "Câmera (Real-time)":
83
+ st.subheader("🎥 Detecção via Webcam em Tempo Real")
84
+ st.warning("⚠️ Certifique-se de que sua webcam não está sendo usada por outro aplicativo.")
85
+
86
+ # Checkbox para ligar/desligar o loop da câmera
87
+ run = st.checkbox("Ativar Câmera")
88
+
89
+ # Placeholders para atualização dinâmica do frame e status sem recarregar a página toda
90
+ frame_placeholder = st.empty()
91
+ status_placeholder = st.empty()
92
+
93
+ if run:
94
+ # Inicializa a captura de vídeo (ID 0 costuma ser a webcam padrão)
95
+ cap = cv2.VideoCapture(0)
96
+ if not cap.isOpened():
97
+ st.error("Não foi possível acessar a câmera. Verifique as permissões.")
98
+ return
99
+
100
+ while run:
101
+ ret, frame = cap.read()
102
+ if not ret:
103
+ st.error("Falha ao capturar vídeo.")
104
+ break
105
+
106
+ # Processa o frame atual
107
+ detections = detector.detect(frame)
108
+
109
+ # Renderiza as detecções no frame
110
+ frame_out = detector.draw(frame, detections)
111
+
112
+ # Adiciona overlay de instrução no frame (estilo solicitado anteriormente)
113
+ cv2.putText(frame_out, "Desmarque 'Ativar Camera' para sair", (20, 40),
114
+ cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
115
+
116
+ # Identifica classes do dataset para exibição de status dinâmico
117
+ hits = sorted({d['class_name'] for d in detections if d['class_name'] in CUSTOM_CLASSES})
118
+ if hits:
119
+ status_placeholder.success(f"Detectado: **{', '.join(hits)}**")
120
+ else:
121
+ status_placeholder.empty()
122
+
123
+ # Conversão BGR -> RGB para o Streamlit renderizar corretamente
124
+ frame_rgb = cv2.cvtColor(frame_out, cv2.COLOR_BGR2RGB)
125
+ frame_placeholder.image(frame_rgb, channels="RGB", use_column_width=True)
126
+
127
+ # Pequeno delay opcional para sincronia (cv2.waitKey não é necessário aqui para exibição,
128
+ # mas ajuda a liberar CPU)
129
+ if cv2.waitKey(1) & 0xFF == ord('q'):
130
+ break
131
+
132
+ # Libera recursos ao encerrar
133
+ cap.release()
134
+ st.write("🏁 Captura encerrada.")
135
+ else:
136
+ st.write("💤 Câmera em espera.")
137
+
138
+ if __name__ == "__main__":
139
+ main()
140
+
yolo_inference.py ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import cv2
3
+ import numpy as np
4
+ from typing import List, Tuple, Dict, Optional
5
+ import urllib.request
6
+ import pathlib
7
+
8
+ try:
9
+ from dotenv import load_dotenv
10
+ except Exception:
11
+ load_dotenv = None
12
+
13
+
14
+ def _load_classes(names_path: str) -> List[str]:
15
+ # Lê arquivo .names e retorna lista de classes
16
+ if not os.path.isfile(names_path):
17
+ raise FileNotFoundError(f"Arquivo de classes não encontrado: {names_path}")
18
+ with open(names_path, "r", encoding="utf-8") as f:
19
+ classes = [line.strip() for line in f if line.strip()]
20
+ if not classes:
21
+ raise ValueError("Lista de classes vazia")
22
+ return classes
23
+
24
+
25
+ def _get_output_layer_names(net: cv2.dnn_Net) -> List[str]:
26
+ # Extrai nomes das camadas de saída (YOLO) para forward
27
+ layer_names = net.getLayerNames()
28
+ out_layers = net.getUnconnectedOutLayers()
29
+ return [layer_names[i - 1] for i in out_layers.flatten()]
30
+
31
+
32
+ class YoloDetector:
33
+ # Wrapper para inferência com OpenCV DNN + Darknet cfg/weights
34
+ def __init__(
35
+ self,
36
+ cfg_path: str,
37
+ weights_path: str,
38
+ names_path: str,
39
+ conf_threshold: float = 0.5,
40
+ nms_threshold: float = 0.4,
41
+ use_gpu: bool = False,
42
+ ):
43
+ if not os.path.isfile(cfg_path):
44
+ raise FileNotFoundError(f"CFG não encontrado: {cfg_path}")
45
+ if not os.path.isfile(weights_path):
46
+ raise FileNotFoundError(f"Pesos não encontrados: {weights_path}")
47
+ self.classes = _load_classes(names_path)
48
+ self.net = cv2.dnn.readNetFromDarknet(cfg_path, weights_path)
49
+ if use_gpu:
50
+ self.net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
51
+ self.net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
52
+ else:
53
+ self.net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
54
+ self.net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)
55
+ self.conf_threshold = conf_threshold
56
+ self.nms_threshold = nms_threshold
57
+ self.output_layer_names = _get_output_layer_names(self.net)
58
+
59
+ def detect(
60
+ self,
61
+ image_bgr: np.ndarray,
62
+ input_size: Tuple[int, int] = (416, 416),
63
+ ) -> List[Dict]:
64
+ # Executa inferência e retorna lista de detecções com bbox, classe e confiança
65
+ if image_bgr is None or image_bgr.size == 0:
66
+ raise ValueError("Imagem inválida para detecção")
67
+ h, w = image_bgr.shape[:2]
68
+ blob = cv2.dnn.blobFromImage(image_bgr, 1 / 255.0, input_size, swapRB=True, crop=False)
69
+ self.net.setInput(blob)
70
+ layer_outputs = self.net.forward(self.output_layer_names)
71
+
72
+ boxes: List[List[int]] = []
73
+ confidences: List[float] = []
74
+ class_ids: List[int] = []
75
+
76
+ for output in layer_outputs:
77
+ for detection in output:
78
+ scores = detection[5:]
79
+ class_id = int(np.argmax(scores))
80
+ confidence = float(scores[class_id])
81
+ if confidence >= self.conf_threshold:
82
+ center_x = int(detection[0] * w)
83
+ center_y = int(detection[1] * h)
84
+ width = int(detection[2] * w)
85
+ height = int(detection[3] * h)
86
+ x = int(center_x - width / 2)
87
+ y = int(center_y - height / 2)
88
+ boxes.append([x, y, width, height])
89
+ confidences.append(confidence)
90
+ class_ids.append(class_id)
91
+
92
+ indices = cv2.dnn.NMSBoxes(boxes, confidences, self.conf_threshold, self.nms_threshold)
93
+
94
+ detections: List[Dict] = []
95
+ if len(indices) > 0:
96
+ for i in indices.flatten():
97
+ x, y, w_box, h_box = boxes[i]
98
+ detections.append(
99
+ {
100
+ "class_id": class_ids[i],
101
+ "class_name": self.classes[class_ids[i]] if 0 <= class_ids[i] < len(self.classes) else str(class_ids[i]),
102
+ "confidence": confidences[i],
103
+ "box": (max(0, x), max(0, y), max(0, w_box), max(0, h_box)),
104
+ }
105
+ )
106
+ return detections
107
+
108
+ def draw(self, image_bgr: np.ndarray, detections: List[Dict]) -> np.ndarray:
109
+ # Desenha retângulos e labels no frame
110
+ out = image_bgr.copy()
111
+ for det in detections:
112
+ x, y, w, h = det["box"]
113
+ label = f"{det['class_name']} {det['confidence']:.2f}"
114
+ color = (0, 255, 0)
115
+ cv2.rectangle(out, (x, y), (x + w, y + h), color, 2)
116
+ (tw, th), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
117
+ cv2.rectangle(out, (x, y - th - 6), (x + tw + 4, y), color, -1)
118
+ cv2.putText(out, label, (x + 2, y - 4), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2)
119
+ return out
120
+
121
+
122
+ def build_detector_from_env(
123
+ conf_threshold: Optional[float] = None,
124
+ nms_threshold: Optional[float] = None,
125
+ use_gpu: Optional[bool] = None,
126
+ ) -> YoloDetector:
127
+ # Inicializa via .env; se faltarem caminhos/arquivos, baixa YOLOv3-tiny automaticamente (models/)
128
+ if load_dotenv is not None:
129
+ load_dotenv()
130
+ cfg_path = os.getenv("YOLO_CFG_PATH", "").strip()
131
+ weights_path = os.getenv("YOLO_WEIGHTS_PATH", "").strip()
132
+ names_path = os.getenv("YOLO_NAMES_PATH", "").strip()
133
+ # Se variáveis não existirem OU arquivos não existirem, usar fallback auto-download
134
+ if (not cfg_path or not weights_path or not names_path
135
+ or not os.path.isfile(cfg_path)
136
+ or not os.path.isfile(weights_path)
137
+ or not os.path.isfile(names_path)):
138
+ models_dir = pathlib.Path("models")
139
+ models_dir.mkdir(exist_ok=True)
140
+ cfg_path = str(models_dir / "yolov3-tiny.cfg")
141
+ weights_path = str(models_dir / "yolov3-tiny.weights")
142
+ names_path = str(models_dir / "coco.names")
143
+ if not os.path.isfile(cfg_path):
144
+ url_cfg = "https://raw.githubusercontent.com/pjreddie/darknet/master/cfg/yolov3-tiny.cfg"
145
+ urllib.request.urlretrieve(url_cfg, cfg_path)
146
+ if not os.path.isfile(weights_path):
147
+ url_weights = "https://pjreddie.com/media/files/yolov3-tiny.weights"
148
+ urllib.request.urlretrieve(url_weights, weights_path)
149
+ if not os.path.isfile(names_path):
150
+ url_names = "https://raw.githubusercontent.com/pjreddie/darknet/master/data/coco.names"
151
+ urllib.request.urlretrieve(url_names, names_path)
152
+ ct = float(os.getenv("YOLO_CONF_THRESHOLD", conf_threshold if conf_threshold is not None else 0.5))
153
+ nt = float(os.getenv("YOLO_NMS_THRESHOLD", nms_threshold if nms_threshold is not None else 0.4))
154
+ gpu_flag = os.getenv("YOLO_USE_GPU", "false").lower() in {"1", "true", "yes"} if use_gpu is None else use_gpu
155
+ return YoloDetector(cfg_path=cfg_path, weights_path=weights_path, names_path=names_path, conf_threshold=ct, nms_threshold=nt, use_gpu=gpu_flag)
156
+