Spaces:

jarondon82
/

ComputerVisionProject

Build error

App Files Files Community

jarondon82 committed on Mar 22, 2025

Commit

048e62d

1 Parent(s): b854a11

Mejorar el procesamiento de video en tiempo real y la cámara alternativa

Browse files

Files changed (1) hide show

streamlit_app.py +184 -81

streamlit_app.py CHANGED Viewed

@@ -2213,10 +2213,16 @@ def main():
                     {"iceServers": [
                         {"urls": ["stun:stun.l.google.com:19302"]},
                         {"urls": ["stun:stun1.l.google.com:19302"]},
-                        {"urls": ["stun:stun2.l.google.com:19302"]}
                     ]}
                 )
                 # Define callback to update session state with frames processed
                 class VideoProcessor(VideoProcessorBase):
                     def __init__(self):
@@ -2224,17 +2230,29 @@ def main():
                         self.face_count = 0
                         self.start_time = time.time()
                         self.processing = True
-                        self.frame_skip = 3  # Solo procesar cada 3 frames para reducir carga
                     def recv(self, frame):
-                        img = frame.to_ndarray(format="bgr24")
-                        self.frame_count += 1
-                        # Solo procesar algunos frames para reducir carga
-                        if self.frame_count % self.frame_skip != 0:
-                            return av.VideoFrame.from_ndarray(img, format="bgr24")
                         try:
                             # Verificar que la imagen no sea nula
                             if img is None or img.size == 0 or img.shape[0] == 0 or img.shape[1] == 0:
                                 # Si la imagen es inválida, devolver un frame en blanco
@@ -2242,16 +2260,23 @@ def main():
                                 cv2.putText(blank_frame, "Error: Invalid frame", (50, 240),
                                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                                 return av.VideoFrame.from_ndarray(blank_frame, format="bgr24")
                             # Reducir tamaño del frame para procesamiento más rápido
                             scale_factor = 0.5
                             h, w = img.shape[:2]
-                            small_img = safe_resize(img, (int(w * scale_factor), int(h * scale_factor)))
-                            if small_img is None:
                                 return av.VideoFrame.from_ndarray(img, format="bgr24")
                             # Detect faces - la función ahora devuelve directamente los bboxes
-                            bboxes = detect_face_dnn(face_net, small_img, confidence_threshold)
                             # Ajustar bounding boxes al tamaño original
                             original_bboxes = []
@@ -2268,7 +2293,7 @@ def main():
                             self.face_count = len(original_bboxes)
                             current_time = time.time()
                             elapsed_time = current_time - self.start_time
-                            fps = self.frame_count / elapsed_time if elapsed_time > 0 else 0
                             # Actualizar métricas en session_state para que sean accesibles fuera
                             st.session_state.faces_detected = self.face_count
@@ -2278,83 +2303,85 @@ def main():
                             result_img = img.copy()
                             for i, (x1, y1, x2, y2, conf) in enumerate(original_bboxes):
                                 cv2.rectangle(result_img, (x1, y1), (x2, y2), (0, 255, 0), 2)
-                                cv2.putText(result_img, f"Face {i+1}: {conf:.2f}", (x1, y1-10),
                                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
                             # Añadir información FPS y rostros
                             cv2.putText(result_img, f"FPS: {fps:.1f}", (10, 30),
                                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
-                            cv2.putText(result_img, f"Faces: {self.face_count}", (10, 60),
                                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
                             return av.VideoFrame.from_ndarray(result_img, format="bgr24")
                         except Exception as e:
-                            print(f"Error en procesamiento de video: {str(e)}")
-                            # En caso de error, devolver el frame original
-                            return av.VideoFrame.from_ndarray(img, format="bgr24")
-                # Display WebRTC streamer
                 webrtc_ctx = webrtc_streamer(
                     key="face-recognition",
                     mode=WebRtcMode.SENDRECV,
                     rtc_configuration=rtc_configuration,
-                    video_processor_factory=VideoProcessor,
                     media_stream_constraints={"video": {"width": 640, "height": 480}, "audio": False},
                     async_processing=True,
-                    video_html_attrs={
-                        "style": {"width": "100%", "margin": "0 auto", "border": "2px solid"},
-                        "controls": False,
-                        "autoPlay": True,
-                    },
                 )
-                # Update metrics if WebRTC is running
                 if webrtc_ctx.state.playing:
-                    # Use a separate thread to update metrics
                     faces_metric.metric("Faces detected", st.session_state.get('faces_detected', 0))
                     fps_metric.metric("FPS", f"{st.session_state.get('fps', 0):.1f}")
                     time_metric.metric("Status", "Running")
-                    # Add instructions
-                    st.info("WebRTC camera is active. Face recognition is being processed in real-time.")
-                    st.warning("Note: For better performance, make sure you have good lighting and face the camera directly.")
                 else:
                     faces_metric.metric("Faces detected", 0)
-                    fps_metric.metric("FPS", 0)
                     time_metric.metric("Status", "Stopped")
-                    st.info("Haga clic en el botón 'START' para activar la cámara y comenzar el reconocimiento en tiempo real.")
-                    st.warning("Nota: Si está ejecutando esto en Hugging Face Spaces, puede que WebRTC no funcione correctamente. Utilice las opciones alternativas a continuación.")
-                    # WebRTC troubleshooting
-                    with st.expander("Ayuda: Problemas con WebRTC"):
-                        st.markdown("""
-                        ### Solución de problemas con WebRTC
-                        Si el reconocimiento en tiempo real no funciona, puede deberse a las siguientes razones:
-                        1. **Restricciones de seguridad en Hugging Face Spaces**: Algunos navegadores restringen el acceso a la cámara en entornos como este.
-                        2. **Problemas de conexión**: WebRTC requiere establecer una conexión que puede ser bloqueada por firewalls o proxies.
-                        3. **Permisos de cámara**: Es posible que deba conceder permisos explícitos al navegador para acceder a su cámara.
-                        ### Qué hacer:
-                        1. Intente usar otro navegador (Chrome suele funcionar mejor)
-                        2. Asegúrese de que ha concedido permisos de cámara cuando el navegador los solicita
-                        3. Si sigue sin funcionar, use las opciones alternativas que se muestran a continuación
-                        """)
                     # Añadir opción de cámara alternativa para entornos donde WebRTC no funciona bien
                     st.markdown("---")
                     st.markdown("### Modo de cámara alternativo")
                     col1, col2 = st.columns(2)
-                    if col1.button("Usar cámara simple", key="simple_camera_button1"):
                         st.session_state.simple_camera = True
                         st.session_state.demo_running = False
                         st.session_state.upload_mode = False
-                    if col2.button("Detener cámara simple", key="stop_camera_button1"):
                         st.session_state.simple_camera = False
                     if st.session_state.get('simple_camera', False):
@@ -2363,44 +2390,120 @@ def main():
                         # Configurar métricas
                         faces_metric.metric("Faces detected", 0)
-                        fps_metric.metric("FPS", "0.0")
                         time_metric.metric("Status", "Running")
                         # Cámara simple que toma una imagen a la vez
                         with camera_container:
-                            st.info("Cámara simple activada. Cada imagen se procesa individualmente.")
                             # Usar imagen de la cámara
                             captured_image = st.camera_input("Tomar foto para reconocimiento", key="camera_simple_input")
                             # Procesar la imagen si está disponible
                             if captured_image is not None:
-                                # Leer imagen
-                                image_bytes = captured_image.getvalue()
-                                image = cv2.imdecode(np.frombuffer(image_bytes, np.uint8), cv2.IMREAD_COLOR)
-                                # Detectar rostros
-                                bboxes = detect_face_dnn(face_net, image, confidence_threshold)
-                                # Actualizar métricas
-                                faces_metric.metric("Faces detected", len(bboxes))
-                                fps_metric.metric("FPS", "N/A")
-                                # Dibujar resultados
-                                result_img = image.copy()
-                                for i, bbox in enumerate(bboxes):
-                                    x1, y1, x2, y2, _ = bbox
-                                    cv2.rectangle(result_img, (x1, y1), (x2, y2), (0, 255, 0), 2)
-                                    cv2.putText(result_img, f"Face {i+1}", (x1, y1-10),
-                                               cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
-                                # Mostrar resultado
-                                st.image(result_img, channels="BGR", caption="Rostros detectados", use_column_width=True)
-                                if len(bboxes) > 0:
-                                    st.success(f"Se detectaron {len(bboxes)} rostros")
-                                else:
-                                    st.warning("No se detectaron rostros. Intente con una iluminación mejor o una posición diferente.")
                     # Opción alternativa en caso de problemas con WebRTC (mantenemos esta opción también)
                     st.markdown("---")

                     {"iceServers": [
                         {"urls": ["stun:stun.l.google.com:19302"]},
                         {"urls": ["stun:stun1.l.google.com:19302"]},
+                        # ICE server URLs need an explicit "stun:"/"turn:" scheme (RFC 7064);
+                        # a bare host:port entry is not a valid STUN URI, so keep the prefix
+                        # consistent with the two entries above.
+                        {"urls": ["stun:stun2.l.google.com:19302"]}
                     ]}
                 )
+                # Initialize session state variables if they don't exist
+                if 'faces_detected' not in st.session_state:
+                    st.session_state.faces_detected = 0
+                if 'fps' not in st.session_state:
+                    st.session_state.fps = 0
                 # Define callback to update session state with frames processed
                 class VideoProcessor(VideoProcessorBase):
                     def __init__(self):
                         self.face_count = 0
                         self.start_time = time.time()
                         self.processing = True
+                        self.frame_skip = 2  # Process every other frame to reduce load
+                        self.frames_processed = 0
+                        self.last_log_time = time.time()
                     def recv(self, frame):
                         try:
+                            img = frame.to_ndarray(format="bgr24")
+                            self.frame_count += 1
+                            # Solo procesar algunos frames para reducir carga
+                            if self.frame_count % self.frame_skip != 0:
+                                return av.VideoFrame.from_ndarray(img, format="bgr24")
+                            self.frames_processed += 1
+                            now = time.time()
+                            # Registro de diagnóstico cada 5 segundos
+                            if now - self.last_log_time > 5:
+                                print(f"Frames procesados: {self.frames_processed}, " +
+                                      f"Tiempo transcurrido: {now - self.start_time:.1f}s, " +
+                                      f"FPS: {self.frames_processed/(now - self.start_time):.1f}")
+                                self.last_log_time = now
                             # Verificar que la imagen no sea nula
                             if img is None or img.size == 0 or img.shape[0] == 0 or img.shape[1] == 0:
                                 # Si la imagen es inválida, devolver un frame en blanco
                                 cv2.putText(blank_frame, "Error: Invalid frame", (50, 240),
                                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                                 return av.VideoFrame.from_ndarray(blank_frame, format="bgr24")
                             # Reducir tamaño del frame para procesamiento más rápido
                             scale_factor = 0.5
                             h, w = img.shape[:2]
+                            try:
+                                small_img = cv2.resize(img, (int(w * scale_factor), int(h * scale_factor)))
+                            except Exception as e:
+                                print(f"Error al redimensionar: {e}")
                                 return av.VideoFrame.from_ndarray(img, format="bgr24")
                             # Detect faces - la función ahora devuelve directamente los bboxes
+                            try:
+                                bboxes = detect_face_dnn(face_net, small_img, confidence_threshold)
+                            except Exception as e:
+                                print(f"Error al detectar rostros: {e}")
+                                bboxes = []
                             # Ajustar bounding boxes al tamaño original
                             original_bboxes = []
                             self.face_count = len(original_bboxes)
                             current_time = time.time()
                             elapsed_time = current_time - self.start_time
+                            fps = self.frames_processed / elapsed_time if elapsed_time > 0 else 0
                             # Actualizar métricas en session_state para que sean accesibles fuera
                             st.session_state.faces_detected = self.face_count
                             result_img = img.copy()
                             for i, (x1, y1, x2, y2, conf) in enumerate(original_bboxes):
                                 cv2.rectangle(result_img, (x1, y1), (x2, y2), (0, 255, 0), 2)
+                                cv2.putText(result_img, f"Rostro {i+1}: {conf:.2f}", (x1, y1-10),
                                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
                             # Añadir información FPS y rostros
                             cv2.putText(result_img, f"FPS: {fps:.1f}", (10, 30),
                                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
+                            cv2.putText(result_img, f"Rostros: {self.face_count}", (10, 60),
                                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
                             return av.VideoFrame.from_ndarray(result_img, format="bgr24")
                         except Exception as e:
+                            print(f"Error general en procesamiento de video: {str(e)}")
+                            try:
+                                # Intentar devolver el frame original
+                                return av.VideoFrame.from_ndarray(img, format="bgr24")
+                            except:
+                                # Si eso falla, devolver un frame en blanco como último recurso
+                                blank = np.ones((480, 640, 3), dtype=np.uint8) * 255
+                                return av.VideoFrame.from_ndarray(blank, format="bgr24")
+                # Display WebRTC streamer con opciones simplificadas para mejorar compatibilidad
+                st.info("⚠️ Si el video no carga: Intente usar Chrome, recargar la página o pruebe las opciones alternativas abajo.")
                 webrtc_ctx = webrtc_streamer(
                     key="face-recognition",
                     mode=WebRtcMode.SENDRECV,
                     rtc_configuration=rtc_configuration,
                     media_stream_constraints={"video": {"width": 640, "height": 480}, "audio": False},
+                    video_processor_factory=VideoProcessor,
                     async_processing=True,
                 )
+                # Establecer y actualizar métricas
                 if webrtc_ctx.state.playing:
                     faces_metric.metric("Faces detected", st.session_state.get('faces_detected', 0))
                     fps_metric.metric("FPS", f"{st.session_state.get('fps', 0):.1f}")
                     time_metric.metric("Status", "Running")
+                    # Mostrar instrucciones de uso
+                    st.success("Cámara web activada. Los rostros detectados serán identificados en tiempo real.")
                 else:
                     faces_metric.metric("Faces detected", 0)
+                    fps_metric.metric("FPS", "0")
                     time_metric.metric("Status", "Stopped")
+                    # Mostrar instrucciones de activación
+                    st.warning("Haga clic en START para activar la cámara web. Esta función puede no estar disponible en entornos como Hugging Face Spaces debido a restricciones de seguridad.")
+                # WebRTC troubleshooting
+                with st.expander("Ayuda: Problemas con WebRTC"):
+                    st.markdown("""
+                    ### Solución de problemas con WebRTC
+                    Si el reconocimiento en tiempo real no funciona, puede deberse a las siguientes razones:
+                    1. **Restricciones de seguridad en Hugging Face Spaces**: Algunos navegadores restringen el acceso a la cámara en entornos como este.
+                    2. **Problemas de conexión**: WebRTC requiere establecer una conexión que puede ser bloqueada por firewalls o proxies.
+                    3. **Permisos de cámara**: Es posible que deba conceder permisos explícitos al navegador para acceder a su cámara.
+                    ### Qué hacer:
+                    1. Intente usar otro navegador (Chrome suele funcionar mejor)
+                    2. Asegúrese de que ha concedido permisos de cámara cuando el navegador los solicita
+                    3. Si sigue sin funcionar, use las opciones alternativas que se muestran a continuación
+                    """)
                     # Añadir opción de cámara alternativa para entornos donde WebRTC no funciona bien
                     st.markdown("---")
                     st.markdown("### Modo de cámara alternativo")
                     col1, col2 = st.columns(2)
+                    simple_camera = col1.button("Usar cámara simple", key="simple_camera_button1", use_container_width=True)
+                    stop_simple_camera = col2.button("Detener cámara simple", key="stop_camera_button1", use_container_width=True)
+                    if simple_camera:
                         st.session_state.simple_camera = True
                         st.session_state.demo_running = False
                         st.session_state.upload_mode = False
+                    if stop_simple_camera:
                         st.session_state.simple_camera = False
                     if st.session_state.get('simple_camera', False):
                         # Configurar métricas
                         faces_metric.metric("Faces detected", 0)
+                        fps_metric.metric("FPS", "N/A")
                         time_metric.metric("Status", "Running")
                         # Cámara simple que toma una imagen a la vez
                         with camera_container:
+                            st.info("Cámara simple activada. Cada imagen se procesa individualmente. Tome una foto con su cámara para detectar rostros.")
                             # Usar imagen de la cámara
                             captured_image = st.camera_input("Tomar foto para reconocimiento", key="camera_simple_input")
                             # Procesar la imagen si está disponible
                             if captured_image is not None:
+                                try:
+                                    # Leer imagen
+                                    image_bytes = captured_image.getvalue()
+                                    image = cv2.imdecode(np.frombuffer(image_bytes, np.uint8), cv2.IMREAD_COLOR)
+                                    if image is not None and image.size > 0:
+                                        # Detectar rostros
+                                        bboxes = detect_face_dnn(face_net, image, confidence_threshold)
+                                        # Actualizar métricas
+                                        faces_metric.metric("Faces detected", len(bboxes))
+                                        # Dibujar resultados
+                                        result_img = image.copy()
+                                        for i, bbox in enumerate(bboxes):
+                                            x1, y1, x2, y2, conf = bbox
+                                            cv2.rectangle(result_img, (x1, y1), (x2, y2), (0, 255, 0), 2)
+                                            cv2.putText(result_img, f"Rostro {i+1}: {conf:.2f}", (x1, y1-10),
+                                                      cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
+                                        # Mostrar resultado
+                                        st.image(result_img, channels="BGR", caption="Rostros detectados", use_column_width=True)
+                                        if len(bboxes) > 0:
+                                            # Si hay rostros registrados, intentar reconocerlos
+                                            if st.session_state.face_database and len(st.session_state.face_database) > 0:
+                                                st.subheader("Reconocimiento de rostros:")
+                                                recognition_results = []
+                                                for i, bbox in enumerate(bboxes):
+                                                    x1, y1, x2, y2, _ = bbox
+                                                    face_img = image[y1:y2, x1:x2]
+                                                    # Extraer el embedding del rostro
+                                                    if model_choice == "VGG-Face":
+                                                        embedding = vggface_model(face_img)
+                                                    elif model_choice == "Facenet":
+                                                        embedding = facenet_model(face_img)
+                                                    elif model_choice == "OpenFace":
+                                                        embedding = openface_model(face_img)
+                                                    elif model_choice == "ArcFace":
+                                                        embedding = arcface_model(face_img)
+                                                    else:  # Default to VGG-Face
+                                                        embedding = vggface_model(face_img)
+                                                    # Comparar con rostros registrados
+                                                    best_match = None
+                                                    best_similarity = -1
+                                                    for name, info in st.session_state.face_database.items():
+                                                        if 'embeddings' in info and info['embeddings']:
+                                                            for emb_info in info['embeddings']:
+                                                                if emb_info['model'] == model_choice:
+                                                                    stored_emb = emb_info['embedding']
+                                                                    similarity = cosine_similarity(embedding, stored_emb)
+                                                                    if similarity > similarity_threshold and similarity > best_similarity:
+                                                                        best_similarity = similarity
+                                                                        best_match = name
+                                                    if best_match is not None:
+                                                        recognition_results.append({
+                                                            'bbox': bbox,
+                                                            'name': best_match,
+                                                            'similarity': best_similarity
+                                                        })
+                                                # Mostrar resultados de reconocimiento
+                                                if recognition_results:
+                                                    result_with_names = result_img.copy()
+                                                    for result in recognition_results:
+                                                        x1, y1, x2, y2, _ = result['bbox']
+                                                        name = result['name']
+                                                        similarity = result['similarity']
+                                                        # Dibujar nombre y similitud
+                                                        cv2.rectangle(result_with_names, (x1, y1), (x2, y2), (0, 255, 0), 2)
+                                                        label = f"{name}: {similarity:.2f}"
+                                                        cv2.putText(result_with_names, label, (x1, y1-10),
+                                                                  cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
+                                                    st.image(result_with_names, channels="BGR", caption="Rostros reconocidos", use_column_width=True)
+                                                    # Mostrar tabla de resultados
+                                                    results_df = pd.DataFrame([
+                                                        {"Nombre": r['name'], "Confianza": f"{r['similarity']:.2f}"}
+                                                        for r in recognition_results
+                                                    ])
+                                                    st.table(results_df)
+                                                else:
+                                                    st.warning("No se pudo reconocer ninguno de los rostros detectados.")
+                                            else:
+                                                st.info("No hay rostros registrados para comparar. Registre rostros en la sección 'Face Registration'.")
+                                            st.success(f"Se detectaron {len(bboxes)} rostros")
+                                        else:
+                                            st.warning("No se detectaron rostros. Intente con una iluminación mejor o una posición diferente.")
+                                    else:
+                                        st.error("No se pudo procesar la imagen. Intente tomar otra foto.")
+                                except Exception as e:
+                                    st.error(f"Error al procesar la imagen: {str(e)}")
+                                    st.info("Intente tomar otra foto o use otra opción.")
                     # Opción alternativa en caso de problemas con WebRTC (mantenemos esta opción también)
                     st.markdown("---")