jarondon82 commited on
Commit
19ebc3a
·
1 Parent(s): 44a87dc

Implementada WebRTC para reconocimiento en tiempo real

Browse files
Files changed (2) hide show
  1. requirements.txt +8 -5
  2. streamlit_app.py +163 -232
requirements.txt CHANGED
@@ -1,9 +1,9 @@
1
  streamlit==1.31.0
2
  opencv-python-headless==4.8.0.76
3
- numpy==1.23.5
4
- Pillow==10.0.0
5
- scikit-learn==1.0.2
6
- matplotlib==3.7.2
7
  pandas==2.0.3
8
  tensorflow==2.12.0
9
  keras==2.12.0
@@ -12,4 +12,7 @@ mtcnn==0.1.1
12
  retina-face==0.0.12
13
  requests==2.32.2
14
  dlib-binary==19.24.1
15
- deepface==0.0.79
 
 
 
 
1
  streamlit==1.31.0
2
  opencv-python-headless==4.8.0.76
3
+ numpy==1.24.4
4
+ Pillow==10.2.0
5
+ scikit-learn==1.5.0
6
+ matplotlib==3.9.0
7
  pandas==2.0.3
8
  tensorflow==2.12.0
9
  keras==2.12.0
 
12
  retina-face==0.0.12
13
  requests==2.32.2
14
  dlib-binary==19.24.1
15
+ deepface==0.0.82
16
+ streamlit-webrtc==0.47.1
17
+ gdown==5.1.0
18
+ av==10.0.0
streamlit_app.py CHANGED
@@ -12,6 +12,8 @@ import matplotlib.pyplot as plt
12
  import pickle
13
  from sklearn.metrics.pairwise import cosine_similarity # type: ignore
14
  import pandas as pd
 
 
15
 
16
  # Importar las utilidades para la base de datos de rostros
17
  try:
@@ -2085,7 +2087,7 @@ def main():
2085
  st.warning("No faces registered. Please register at least one face first.")
2086
  else:
2087
  # Configuración avanzada
2088
- with st.expander("Configuración avanzada", expanded=False):
2089
  # Configuración de umbral de similitud
2090
  similarity_threshold = st.slider(
2091
  "Similarity threshold (%)",
@@ -2094,7 +2096,7 @@ def main():
2094
  value=45.0,
2095
  step=5.0,
2096
  key="realtime_threshold",
2097
- help="Porcentaje mínimo de similitud para considerar una coincidencia"
2098
  )
2099
 
2100
  confidence_threshold = st.slider(
@@ -2104,61 +2106,34 @@ def main():
2104
  value=0.5,
2105
  step=0.05,
2106
  key="realtime_confidence",
2107
- help="Un valor más alto es más restrictivo pero más preciso"
2108
  )
2109
 
2110
  model_choice = st.selectbox(
2111
  "Embedding model",
2112
  ["VGG-Face", "Facenet", "OpenFace", "ArcFace"],
2113
  key="realtime_model",
2114
- help="Diferentes modelos pueden dar resultados distintos según las características faciales"
2115
  )
2116
 
2117
  voting_method = st.radio(
2118
- "Método de votación para múltiples embeddings",
2119
- ["Promedio", "Mejor coincidencia", "Votación ponderada"],
2120
  key="realtime_voting",
2121
- help="Cómo combinar resultados cuando hay múltiples imágenes de una persona"
2122
  )
2123
 
2124
  show_confidence = st.checkbox(
2125
- "Mostrar porcentaje de confianza",
2126
  value=True,
2127
- help="Mostrar el porcentaje de similitud junto al nombre"
2128
  )
2129
 
2130
  stabilize_results = st.checkbox(
2131
- "Estabilizar resultados",
2132
  value=True,
2133
- help="Reduce fluctuaciones en la identificación usando un promedio temporal"
2134
  )
2135
-
2136
- fps_limit = st.slider(
2137
- "Límite de FPS",
2138
- min_value=5,
2139
- max_value=30,
2140
- value=15,
2141
- step=1,
2142
- help="Limitar los frames por segundo para reducir uso de CPU"
2143
- )
2144
-
2145
- # Inicializar estado de la cámara
2146
- if 'recognition_camera_running' not in st.session_state:
2147
- st.session_state.recognition_camera_running = False
2148
-
2149
- # Inicializar historial de reconocimiento para estabilización
2150
- if 'recognition_history' not in st.session_state:
2151
- st.session_state.recognition_history = {}
2152
-
2153
- # Botones para controlar la cámara
2154
- col1, col2 = st.columns(2)
2155
- start_button = col1.button("Iniciar Cámara", key="start_recognition_camera",
2156
- on_click=lambda: setattr(st.session_state, 'recognition_camera_running', True))
2157
- stop_button = col2.button("Detener Cámara", key="stop_recognition_camera",
2158
- on_click=lambda: setattr(st.session_state, 'recognition_camera_running', False))
2159
-
2160
- # Placeholder para el video
2161
- video_placeholder = st.empty()
2162
 
2163
  # Placeholder para métricas
2164
  metrics_cols = st.columns(3)
@@ -2169,211 +2144,167 @@ def main():
2169
  with metrics_cols[2]:
2170
  time_metric = st.empty()
2171
 
2172
- if st.session_state.recognition_camera_running:
2173
- st.info("Cámara activada. Procesando video en tiempo real...")
2174
-
2175
- # Inicializar webcam
2176
- cap = cv2.VideoCapture(0)
2177
-
2178
- if not cap.isOpened():
2179
- st.error("No se pudo acceder a la cámara. Asegúrese de que esté conectada y no esté siendo utilizada por otra aplicación.")
2180
- st.session_state.recognition_camera_running = False
2181
- else:
2182
- try:
2183
- # Variables para métricas
2184
- frame_count = 0
2185
- start_time = time.time()
2186
- last_frame_time = start_time
2187
- fps_history = []
 
 
 
 
 
 
 
 
 
 
2188
 
2189
- while st.session_state.recognition_camera_running:
2190
- # Control de FPS
2191
- current_time = time.time()
2192
- elapsed = current_time - last_frame_time
2193
- if elapsed < 1.0/fps_limit:
2194
- time.sleep(0.01) # Pequeña pausa para no sobrecargar la CPU
2195
- continue
2196
-
2197
- last_frame_time = current_time
2198
-
2199
- # Leer frame
2200
- ret, frame = cap.read()
2201
- if not ret:
2202
- st.error("Error al leer frame de la cámara.")
2203
- break
2204
-
2205
- # Actualizar contador de frames
2206
- frame_count += 1
2207
 
2208
- # Calcular FPS
2209
- if frame_count % 5 == 0:
2210
- fps = 5 / (current_time - start_time)
2211
- fps_history.append(fps)
2212
- if len(fps_history) > 10:
2213
- fps_history.pop(0)
2214
- avg_fps = sum(fps_history) / len(fps_history)
2215
- start_time = current_time
2216
-
2217
- # Actualizar métricas
2218
- fps_metric.metric("FPS", f"{avg_fps:.1f}")
2219
- time_metric.metric("Tiempo activo", f"{int(current_time - time.time() + st.session_state.get('camera_start_time', current_time))}s")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2220
 
2221
- # Detect rostros
2222
- detections = detect_face_dnn(face_net, frame, confidence_threshold)
2223
- _, bboxes = process_face_detections(frame, detections, confidence_threshold)
2224
 
2225
- # Actualizar métrica de rostros
2226
- if frame_count % 5 == 0:
2227
- faces_metric.metric("Faces detected", len(bboxes))
2228
 
2229
- # Reconocer cada rostro
2230
- result_frame = frame.copy()
2231
 
2232
- for i, bbox in enumerate(bboxes):
2233
- face_id = f"face_{i}"
 
 
 
 
 
 
 
2234
 
2235
- # Extraer embedding del rostro
2236
- embedding = extract_face_embeddings(frame, bbox, model_name=model_choice)
2237
 
2238
- if embedding is not None:
2239
- # Compare con rostros registrados
2240
- matches = []
2241
-
2242
- for name, info in st.session_state.face_database.items():
2243
- if 'embeddings' in info:
2244
- # Nuevo formato con múltiples embeddings
2245
- similarities = []
2246
-
2247
- for idx, registered_embedding in enumerate(info['embeddings']):
2248
- # Usar el mismo modelo si es posible
2249
- if info['models'][idx] == model_choice:
2250
- weight = 1.0 # Dar más peso a embeddings del mismo modelo
2251
- else:
2252
- weight = 0.8 # Peso menor para embeddings de otros modelos
2253
-
2254
- # Asegurarse de que los embeddings sean compatibles
2255
- try:
2256
- similarity = cosine_similarity([embedding["embedding"]], [registered_embedding])[0][0] * 100 * weight
2257
- similarities.append(similarity)
2258
- except ValueError as e:
2259
- # Si hay error de dimensiones incompatibles, omitir esta comparación
2260
- continue
2261
-
2262
- # Aplicar método de votación seleccionado
2263
- if voting_method == "Promedio":
2264
- final_similarity = sum(similarities) / len(similarities)
2265
- elif voting_method == "Mejor coincidencia":
2266
- final_similarity = max(similarities)
2267
- else: # Votación ponderada
2268
- # Dar más peso a similitudes más altas
2269
- weighted_sum = sum(s * (i+1) for i, s in enumerate(sorted(similarities)))
2270
- weights_sum = sum(i+1 for i in range(len(similarities)))
2271
- final_similarity = weighted_sum / weights_sum
2272
-
2273
- matches.append({"name": name, "similarity": final_similarity})
2274
- else:
2275
- # Formato antiguo con un solo embedding
2276
- registered_embedding = info['embedding']
2277
- try:
2278
- similarity = cosine_similarity([embedding["embedding"]], [registered_embedding])[0][0] * 100
2279
- matches.append({"name": name, "similarity": similarity})
2280
- except ValueError as e:
2281
- # Si hay error de dimensiones incompatibles, omitir esta comparación
2282
- # Modelos incompatibles: {embedding['model']} vs formato antiguo
2283
- continue
2284
-
2285
- # Ordenar coincidencias por similitud
2286
- matches.sort(key=lambda x: x["similarity"], reverse=True)
2287
-
2288
- # Estabilizar resultados si está activado
2289
- if stabilize_results and matches:
2290
- best_match = matches[0]
2291
-
2292
- # Inicializar historial para este rostro si no existe
2293
- if face_id not in st.session_state.recognition_history:
2294
- st.session_state.recognition_history[face_id] = {
2295
- "names": [],
2296
- "similarities": []
2297
- }
2298
-
2299
- # Añadir al historial
2300
- history = st.session_state.recognition_history[face_id]
2301
- history["names"].append(best_match["name"])
2302
- history["similarities"].append(best_match["similarity"])
2303
-
2304
- # Limitar historial a los últimos 10 frames
2305
- if len(history["names"]) > 10:
2306
- history["names"].pop(0)
2307
- history["similarities"].pop(0)
2308
-
2309
- # Determinar el nombre más frecuente en el historial
2310
- if len(history["names"]) >= 3: # Necesitamos al menos 3 frames para estabilizar
2311
- name_counts = {}
2312
- for name in history["names"]:
2313
- if name not in name_counts:
2314
- name_counts[name] = 0
2315
- name_counts[name] += 1
2316
-
2317
- # Encontrar el nombre más frecuente
2318
- stable_name = max(name_counts.items(), key=lambda x: x[1])[0]
2319
-
2320
- # Calcular similitud promedio para ese nombre
2321
- stable_similarities = [
2322
- history["similarities"][i]
2323
- for i in range(len(history["names"]))
2324
- if history["names"][i] == stable_name
2325
- ]
2326
- stable_similarity = sum(stable_similarities) / len(stable_similarities)
2327
-
2328
- # Reemplazar la mejor coincidencia con el resultado estabilizado
2329
- best_match = {"name": stable_name, "similarity": stable_similarity}
2330
- else:
2331
- best_match = matches[0]
2332
- else:
2333
- best_match = matches[0] if matches else None
2334
 
2335
- # Dibujar resultado en la imagen
2336
- x1, y1, x2, y2, _ = bbox
 
 
 
 
 
 
 
2337
 
2338
- if best_match and best_match["similarity"] >= similarity_threshold:
2339
- # Coincidencia encontrada
2340
- # Color basado en nivel de similitud
2341
- if best_match["similarity"] >= 80:
2342
- color = (0, 255, 0) # Verde para alta similitud
2343
- elif best_match["similarity"] >= 65:
2344
- color = (0, 255, 255) # Amarillo para media similitud
2345
- else:
2346
- color = (0, 165, 255) # Naranja para baja similitud
2347
-
2348
- # Dibujar rectángulo y etiqueta
2349
- cv2.rectangle(result_frame, (x1, y1), (x2, y2), color, 2)
2350
-
2351
- if show_confidence:
2352
- label = f"{best_match['name']}: {best_match['similarity']:.1f}%"
2353
- else:
2354
- label = f"{best_match['name']}"
2355
-
2356
- cv2.putText(result_frame, label, (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
2357
- else:
2358
- # No hay coincidencia
2359
- cv2.rectangle(result_frame, (x1, y1), (x2, y2), (0, 0, 255), 2)
2360
-
2361
- if best_match:
2362
- label = f"Desconocido: {best_match['similarity']:.1f}%" if show_confidence else "Desconocido"
2363
- else:
2364
- label = "Desconocido"
2365
-
2366
- cv2.putText(result_frame, label, (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
2367
-
2368
- # Mostrar resultado
2369
- video_placeholder.image(result_frame, channels="BGR", use_container_width=True)
2370
- finally:
2371
- # Liberar la cámara cuando se detenga
2372
- cap.release()
2373
- # Limpiar historial de reconocimiento
2374
- st.session_state.recognition_history = {}
2375
  else:
2376
- st.info("Haga clic en 'Iniciar Cámara' para comenzar el reconocimiento en tiempo real.")
 
 
 
 
 
 
 
 
 
2377
 
2378
  # Si se ejecuta este archivo directamente, llamar a la función main
2379
  if __name__ == "__main__":
 
12
  import pickle
13
  from sklearn.metrics.pairwise import cosine_similarity # type: ignore
14
  import pandas as pd
15
+ import av
16
+ from streamlit_webrtc import webrtc_streamer, VideoProcessorBase, RTCConfiguration, WebRtcMode
17
 
18
  # Importar las utilidades para la base de datos de rostros
19
  try:
 
2087
  st.warning("No faces registered. Please register at least one face first.")
2088
  else:
2089
  # Configuración avanzada
2090
+ with st.expander("Advanced Configuration", expanded=False):
2091
  # Configuración de umbral de similitud
2092
  similarity_threshold = st.slider(
2093
  "Similarity threshold (%)",
 
2096
  value=45.0,
2097
  step=5.0,
2098
  key="realtime_threshold",
2099
+ help="Minimum similarity percentage to consider a match"
2100
  )
2101
 
2102
  confidence_threshold = st.slider(
 
2106
  value=0.5,
2107
  step=0.05,
2108
  key="realtime_confidence",
2109
+ help="Higher value is more restrictive but more accurate"
2110
  )
2111
 
2112
  model_choice = st.selectbox(
2113
  "Embedding model",
2114
  ["VGG-Face", "Facenet", "OpenFace", "ArcFace"],
2115
  key="realtime_model",
2116
+ help="Different models can give different results depending on facial features"
2117
  )
2118
 
2119
  voting_method = st.radio(
2120
+ "Voting method for multiple embeddings",
2121
+ ["Average", "Best match", "Weighted voting"],
2122
  key="realtime_voting",
2123
+ help="How to combine results when there are multiple images of a person"
2124
  )
2125
 
2126
  show_confidence = st.checkbox(
2127
+ "Show confidence percentage",
2128
  value=True,
2129
+ help="Show similarity percentage next to the name"
2130
  )
2131
 
2132
  stabilize_results = st.checkbox(
2133
+ "Stabilize results",
2134
  value=True,
2135
+ help="Reduce identification fluctuations using temporal averaging"
2136
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2137
 
2138
  # Placeholder para métricas
2139
  metrics_cols = st.columns(3)
 
2144
  with metrics_cols[2]:
2145
  time_metric = st.empty()
2146
 
2147
+ # WebRTC configuration
2148
+ rtc_configuration = RTCConfiguration(
2149
+ {"iceServers": [{"urls": ["stun:stun.l.google.com:19302"]}]}
2150
+ )
2151
+
2152
+ # Define callback to update session state with frames processed
2153
# Per-stream video processor for streamlit-webrtc.
#
# recv() is invoked by streamlit-webrtc on a background worker thread, NOT on
# the Streamlit script thread. Metrics are therefore kept as plain attributes
# on the processor instance (readable from the main thread via
# webrtc_ctx.video_processor). The st.session_state mirror writes are kept for
# backward compatibility but guarded, because session_state access from a
# worker thread has no ScriptRunContext and may fail or not propagate.
#
# Closure dependencies (from the enclosing Streamlit page): face_net,
# confidence_threshold, model_choice, voting_method, similarity_threshold,
# show_confidence, plus the helpers detect_face_dnn, process_face_detections
# and extract_face_embeddings.
class VideoProcessor(VideoProcessorBase):
    def __init__(self):
        self.frame_count = 0           # total frames processed on this stream
        self.face_count = 0            # faces detected in the most recent frame
        self.fps = 0.0                 # rolling FPS estimate, refreshed every 5 frames
        self.start_time = time.time()  # start of the current 5-frame FPS window

    def recv(self, frame):
        """Process one incoming frame: detect faces, match them against
        st.session_state.face_database, and return the annotated frame."""
        img = frame.to_ndarray(format="bgr24")
        self.frame_count += 1

        # Detect faces with the DNN detector.
        detections = detect_face_dnn(face_net, img, confidence_threshold)
        _, bboxes = process_face_detections(img, detections, confidence_threshold)
        self.face_count = len(bboxes)

        # Refresh metrics once every 5 frames to keep per-frame cost low.
        if self.frame_count % 5 == 0:
            elapsed = time.time() - self.start_time
            self.fps = 5 / elapsed if elapsed > 0 else 0
            self.start_time = time.time()
            # Best-effort mirror into session_state for legacy readers.
            # NOTE(review): session_state is not safe to touch from this
            # worker thread; failures are deliberately ignored.
            try:
                st.session_state.webrtc_face_count = self.face_count
                st.session_state.webrtc_fps = self.fps
            except Exception:
                pass

        result_frame = img.copy()

        # NOTE(review): face_database is read from the worker thread; assumes
        # it is only mutated between reruns — confirm against registration UI.
        for bbox in bboxes:
            # Embedding for this face crop; None when extraction fails.
            embedding = extract_face_embeddings(img, bbox, model_name=model_choice)
            if embedding is None:
                continue

            matches = []
            for name, info in st.session_state.face_database.items():
                if 'embeddings' in info:
                    # New DB format: several embeddings per person.
                    similarities = []
                    for idx, registered_embedding in enumerate(info['embeddings']):
                        # Prefer embeddings produced by the same model.
                        weight = 1.0 if info['models'][idx] == model_choice else 0.8
                        try:
                            similarity = cosine_similarity([embedding["embedding"]], [registered_embedding])[0][0] * 100 * weight
                            similarities.append(similarity)
                        except ValueError:
                            # Incompatible embedding dimensions: skip this pair.
                            continue

                    # Apply the selected voting method over the collected scores.
                    if similarities:
                        if voting_method == "Average":
                            final_similarity = sum(similarities) / len(similarities)
                        elif voting_method == "Best match":
                            final_similarity = max(similarities)
                        else:  # Weighted voting: higher similarities weigh more
                            weighted_sum = sum(s * (i + 1) for i, s in enumerate(sorted(similarities)))
                            weights_sum = sum(i + 1 for i in range(len(similarities)))
                            final_similarity = weighted_sum / weights_sum
                        matches.append({"name": name, "similarity": final_similarity})
                else:
                    # Old DB format: a single embedding per person.
                    registered_embedding = info['embedding']
                    try:
                        similarity = cosine_similarity([embedding["embedding"]], [registered_embedding])[0][0] * 100
                        matches.append({"name": name, "similarity": similarity})
                    except ValueError:
                        # Incompatible embedding dimensions: skip this person.
                        continue

            matches.sort(key=lambda x: x["similarity"], reverse=True)
            best_match = matches[0] if matches else None

            x1, y1, x2, y2, _ = bbox

            if best_match and best_match["similarity"] >= similarity_threshold:
                # Box colour encodes confidence level (BGR).
                if best_match["similarity"] >= 80:
                    color = (0, 255, 0)      # green: high similarity
                elif best_match["similarity"] >= 65:
                    color = (0, 255, 255)    # yellow: medium similarity
                else:
                    color = (0, 165, 255)    # orange: low similarity

                cv2.rectangle(result_frame, (x1, y1), (x2, y2), color, 2)

                if show_confidence:
                    label = f"{best_match['name']}: {best_match['similarity']:.1f}%"
                else:
                    label = f"{best_match['name']}"

                cv2.putText(result_frame, label, (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
            else:
                # No match above the threshold: red box, "Unknown" label.
                cv2.rectangle(result_frame, (x1, y1), (x2, y2), (0, 0, 255), 2)

                if best_match:
                    label = f"Unknown: {best_match['similarity']:.1f}%" if show_confidence else "Unknown"
                else:
                    label = "Unknown"

                cv2.putText(result_frame, label, (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)

        return av.VideoFrame.from_ndarray(result_frame, format="bgr24")
2276
+
2277
# Launch the WebRTC streamer: the browser sends camera frames to the server,
# VideoProcessor annotates them, and the processed stream is returned.
webrtc_ctx = webrtc_streamer(
    key="face-recognition",
    mode=WebRtcMode.SENDRECV,
    rtc_configuration=rtc_configuration,
    video_processor_factory=VideoProcessor,
    media_stream_constraints={"video": True, "audio": False},
    async_processing=True,
)

# Update metrics while the stream is live.
if webrtc_ctx.state.playing:
    # recv() runs on a worker thread without a ScriptRunContext, so
    # session_state writes from it are unreliable. Read metrics directly
    # from the processor instance, falling back to session_state for
    # processors that only mirror values there.
    processor = webrtc_ctx.video_processor
    face_count = getattr(processor, "face_count", None)
    if face_count is None:
        face_count = st.session_state.get('webrtc_face_count', 0)
    fps = getattr(processor, "fps", None)
    if fps is None:
        fps = st.session_state.get('webrtc_fps', 0)

    faces_metric.metric("Faces detected", face_count)
    fps_metric.metric("FPS", f"{fps:.1f}")
    time_metric.metric("Status", "Running")

    # User guidance while the camera is active.
    st.info("WebRTC camera is active. Face recognition is being processed in real-time.")
    st.warning("Note: For better performance, make sure you have good lighting and face the camera directly.")
else:
    faces_metric.metric("Faces detected", 0)
    fps_metric.metric("FPS", 0)
    time_metric.metric("Status", "Stopped")

    st.info("Click 'Start' to activate the camera and begin real-time face recognition.")
    st.warning("Note: If you're running this in Hugging Face Spaces, some browser permissions may be required.")

# Privacy disclosure for end users.
st.markdown("---")
st.markdown("**Privacy Note**: Video is processed in your browser and on the server. No video data is stored permanently.")
2308
 
2309
  # Si se ejecuta este archivo directamente, llamar a la función main
2310
  if __name__ == "__main__":