Spaces:

jarondon82
/

ComputerVisionProject

Build error

App Files Community

jarondon82 committed on Mar 22, 2025

Commit

c2a7084

1 Parent(s): 39e4dcc

Implementación de captura automática continua usando componente HTML/JavaScript personalizado

Browse files

Files changed (1) hide show

streamlit_app.py +241 -113

streamlit_app.py CHANGED Viewed

@@ -2358,6 +2358,43 @@ def main():
                 st.markdown("### Continuous Capture Mode")
                 st.info("⚠️ Recommended mode for Hugging Face: Captures frames continuously with reliable camera access.")
                 col1, col2 = st.columns(2)
                 start_continuous = col1.button("Start Continuous Capture", key="start_continuous_button", use_container_width=True)
                 stop_continuous = col2.button("Stop Continuous Capture", key="stop_continuous_button", use_container_width=True)
@@ -2386,132 +2423,223 @@ def main():
                     fps_metric.metric("FPS", "Processing...")
                     time_metric.metric("Status", "Running")
-                    # Capturar imagen y procesarla
                     with camera_container:
-                        st.info("Continuous capture mode active. Processing frames automatically.")
-                        # Incrementar contador de frames para forzar una nueva captura en cada ciclo
-                        frame_key = f"continuous_frame_{st.session_state.get('frame_count', 0)}"
-                        captured_image = st.camera_input("Camera feed", key=frame_key)
-                        if captured_image is not None:
-                            try:
-                                # Procesar la imagen
-                                image_bytes = captured_image.getvalue()
-                                image = cv2.imdecode(np.frombuffer(image_bytes, np.uint8), cv2.IMREAD_COLOR)
-                                if image is not None and image.size > 0:
-                                    # Detectar rostros
-                                    bboxes = detect_face_dnn(face_net, image, confidence_threshold)
-                                    # Actualizar métricas
-                                    faces_metric.metric("Faces detected", len(bboxes))
-                                    # Incrementar contador de frames procesados
-                                    st.session_state.frames_processed += 1
-                                    # Calcular FPS real (actualizar cada segundo)
-                                    current_time = time.time()
-                                    elapsed = current_time - st.session_state.start_time
-                                    if current_time - st.session_state.last_fps_update >= 1.0:
-                                        fps = st.session_state.frames_processed / elapsed
-                                        fps_metric.metric("FPS", f"{fps:.1f}")
-                                        st.session_state.last_fps_update = current_time
-                                    # Dibujar resultados
-                                    result_img = image.copy()
-                                    for i, bbox in enumerate(bboxes):
-                                        x1, y1, x2, y2, conf = bbox
-                                        cv2.rectangle(result_img, (x1, y1), (x2, y2), (0, 255, 0), 2)
-                                        cv2.putText(result_img, f"Face {i+1}: {conf:.2f}", (x1, y1-10),
-                                                  cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
-                                    # Mostrar resultado
-                                    with result_container:
-                                        st.image(result_img, channels="BGR", caption=f"Frame {st.session_state.frames_processed}", use_container_width=True)
-                                    # Si hay rostros y hay una base de datos, intentar reconocerlos
-                                    if len(bboxes) > 0 and st.session_state.face_database and len(st.session_state.face_database) > 0:
-                                        recognition_results = []
                                         for i, bbox in enumerate(bboxes):
-                                            x1, y1, x2, y2, _ = bbox
-                                            face_img = image[y1:y2, x1:x2]
-                                            # Extraer el embedding del rostro con el modelo seleccionado
-                                            if model_choice == "VGG-Face":
-                                                embedding = vggface_model(face_img)
-                                            elif model_choice == "Facenet":
-                                                embedding = facenet_model(face_img)
-                                            elif model_choice == "OpenFace":
-                                                embedding = openface_model(face_img)
-                                            elif model_choice == "ArcFace":
-                                                embedding = arcface_model(face_img)
-                                            else:
-                                                embedding = vggface_model(face_img)
-                                            # Comparar con rostros registrados
-                                            best_match = None
-                                            best_similarity = -1
-                                            for name, info in st.session_state.face_database.items():
-                                                if 'embeddings' in info and info['embeddings']:
-                                                    # Buscar embedding del mismo modelo
-                                                    for emb in info['embeddings']:
-                                                        if isinstance(emb, dict) and 'model' in emb and emb['model'] == model_choice:
-                                                            stored_emb = emb['embedding']
-                                                            similarity = cosine_similarity(embedding, stored_emb)
-                                                            if similarity > similarity_threshold/100 and similarity > best_similarity:
-                                                                best_similarity = similarity
-                                                                best_match = name
-                                            if best_match is not None:
-                                                recognition_results.append({
-                                                    'bbox': bbox,
-                                                    'name': best_match,
-                                                    'similarity': best_similarity
-                                                })
-                                        # Mostrar resultados de reconocimiento
-                                        if recognition_results:
-                                            result_with_names = result_img.copy()
-                                            for result in recognition_results:
-                                                x1, y1, x2, y2, _ = result['bbox']
-                                                name = result['name']
-                                                similarity = result['similarity']
-                                                # Dibujar nombre y similitud
-                                                cv2.rectangle(result_with_names, (x1, y1), (x2, y2), (0, 255, 0), 2)
-                                                label = f"{name}: {similarity:.2f}"
-                                                cv2.putText(result_with_names, label, (x1, y1-10),
-                                                          cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
-                                            with result_container:
-                                                st.image(result_with_names, channels="BGR", caption="Recognized faces", use_container_width=True)
-                                                # Mostrar tabla de resultados
-                                                results_df = pd.DataFrame([
-                                                    {"Name": r['name'], "Confidence": f"{r['similarity']:.2f}"}
-                                                    for r in recognition_results
-                                                ])
-                                                st.table(results_df)
-                                    # Incrementar contador para siguiente frame
-                                    st.session_state.frame_count += 1
-                                    # Recargar para capturar siguiente frame (si todavía está activo)
-                                    if st.session_state.get('continuous_capture', False):
-                                        time.sleep(0.1)  # Pequeña pausa para evitar sobrecarga
-                                        st.experimental_rerun()
-                                else:
-                                    st.error("Could not process the image. Try taking another photo.")
-                            except Exception as e:
-                                st.error(f"Error processing image: {str(e)}")
-                                st.info("Try again or use another camera mode.")
                 # Añadir opción de cámara alternativa para entornos donde WebRTC no funciona bien
                 st.markdown("---")
                 st.markdown("### Alternative Camera Mode")

                 st.markdown("### Continuous Capture Mode")
                 st.info("⚠️ Recommended mode for Hugging Face: Captures frames continuously with reliable camera access.")
+                # Configuración del modo de captura continua
+                with st.expander("Configuration", expanded=False):
+                    continuous_model_choice = st.selectbox(
+                        "Embedding model for recognition",
+                        ["VGG-Face", "Facenet", "OpenFace", "ArcFace"],
+                        key="continuous_model_choice",
+                        index=0 if "continuous_model_choice" not in st.session_state else ["VGG-Face", "Facenet", "OpenFace", "ArcFace"].index(st.session_state.continuous_model_choice)
+                    )
+                    continuous_similarity_threshold = st.slider(
+                        "Similarity threshold (%)",
+                        min_value=35.0,
+                        max_value=95.0,
+                        value=45.0,
+                        step=5.0,
+                        key="continuous_similarity_threshold"
+                    )
+                    continuous_confidence_threshold = st.slider(
+                        "Detection confidence",
+                        min_value=0.3,
+                        max_value=0.9,
+                        value=0.5,
+                        step=0.05,
+                        key="continuous_confidence_threshold"
+                    )
+                    capture_fps = st.slider(
+                        "Capture frames per second",
+                        min_value=0.5,
+                        max_value=5.0,
+                        value=1.0,
+                        step=0.5,
+                        key="capture_fps",
+                        help="Higher values capture more frames but may overload the system"
+                    )
                 col1, col2 = st.columns(2)
                 start_continuous = col1.button("Start Continuous Capture", key="start_continuous_button", use_container_width=True)
                 stop_continuous = col2.button("Stop Continuous Capture", key="stop_continuous_button", use_container_width=True)
                     fps_metric.metric("FPS", "Processing...")
                     time_metric.metric("Status", "Running")
+                    # Usar un componente personalizado con JavaScript para captura automática
                     with camera_container:
+                        st.info("Auto-capture enabled. Camera should start automatically.")
+                        # Componente HTML/JavaScript para acceder a la cámara automáticamente
+                        camera_html = """
+                        <div style="margin-bottom: 20px;">
+                            <video id="webcam" autoplay playsinline width="640" height="480" style="border-radius: 5px;"></video>
+                            <canvas id="canvas" width="640" height="480" style="display: none;"></canvas>
+                        </div>
+                        <script>
+                            const video = document.getElementById('webcam');
+                            const canvas = document.getElementById('canvas');
+                            const ctx = canvas.getContext('2d');
+                            let captureInterval;
+                            // Configuración dinámica del FPS (desde Streamlit)
+                            const captureDelay = 1000 / %s;
+                            // Iniciar la cámara
+                            async function setupCamera() {
+                                try {
+                                    const stream = await navigator.mediaDevices.getUserMedia({
+                                        'video': { width: 640, height: 480 },
+                                        'audio': false
+                                    });
+                                    video.srcObject = stream;
+                                    // Esperar a que la cámara esté lista
+                                    return new Promise((resolve) => {
+                                        video.onloadedmetadata = () => {
+                                            video.play();
+                                            resolve(video);
+                                        };
+                                    });
+                                } catch (error) {
+                                    console.error('Error accessing camera:', error);
+                                    window.parent.postMessage({
+                                        type: 'streamlit:setComponentValue',
+                                        value: { error: 'Camera access denied or not available' }
+                                    }, '*');
+                                }
+                            }
+                            // Capturar frame y enviar a Streamlit
+                            function captureFrame() {
+                                if (video.readyState === video.HAVE_ENOUGH_DATA) {
+                                    // Dibujar el video en el canvas
+                                    ctx.drawImage(video, 0, 0, canvas.width, canvas.height);
+                                    // Convertir a base64
+                                    const imageData = canvas.toDataURL('image/jpeg', 0.8);
+                                    // Enviar los datos a Streamlit
+                                    window.parent.postMessage({
+                                        type: 'streamlit:setComponentValue',
+                                        value: { image: imageData, timestamp: Date.now() }
+                                    }, '*');
+                                }
+                            }
+                            // Arrancar todo
+                            async function initCapture() {
+                                await setupCamera();
+                                // Empezar a capturar frames periódicamente
+                                captureInterval = setInterval(captureFrame, captureDelay);
+                            }
+                            // Limpiar al salir
+                            function stopCapture() {
+                                clearInterval(captureInterval);
+                                if (video.srcObject) {
+                                    video.srcObject.getTracks().forEach(track => track.stop());
+                                }
+                            }
+                            // Iniciar captura automáticamente
+                            initCapture();
+                            // Limpiar cuando se desmonte el componente
+                            window.addEventListener('beforeunload', stopCapture);
+                        </script>
+                        """ % st.session_state.get('capture_fps', 1.0)
+                        # Renderizar el componente
+                        camera_component = st.components.v1.html(camera_html, height=520)
+                        # Procesar la imagen si está disponible (desde JavaScript)
+                        if camera_component is not None and isinstance(camera_component, dict):
+                            if 'error' in camera_component:
+                                st.error(f"Camera error: {camera_component['error']}")
+                            elif 'image' in camera_component:
+                                try:
+                                    # Convertir image base64 a imagen OpenCV
+                                    encoded_data = camera_component['image'].split(',')[1]
+                                    nparr = np.frombuffer(base64.b64decode(encoded_data), np.uint8)
+                                    image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
+                                    if image is not None and image.size > 0:
+                                        # Usar la configuración local
+                                        local_confidence = st.session_state.get('continuous_confidence_threshold', 0.5)
+                                        # Detectar rostros
+                                        bboxes = detect_face_dnn(face_net, image, local_confidence)
+                                        # Actualizar métricas
+                                        faces_metric.metric("Faces detected", len(bboxes))
+                                        # Incrementar contador de frames procesados
+                                        st.session_state.frames_processed += 1
+                                        # Calcular FPS real (actualizar cada segundo)
+                                        current_time = time.time()
+                                        elapsed = current_time - st.session_state.start_time
+                                        if current_time - st.session_state.last_fps_update >= 1.0:
+                                            fps = st.session_state.frames_processed / elapsed
+                                            fps_metric.metric("FPS", f"{fps:.1f}")
+                                            st.session_state.last_fps_update = current_time
+                                        # Dibujar resultados
+                                        result_img = image.copy()
                                         for i, bbox in enumerate(bboxes):
+                                            x1, y1, x2, y2, conf = bbox
+                                            cv2.rectangle(result_img, (x1, y1), (x2, y2), (0, 255, 0), 2)
+                                            cv2.putText(result_img, f"Face {i+1}: {conf:.2f}", (x1, y1-10),
+                                                      cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
+                                        # Mostrar resultado
+                                        with result_container:
+                                            st.image(result_img, channels="BGR", caption="Real-time capture", use_container_width=True)
+                                        # Si hay rostros y hay una base de datos, intentar reconocerlos
+                                        if len(bboxes) > 0 and st.session_state.face_database and len(st.session_state.face_database) > 0:
+                                            recognition_results = []
+                                            # Usar la configuración local
+                                            local_model = st.session_state.get('continuous_model_choice', 'VGG-Face')
+                                            local_threshold = st.session_state.get('continuous_similarity_threshold', 45.0) / 100.0
+                                            for i, bbox in enumerate(bboxes):
+                                                x1, y1, x2, y2, _ = bbox
+                                                face_img = image[y1:y2, x1:x2]
+                                                # Extraer el embedding del rostro con el modelo seleccionado
+                                                if local_model == "VGG-Face":
+                                                    embedding = vggface_model(face_img)
+                                                elif local_model == "Facenet":
+                                                    embedding = facenet_model(face_img)
+                                                elif local_model == "OpenFace":
+                                                    embedding = openface_model(face_img)
+                                                elif local_model == "ArcFace":
+                                                    embedding = arcface_model(face_img)
+                                                else:
+                                                    embedding = vggface_model(face_img)
+                                                # Comparar con rostros registrados
+                                                best_match = None
+                                                best_similarity = -1
+                                                for name, info in st.session_state.face_database.items():
+                                                    if 'embeddings' in info and info['embeddings']:
+                                                        # Buscar embedding del mismo modelo
+                                                        for emb in info['embeddings']:
+                                                            if isinstance(emb, dict) and 'model' in emb and emb['model'] == local_model:
+                                                                stored_emb = emb['embedding']
+                                                                similarity = cosine_similarity(embedding, stored_emb)
+                                                                if similarity > local_threshold and similarity > best_similarity:
+                                                                    best_similarity = similarity
+                                                                    best_match = name
+                                                            elif not isinstance(emb, dict) and 'models' in info and local_model in info['models']:
+                                                                # Compatibilidad con formato anterior
+                                                                model_idx = info['models'].index(local_model)
+                                                                if model_idx < len(info['embeddings']):
+                                                                    stored_emb = info['embeddings'][model_idx]
+                                                                    similarity = cosine_similarity(embedding, stored_emb)
+                                                                    if similarity > local_threshold and similarity > best_similarity:
+                                                                        best_similarity = similarity
+                                                                        best_match = name
+                                                if best_match is not None:
+                                                    recognition_results.append({
+                                                        'bbox': bbox,
+                                                        'name': best_match,
+                                                        'similarity': best_similarity
+                                                    })
+                                            # Mostrar resultados de reconocimiento
+                                            if recognition_results:
+                                                result_with_names = result_img.copy()
+                                                for result in recognition_results:
+                                                    x1, y1, x2, y2, _ = result['bbox']
+                                                    name = result['name']
+                                                    similarity = result['similarity']
+                                                    # Dibujar nombre y similitud
+                                                    cv2.rectangle(result_with_names, (x1, y1), (x2, y2), (0, 255, 0), 2)
+                                                    label = f"{name}: {similarity:.2f}"
+                                                    cv2.putText(result_with_names, label, (x1, y1-10),
+                                                              cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
+                                                with result_container:
+                                                    st.image(result_with_names, channels="BGR", caption="Recognized faces", use_container_width=True)
+                                                    # Mostrar tabla de resultados
+                                                    results_df = pd.DataFrame([
+                                                        {"Name": r['name'], "Confidence": f"{r['similarity']:.2f}"}
+                                                        for r in recognition_results
+                                                    ])
+                                                    st.table(results_df)
+                                except Exception as e:
+                                    st.error(f"Error processing camera frame: {str(e)}")
+                                    st.info("Camera continues to run. Processing will be attempted on next frame.")
                 # Añadir opción de cámara alternativa para entornos donde WebRTC no funciona bien
                 st.markdown("---")
                 st.markdown("### Alternative Camera Mode")