jarondon82 committed on
Commit
c2a7084
·
1 Parent(s): 39e4dcc

Implementación de captura automática continua usando componente HTML/JavaScript personalizado

Browse files
Files changed (1) hide show
  1. streamlit_app.py +241 -113
streamlit_app.py CHANGED
@@ -2358,6 +2358,43 @@ def main():
2358
  st.markdown("### Continuous Capture Mode")
2359
  st.info("⚠️ Recommended mode for Hugging Face: Captures frames continuously with reliable camera access.")
2360
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2361
  col1, col2 = st.columns(2)
2362
  start_continuous = col1.button("Start Continuous Capture", key="start_continuous_button", use_container_width=True)
2363
  stop_continuous = col2.button("Stop Continuous Capture", key="stop_continuous_button", use_container_width=True)
@@ -2386,132 +2423,223 @@ def main():
2386
  fps_metric.metric("FPS", "Processing...")
2387
  time_metric.metric("Status", "Running")
2388
 
2389
- # Capturar imagen y procesarla
2390
  with camera_container:
2391
- st.info("Continuous capture mode active. Processing frames automatically.")
2392
-
2393
- # Incrementar contador de frames para forzar una nueva captura en cada ciclo
2394
- frame_key = f"continuous_frame_{st.session_state.get('frame_count', 0)}"
2395
- captured_image = st.camera_input("Camera feed", key=frame_key)
2396
 
2397
- if captured_image is not None:
2398
- try:
2399
- # Procesar la imagen
2400
- image_bytes = captured_image.getvalue()
2401
- image = cv2.imdecode(np.frombuffer(image_bytes, np.uint8), cv2.IMREAD_COLOR)
2402
-
2403
- if image is not None and image.size > 0:
2404
- # Detectar rostros
2405
- bboxes = detect_face_dnn(face_net, image, confidence_threshold)
2406
-
2407
- # Actualizar métricas
2408
- faces_metric.metric("Faces detected", len(bboxes))
2409
-
2410
- # Incrementar contador de frames procesados
2411
- st.session_state.frames_processed += 1
 
 
 
 
 
 
 
 
2412
 
2413
- # Calcular FPS real (actualizar cada segundo)
2414
- current_time = time.time()
2415
- elapsed = current_time - st.session_state.start_time
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2416
 
2417
- if current_time - st.session_state.last_fps_update >= 1.0:
2418
- fps = st.session_state.frames_processed / elapsed
2419
- fps_metric.metric("FPS", f"{fps:.1f}")
2420
- st.session_state.last_fps_update = current_time
2421
 
2422
- # Dibujar resultados
2423
- result_img = image.copy()
2424
- for i, bbox in enumerate(bboxes):
2425
- x1, y1, x2, y2, conf = bbox
2426
- cv2.rectangle(result_img, (x1, y1), (x2, y2), (0, 255, 0), 2)
2427
- cv2.putText(result_img, f"Face {i+1}: {conf:.2f}", (x1, y1-10),
2428
- cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
2429
-
2430
- # Mostrar resultado
2431
- with result_container:
2432
- st.image(result_img, channels="BGR", caption=f"Frame {st.session_state.frames_processed}", use_container_width=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2433
 
2434
- # Si hay rostros y hay una base de datos, intentar reconocerlos
2435
- if len(bboxes) > 0 and st.session_state.face_database and len(st.session_state.face_database) > 0:
2436
- recognition_results = []
 
 
 
 
 
 
 
 
 
2437
 
 
 
 
 
 
 
 
 
 
 
 
2438
  for i, bbox in enumerate(bboxes):
2439
- x1, y1, x2, y2, _ = bbox
2440
- face_img = image[y1:y2, x1:x2]
2441
-
2442
- # Extraer el embedding del rostro con el modelo seleccionado
2443
- if model_choice == "VGG-Face":
2444
- embedding = vggface_model(face_img)
2445
- elif model_choice == "Facenet":
2446
- embedding = facenet_model(face_img)
2447
- elif model_choice == "OpenFace":
2448
- embedding = openface_model(face_img)
2449
- elif model_choice == "ArcFace":
2450
- embedding = arcface_model(face_img)
2451
- else:
2452
- embedding = vggface_model(face_img)
2453
-
2454
- # Comparar con rostros registrados
2455
- best_match = None
2456
- best_similarity = -1
2457
-
2458
- for name, info in st.session_state.face_database.items():
2459
- if 'embeddings' in info and info['embeddings']:
2460
- # Buscar embedding del mismo modelo
2461
- for emb in info['embeddings']:
2462
- if isinstance(emb, dict) and 'model' in emb and emb['model'] == model_choice:
2463
- stored_emb = emb['embedding']
2464
- similarity = cosine_similarity(embedding, stored_emb)
2465
-
2466
- if similarity > similarity_threshold/100 and similarity > best_similarity:
2467
- best_similarity = similarity
2468
- best_match = name
2469
-
2470
- if best_match is not None:
2471
- recognition_results.append({
2472
- 'bbox': bbox,
2473
- 'name': best_match,
2474
- 'similarity': best_similarity
2475
- })
2476
 
2477
- # Mostrar resultados de reconocimiento
2478
- if recognition_results:
2479
- result_with_names = result_img.copy()
 
 
 
 
2480
 
2481
- for result in recognition_results:
2482
- x1, y1, x2, y2, _ = result['bbox']
2483
- name = result['name']
2484
- similarity = result['similarity']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2485
 
2486
- # Dibujar nombre y similitud
2487
- cv2.rectangle(result_with_names, (x1, y1), (x2, y2), (0, 255, 0), 2)
2488
- label = f"{name}: {similarity:.2f}"
2489
- cv2.putText(result_with_names, label, (x1, y1-10),
2490
- cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2491
 
2492
- with result_container:
2493
- st.image(result_with_names, channels="BGR", caption="Recognized faces", use_container_width=True)
 
2494
 
2495
- # Mostrar tabla de resultados
2496
- results_df = pd.DataFrame([
2497
- {"Name": r['name'], "Confidence": f"{r['similarity']:.2f}"}
2498
- for r in recognition_results
2499
- ])
2500
- st.table(results_df)
2501
-
2502
- # Incrementar contador para siguiente frame
2503
- st.session_state.frame_count += 1
2504
-
2505
- # Recargar para capturar siguiente frame (si todavía está activo)
2506
- if st.session_state.get('continuous_capture', False):
2507
- time.sleep(0.1) # Pequeña pausa para evitar sobrecarga
2508
- st.experimental_rerun()
2509
- else:
2510
- st.error("Could not process the image. Try taking another photo.")
2511
- except Exception as e:
2512
- st.error(f"Error processing image: {str(e)}")
2513
- st.info("Try again or use another camera mode.")
2514
-
 
 
 
 
2515
  # Añadir opción de cámara alternativa para entornos donde WebRTC no funciona bien
2516
  st.markdown("---")
2517
  st.markdown("### Alternative Camera Mode")
 
2358
  st.markdown("### Continuous Capture Mode")
2359
  st.info("⚠️ Recommended mode for Hugging Face: Captures frames continuously with reliable camera access.")
2360
 
2361
+ # Configuración del modo de captura continua
2362
+ with st.expander("Configuration", expanded=False):
2363
+ continuous_model_choice = st.selectbox(
2364
+ "Embedding model for recognition",
2365
+ ["VGG-Face", "Facenet", "OpenFace", "ArcFace"],
2366
+ key="continuous_model_choice",
2367
+ index=0 if "continuous_model_choice" not in st.session_state else ["VGG-Face", "Facenet", "OpenFace", "ArcFace"].index(st.session_state.continuous_model_choice)
2368
+ )
2369
+
2370
+ continuous_similarity_threshold = st.slider(
2371
+ "Similarity threshold (%)",
2372
+ min_value=35.0,
2373
+ max_value=95.0,
2374
+ value=45.0,
2375
+ step=5.0,
2376
+ key="continuous_similarity_threshold"
2377
+ )
2378
+
2379
+ continuous_confidence_threshold = st.slider(
2380
+ "Detection confidence",
2381
+ min_value=0.3,
2382
+ max_value=0.9,
2383
+ value=0.5,
2384
+ step=0.05,
2385
+ key="continuous_confidence_threshold"
2386
+ )
2387
+
2388
+ capture_fps = st.slider(
2389
+ "Capture frames per second",
2390
+ min_value=0.5,
2391
+ max_value=5.0,
2392
+ value=1.0,
2393
+ step=0.5,
2394
+ key="capture_fps",
2395
+ help="Higher values capture more frames but may overload the system"
2396
+ )
2397
+
2398
  col1, col2 = st.columns(2)
2399
  start_continuous = col1.button("Start Continuous Capture", key="start_continuous_button", use_container_width=True)
2400
  stop_continuous = col2.button("Stop Continuous Capture", key="stop_continuous_button", use_container_width=True)
 
2423
  fps_metric.metric("FPS", "Processing...")
2424
  time_metric.metric("Status", "Running")
2425
 
2426
+ # Usar un componente personalizado con JavaScript para captura automática
2427
  with camera_container:
2428
+ st.info("Auto-capture enabled. Camera should start automatically.")
 
 
 
 
2429
 
2430
+ # Componente HTML/JavaScript para acceder a la cámara automáticamente
2431
+ camera_html = """
2432
+ <div style="margin-bottom: 20px;">
2433
+ <video id="webcam" autoplay playsinline width="640" height="480" style="border-radius: 5px;"></video>
2434
+ <canvas id="canvas" width="640" height="480" style="display: none;"></canvas>
2435
+ </div>
2436
+ <script>
2437
+ const video = document.getElementById('webcam');
2438
+ const canvas = document.getElementById('canvas');
2439
+ const ctx = canvas.getContext('2d');
2440
+ let captureInterval;
2441
+
2442
+ // Configuración dinámica del FPS (desde Streamlit)
2443
+ const captureDelay = 1000 / %s;
2444
+
2445
+ // Iniciar la cámara
2446
+ async function setupCamera() {
2447
+ try {
2448
+ const stream = await navigator.mediaDevices.getUserMedia({
2449
+ 'video': { width: 640, height: 480 },
2450
+ 'audio': false
2451
+ });
2452
+ video.srcObject = stream;
2453
 
2454
+ // Esperar a que la cámara esté lista
2455
+ return new Promise((resolve) => {
2456
+ video.onloadedmetadata = () => {
2457
+ video.play();
2458
+ resolve(video);
2459
+ };
2460
+ });
2461
+ } catch (error) {
2462
+ console.error('Error accessing camera:', error);
2463
+ window.parent.postMessage({
2464
+ type: 'streamlit:setComponentValue',
2465
+ value: { error: 'Camera access denied or not available' }
2466
+ }, '*');
2467
+ }
2468
+ }
2469
+
2470
+ // Capturar frame y enviar a Streamlit
2471
+ function captureFrame() {
2472
+ if (video.readyState === video.HAVE_ENOUGH_DATA) {
2473
+ // Dibujar el video en el canvas
2474
+ ctx.drawImage(video, 0, 0, canvas.width, canvas.height);
2475
 
2476
+ // Convertir a base64
2477
+ const imageData = canvas.toDataURL('image/jpeg', 0.8);
 
 
2478
 
2479
+ // Enviar los datos a Streamlit
2480
+ window.parent.postMessage({
2481
+ type: 'streamlit:setComponentValue',
2482
+ value: { image: imageData, timestamp: Date.now() }
2483
+ }, '*');
2484
+ }
2485
+ }
2486
+
2487
+ // Arrancar todo
2488
+ async function initCapture() {
2489
+ await setupCamera();
2490
+ // Empezar a capturar frames periódicamente
2491
+ captureInterval = setInterval(captureFrame, captureDelay);
2492
+ }
2493
+
2494
+ // Limpiar al salir
2495
+ function stopCapture() {
2496
+ clearInterval(captureInterval);
2497
+ if (video.srcObject) {
2498
+ video.srcObject.getTracks().forEach(track => track.stop());
2499
+ }
2500
+ }
2501
+
2502
+ // Iniciar captura automáticamente
2503
+ initCapture();
2504
+
2505
+ // Limpiar cuando se desmonte el componente
2506
+ window.addEventListener('beforeunload', stopCapture);
2507
+ </script>
2508
+ """ % st.session_state.get('capture_fps', 1.0)
2509
+
2510
+ # Renderizar el componente
2511
+ camera_component = st.components.v1.html(camera_html, height=520)
2512
+
2513
+ # Procesar la imagen si está disponible (desde JavaScript)
2514
+ if camera_component is not None and isinstance(camera_component, dict):
2515
+ if 'error' in camera_component:
2516
+ st.error(f"Camera error: {camera_component['error']}")
2517
+ elif 'image' in camera_component:
2518
+ try:
2519
+ # Convertir image base64 a imagen OpenCV
2520
+ encoded_data = camera_component['image'].split(',')[1]
2521
+ nparr = np.frombuffer(base64.b64decode(encoded_data), np.uint8)
2522
+ image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
2523
 
2524
+ if image is not None and image.size > 0:
2525
+ # Usar la configuración local
2526
+ local_confidence = st.session_state.get('continuous_confidence_threshold', 0.5)
2527
+
2528
+ # Detectar rostros
2529
+ bboxes = detect_face_dnn(face_net, image, local_confidence)
2530
+
2531
+ # Actualizar métricas
2532
+ faces_metric.metric("Faces detected", len(bboxes))
2533
+
2534
+ # Incrementar contador de frames procesados
2535
+ st.session_state.frames_processed += 1
2536
 
2537
+ # Calcular FPS real (actualizar cada segundo)
2538
+ current_time = time.time()
2539
+ elapsed = current_time - st.session_state.start_time
2540
+
2541
+ if current_time - st.session_state.last_fps_update >= 1.0:
2542
+ fps = st.session_state.frames_processed / elapsed
2543
+ fps_metric.metric("FPS", f"{fps:.1f}")
2544
+ st.session_state.last_fps_update = current_time
2545
+
2546
+ # Dibujar resultados
2547
+ result_img = image.copy()
2548
  for i, bbox in enumerate(bboxes):
2549
+ x1, y1, x2, y2, conf = bbox
2550
+ cv2.rectangle(result_img, (x1, y1), (x2, y2), (0, 255, 0), 2)
2551
+ cv2.putText(result_img, f"Face {i+1}: {conf:.2f}", (x1, y1-10),
2552
+ cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2553
 
2554
+ # Mostrar resultado
2555
+ with result_container:
2556
+ st.image(result_img, channels="BGR", caption="Real-time capture", use_container_width=True)
2557
+
2558
+ # Si hay rostros y hay una base de datos, intentar reconocerlos
2559
+ if len(bboxes) > 0 and st.session_state.face_database and len(st.session_state.face_database) > 0:
2560
+ recognition_results = []
2561
 
2562
+ # Usar la configuración local
2563
+ local_model = st.session_state.get('continuous_model_choice', 'VGG-Face')
2564
+ local_threshold = st.session_state.get('continuous_similarity_threshold', 45.0) / 100.0
2565
+
2566
+ for i, bbox in enumerate(bboxes):
2567
+ x1, y1, x2, y2, _ = bbox
2568
+ face_img = image[y1:y2, x1:x2]
2569
+
2570
+ # Extraer el embedding del rostro con el modelo seleccionado
2571
+ if local_model == "VGG-Face":
2572
+ embedding = vggface_model(face_img)
2573
+ elif local_model == "Facenet":
2574
+ embedding = facenet_model(face_img)
2575
+ elif local_model == "OpenFace":
2576
+ embedding = openface_model(face_img)
2577
+ elif local_model == "ArcFace":
2578
+ embedding = arcface_model(face_img)
2579
+ else:
2580
+ embedding = vggface_model(face_img)
2581
+
2582
+ # Comparar con rostros registrados
2583
+ best_match = None
2584
+ best_similarity = -1
2585
 
2586
+ for name, info in st.session_state.face_database.items():
2587
+ if 'embeddings' in info and info['embeddings']:
2588
+ # Buscar embedding del mismo modelo
2589
+ for emb in info['embeddings']:
2590
+ if isinstance(emb, dict) and 'model' in emb and emb['model'] == local_model:
2591
+ stored_emb = emb['embedding']
2592
+ similarity = cosine_similarity(embedding, stored_emb)
2593
+
2594
+ if similarity > local_threshold and similarity > best_similarity:
2595
+ best_similarity = similarity
2596
+ best_match = name
2597
+ elif not isinstance(emb, dict) and 'models' in info and local_model in info['models']:
2598
+ # Compatibilidad con formato anterior
2599
+ model_idx = info['models'].index(local_model)
2600
+ if model_idx < len(info['embeddings']):
2601
+ stored_emb = info['embeddings'][model_idx]
2602
+ similarity = cosine_similarity(embedding, stored_emb)
2603
+
2604
+ if similarity > local_threshold and similarity > best_similarity:
2605
+ best_similarity = similarity
2606
+ best_match = name
2607
+
2608
+ if best_match is not None:
2609
+ recognition_results.append({
2610
+ 'bbox': bbox,
2611
+ 'name': best_match,
2612
+ 'similarity': best_similarity
2613
+ })
2614
 
2615
+ # Mostrar resultados de reconocimiento
2616
+ if recognition_results:
2617
+ result_with_names = result_img.copy()
2618
 
2619
+ for result in recognition_results:
2620
+ x1, y1, x2, y2, _ = result['bbox']
2621
+ name = result['name']
2622
+ similarity = result['similarity']
2623
+
2624
+ # Dibujar nombre y similitud
2625
+ cv2.rectangle(result_with_names, (x1, y1), (x2, y2), (0, 255, 0), 2)
2626
+ label = f"{name}: {similarity:.2f}"
2627
+ cv2.putText(result_with_names, label, (x1, y1-10),
2628
+ cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
2629
+
2630
+ with result_container:
2631
+ st.image(result_with_names, channels="BGR", caption="Recognized faces", use_container_width=True)
2632
+
2633
+ # Mostrar tabla de resultados
2634
+ results_df = pd.DataFrame([
2635
+ {"Name": r['name'], "Confidence": f"{r['similarity']:.2f}"}
2636
+ for r in recognition_results
2637
+ ])
2638
+ st.table(results_df)
2639
+ except Exception as e:
2640
+ st.error(f"Error processing camera frame: {str(e)}")
2641
+ st.info("Camera continues to run. Processing will be attempted on next frame.")
2642
+
2643
  # Añadir opción de cámara alternativa para entornos donde WebRTC no funciona bien
2644
  st.markdown("---")
2645
  st.markdown("### Alternative Camera Mode")