jarondon82 commited on
Commit
19ebc3a
·
1 Parent(s): 44a87dc

Implementada WebRTC para reconocimiento en tiempo real

Browse files
Files changed (2) hide show
  1. requirements.txt +8 -5
  2. streamlit_app.py +163 -232
requirements.txt CHANGED
@@ -1,9 +1,9 @@
1
  streamlit==1.31.0
2
  opencv-python-headless==4.8.0.76
3
- numpy==1.23.5
4
- Pillow==10.0.0
5
- scikit-learn==1.0.2
6
- matplotlib==3.7.2
7
  pandas==2.0.3
8
  tensorflow==2.12.0
9
  keras==2.12.0
@@ -12,4 +12,7 @@ mtcnn==0.1.1
12
  retina-face==0.0.12
13
  requests==2.32.2
14
  dlib-binary==19.24.1
15
- deepface==0.0.79
 
 
 
 
1
  streamlit==1.31.0
2
  opencv-python-headless==4.8.0.76
3
+ numpy==1.24.4
4
+ Pillow==10.2.0
5
+ scikit-learn==1.5.0
6
+ matplotlib==3.9.0
7
  pandas==2.0.3
8
  tensorflow==2.12.0
9
  keras==2.12.0
 
12
  retina-face==0.0.12
13
  requests==2.32.2
14
  dlib-binary==19.24.1
15
+ deepface==0.0.82
16
+ streamlit-webrtc==0.47.1
17
+ gdown==5.1.0
18
+ av==10.0.0
streamlit_app.py CHANGED
@@ -12,6 +12,8 @@ import matplotlib.pyplot as plt
12
  import pickle
13
  from sklearn.metrics.pairwise import cosine_similarity # type: ignore
14
  import pandas as pd
 
 
15
 
16
  # Importar las utilidades para la base de datos de rostros
17
  try:
@@ -2085,7 +2087,7 @@ def main():
2085
  st.warning("No faces registered. Please register at least one face first.")
2086
  else:
2087
  # Configuración avanzada
2088
- with st.expander("Configuración avanzada", expanded=False):
2089
  # Configuración de umbral de similitud
2090
  similarity_threshold = st.slider(
2091
  "Similarity threshold (%)",
@@ -2094,7 +2096,7 @@ def main():
2094
  value=45.0,
2095
  step=5.0,
2096
  key="realtime_threshold",
2097
- help="Porcentaje mínimo de similitud para considerar una coincidencia"
2098
  )
2099
 
2100
  confidence_threshold = st.slider(
@@ -2104,61 +2106,34 @@ def main():
2104
  value=0.5,
2105
  step=0.05,
2106
  key="realtime_confidence",
2107
- help="Un valor más alto es más restrictivo pero más preciso"
2108
  )
2109
 
2110
  model_choice = st.selectbox(
2111
  "Embedding model",
2112
  ["VGG-Face", "Facenet", "OpenFace", "ArcFace"],
2113
  key="realtime_model",
2114
- help="Diferentes modelos pueden dar resultados distintos según las características faciales"
2115
  )
2116
 
2117
  voting_method = st.radio(
2118
- "Método de votación para múltiples embeddings",
2119
- ["Promedio", "Mejor coincidencia", "Votación ponderada"],
2120
  key="realtime_voting",
2121
- help="Cómo combinar resultados cuando hay múltiples imágenes de una persona"
2122
  )
2123
 
2124
  show_confidence = st.checkbox(
2125
- "Mostrar porcentaje de confianza",
2126
  value=True,
2127
- help="Mostrar el porcentaje de similitud junto al nombre"
2128
  )
2129
 
2130
  stabilize_results = st.checkbox(
2131
- "Estabilizar resultados",
2132
  value=True,
2133
- help="Reduce fluctuaciones en la identificación usando un promedio temporal"
2134
  )
2135
-
2136
- fps_limit = st.slider(
2137
- "Límite de FPS",
2138
- min_value=5,
2139
- max_value=30,
2140
- value=15,
2141
- step=1,
2142
- help="Limitar los frames por segundo para reducir uso de CPU"
2143
- )
2144
-
2145
- # Inicializar estado de la cámara
2146
- if 'recognition_camera_running' not in st.session_state:
2147
- st.session_state.recognition_camera_running = False
2148
-
2149
- # Inicializar historial de reconocimiento para estabilización
2150
- if 'recognition_history' not in st.session_state:
2151
- st.session_state.recognition_history = {}
2152
-
2153
- # Botones para controlar la cámara
2154
- col1, col2 = st.columns(2)
2155
- start_button = col1.button("Iniciar Cámara", key="start_recognition_camera",
2156
- on_click=lambda: setattr(st.session_state, 'recognition_camera_running', True))
2157
- stop_button = col2.button("Detener Cámara", key="stop_recognition_camera",
2158
- on_click=lambda: setattr(st.session_state, 'recognition_camera_running', False))
2159
-
2160
- # Placeholder para el video
2161
- video_placeholder = st.empty()
2162
 
2163
  # Placeholder para métricas
2164
  metrics_cols = st.columns(3)
@@ -2169,211 +2144,167 @@ def main():
2169
  with metrics_cols[2]:
2170
  time_metric = st.empty()
2171
 
2172
- if st.session_state.recognition_camera_running:
2173
- st.info("Cámara activada. Procesando video en tiempo real...")
2174
-
2175
- # Inicializar webcam
2176
- cap = cv2.VideoCapture(0)
2177
-
2178
- if not cap.isOpened():
2179
- st.error("No se pudo acceder a la cámara. Asegúrese de que esté conectada y no esté siendo utilizada por otra aplicación.")
2180
- st.session_state.recognition_camera_running = False
2181
- else:
2182
- try:
2183
- # Variables para métricas
2184
- frame_count = 0
2185
- start_time = time.time()
2186
- last_frame_time = start_time
2187
- fps_history = []
 
 
 
 
 
 
 
 
 
 
2188
 
2189
- while st.session_state.recognition_camera_running:
2190
- # Control de FPS
2191
- current_time = time.time()
2192
- elapsed = current_time - last_frame_time
2193
- if elapsed < 1.0/fps_limit:
2194
- time.sleep(0.01) # Pequeña pausa para no sobrecargar la CPU
2195
- continue
2196
-
2197
- last_frame_time = current_time
2198
-
2199
- # Leer frame
2200
- ret, frame = cap.read()
2201
- if not ret:
2202
- st.error("Error al leer frame de la cámara.")
2203
- break
2204
-
2205
- # Actualizar contador de frames
2206
- frame_count += 1
2207
 
2208
- # Calcular FPS
2209
- if frame_count % 5 == 0:
2210
- fps = 5 / (current_time - start_time)
2211
- fps_history.append(fps)
2212
- if len(fps_history) > 10:
2213
- fps_history.pop(0)
2214
- avg_fps = sum(fps_history) / len(fps_history)
2215
- start_time = current_time
2216
-
2217
- # Actualizar métricas
2218
- fps_metric.metric("FPS", f"{avg_fps:.1f}")
2219
- time_metric.metric("Tiempo activo", f"{int(current_time - time.time() + st.session_state.get('camera_start_time', current_time))}s")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2220
 
2221
- # Detect rostros
2222
- detections = detect_face_dnn(face_net, frame, confidence_threshold)
2223
- _, bboxes = process_face_detections(frame, detections, confidence_threshold)
2224
 
2225
- # Actualizar métrica de rostros
2226
- if frame_count % 5 == 0:
2227
- faces_metric.metric("Faces detected", len(bboxes))
2228
 
2229
- # Reconocer cada rostro
2230
- result_frame = frame.copy()
2231
 
2232
- for i, bbox in enumerate(bboxes):
2233
- face_id = f"face_{i}"
 
 
 
 
 
 
 
2234
 
2235
- # Extraer embedding del rostro
2236
- embedding = extract_face_embeddings(frame, bbox, model_name=model_choice)
2237
 
2238
- if embedding is not None:
2239
- # Compare con rostros registrados
2240
- matches = []
2241
-
2242
- for name, info in st.session_state.face_database.items():
2243
- if 'embeddings' in info:
2244
- # Nuevo formato con múltiples embeddings
2245
- similarities = []
2246
-
2247
- for idx, registered_embedding in enumerate(info['embeddings']):
2248
- # Usar el mismo modelo si es posible
2249
- if info['models'][idx] == model_choice:
2250
- weight = 1.0 # Dar más peso a embeddings del mismo modelo
2251
- else:
2252
- weight = 0.8 # Peso menor para embeddings de otros modelos
2253
-
2254
- # Asegurarse de que los embeddings sean compatibles
2255
- try:
2256
- similarity = cosine_similarity([embedding["embedding"]], [registered_embedding])[0][0] * 100 * weight
2257
- similarities.append(similarity)
2258
- except ValueError as e:
2259
- # Si hay error de dimensiones incompatibles, omitir esta comparación
2260
- continue
2261
-
2262
- # Aplicar método de votación seleccionado
2263
- if voting_method == "Promedio":
2264
- final_similarity = sum(similarities) / len(similarities)
2265
- elif voting_method == "Mejor coincidencia":
2266
- final_similarity = max(similarities)
2267
- else: # Votación ponderada
2268
- # Dar más peso a similitudes más altas
2269
- weighted_sum = sum(s * (i+1) for i, s in enumerate(sorted(similarities)))
2270
- weights_sum = sum(i+1 for i in range(len(similarities)))
2271
- final_similarity = weighted_sum / weights_sum
2272
-
2273
- matches.append({"name": name, "similarity": final_similarity})
2274
- else:
2275
- # Formato antiguo con un solo embedding
2276
- registered_embedding = info['embedding']
2277
- try:
2278
- similarity = cosine_similarity([embedding["embedding"]], [registered_embedding])[0][0] * 100
2279
- matches.append({"name": name, "similarity": similarity})
2280
- except ValueError as e:
2281
- # Si hay error de dimensiones incompatibles, omitir esta comparación
2282
- # Modelos incompatibles: {embedding['model']} vs formato antiguo
2283
- continue
2284
-
2285
- # Ordenar coincidencias por similitud
2286
- matches.sort(key=lambda x: x["similarity"], reverse=True)
2287
-
2288
- # Estabilizar resultados si está activado
2289
- if stabilize_results and matches:
2290
- best_match = matches[0]
2291
-
2292
- # Inicializar historial para este rostro si no existe
2293
- if face_id not in st.session_state.recognition_history:
2294
- st.session_state.recognition_history[face_id] = {
2295
- "names": [],
2296
- "similarities": []
2297
- }
2298
-
2299
- # Añadir al historial
2300
- history = st.session_state.recognition_history[face_id]
2301
- history["names"].append(best_match["name"])
2302
- history["similarities"].append(best_match["similarity"])
2303
-
2304
- # Limitar historial a los últimos 10 frames
2305
- if len(history["names"]) > 10:
2306
- history["names"].pop(0)
2307
- history["similarities"].pop(0)
2308
-
2309
- # Determinar el nombre más frecuente en el historial
2310
- if len(history["names"]) >= 3: # Necesitamos al menos 3 frames para estabilizar
2311
- name_counts = {}
2312
- for name in history["names"]:
2313
- if name not in name_counts:
2314
- name_counts[name] = 0
2315
- name_counts[name] += 1
2316
-
2317
- # Encontrar el nombre más frecuente
2318
- stable_name = max(name_counts.items(), key=lambda x: x[1])[0]
2319
-
2320
- # Calcular similitud promedio para ese nombre
2321
- stable_similarities = [
2322
- history["similarities"][i]
2323
- for i in range(len(history["names"]))
2324
- if history["names"][i] == stable_name
2325
- ]
2326
- stable_similarity = sum(stable_similarities) / len(stable_similarities)
2327
-
2328
- # Reemplazar la mejor coincidencia con el resultado estabilizado
2329
- best_match = {"name": stable_name, "similarity": stable_similarity}
2330
- else:
2331
- best_match = matches[0]
2332
- else:
2333
- best_match = matches[0] if matches else None
2334
 
2335
- # Dibujar resultado en la imagen
2336
- x1, y1, x2, y2, _ = bbox
 
 
 
 
 
 
 
2337
 
2338
- if best_match and best_match["similarity"] >= similarity_threshold:
2339
- # Coincidencia encontrada
2340
- # Color basado en nivel de similitud
2341
- if best_match["similarity"] >= 80:
2342
- color = (0, 255, 0) # Verde para alta similitud
2343
- elif best_match["similarity"] >= 65:
2344
- color = (0, 255, 255) # Amarillo para media similitud
2345
- else:
2346
- color = (0, 165, 255) # Naranja para baja similitud
2347
-
2348
- # Dibujar rectángulo y etiqueta
2349
- cv2.rectangle(result_frame, (x1, y1), (x2, y2), color, 2)
2350
-
2351
- if show_confidence:
2352
- label = f"{best_match['name']}: {best_match['similarity']:.1f}%"
2353
- else:
2354
- label = f"{best_match['name']}"
2355
-
2356
- cv2.putText(result_frame, label, (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
2357
- else:
2358
- # No hay coincidencia
2359
- cv2.rectangle(result_frame, (x1, y1), (x2, y2), (0, 0, 255), 2)
2360
-
2361
- if best_match:
2362
- label = f"Desconocido: {best_match['similarity']:.1f}%" if show_confidence else "Desconocido"
2363
- else:
2364
- label = "Desconocido"
2365
-
2366
- cv2.putText(result_frame, label, (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
2367
-
2368
- # Mostrar resultado
2369
- video_placeholder.image(result_frame, channels="BGR", use_container_width=True)
2370
- finally:
2371
- # Liberar la cámara cuando se detenga
2372
- cap.release()
2373
- # Limpiar historial de reconocimiento
2374
- st.session_state.recognition_history = {}
2375
  else:
2376
- st.info("Haga clic en 'Iniciar Cámara' para comenzar el reconocimiento en tiempo real.")
 
 
 
 
 
 
 
 
 
2377
 
2378
  # Si se ejecuta este archivo directamente, llamar a la función main
2379
  if __name__ == "__main__":
 
12
  import pickle
13
  from sklearn.metrics.pairwise import cosine_similarity # type: ignore
14
  import pandas as pd
15
+ import av
16
+ from streamlit_webrtc import webrtc_streamer, VideoProcessorBase, RTCConfiguration, WebRtcMode
17
 
18
  # Importar las utilidades para la base de datos de rostros
19
  try:
 
2087
  st.warning("No faces registered. Please register at least one face first.")
2088
  else:
2089
  # Configuración avanzada
2090
+ with st.expander("Advanced Configuration", expanded=False):
2091
  # Configuración de umbral de similitud
2092
  similarity_threshold = st.slider(
2093
  "Similarity threshold (%)",
 
2096
  value=45.0,
2097
  step=5.0,
2098
  key="realtime_threshold",
2099
+ help="Minimum similarity percentage to consider a match"
2100
  )
2101
 
2102
  confidence_threshold = st.slider(
 
2106
  value=0.5,
2107
  step=0.05,
2108
  key="realtime_confidence",
2109
+ help="Higher value is more restrictive but more accurate"
2110
  )
2111
 
2112
  model_choice = st.selectbox(
2113
  "Embedding model",
2114
  ["VGG-Face", "Facenet", "OpenFace", "ArcFace"],
2115
  key="realtime_model",
2116
+ help="Different models can give different results depending on facial features"
2117
  )
2118
 
2119
  voting_method = st.radio(
2120
+ "Voting method for multiple embeddings",
2121
+ ["Average", "Best match", "Weighted voting"],
2122
  key="realtime_voting",
2123
+ help="How to combine results when there are multiple images of a person"
2124
  )
2125
 
2126
  show_confidence = st.checkbox(
2127
+ "Show confidence percentage",
2128
  value=True,
2129
+ help="Show similarity percentage next to the name"
2130
  )
2131
 
2132
  stabilize_results = st.checkbox(
2133
+ "Stabilize results",
2134
  value=True,
2135
+ help="Reduce identification fluctuations using temporal averaging"
2136
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2137
 
2138
  # Placeholder para métricas
2139
  metrics_cols = st.columns(3)
 
2144
  with metrics_cols[2]:
2145
  time_metric = st.empty()
2146
 
2147
+ # WebRTC configuration
2148
+ rtc_configuration = RTCConfiguration(
2149
+ {"iceServers": [{"urls": ["stun:stun.l.google.com:19302"]}]}
2150
+ )
2151
+
2152
+ # Define callback to update session state with frames processed
2153
# Per-stream video processor for streamlit-webrtc.
#
# recv() is invoked by streamlit-webrtc on a background worker thread, NOT on
# the Streamlit script thread. Metrics are therefore kept as plain attributes
# on the processor instance (readable from the main thread via
# webrtc_ctx.video_processor). The st.session_state mirror writes are kept for
# backward compatibility but guarded, because session_state access from a
# worker thread has no ScriptRunContext and may fail or not propagate.
#
# Closure dependencies (from the enclosing Streamlit page): face_net,
# confidence_threshold, model_choice, voting_method, similarity_threshold,
# show_confidence, plus the helpers detect_face_dnn, process_face_detections
# and extract_face_embeddings.
class VideoProcessor(VideoProcessorBase):
    def __init__(self):
        self.frame_count = 0           # total frames processed on this stream
        self.face_count = 0            # faces detected in the most recent frame
        self.fps = 0.0                 # rolling FPS estimate, refreshed every 5 frames
        self.start_time = time.time()  # start of the current 5-frame FPS window

    def recv(self, frame):
        """Process one incoming frame: detect faces, match them against
        st.session_state.face_database, and return the annotated frame."""
        img = frame.to_ndarray(format="bgr24")
        self.frame_count += 1

        # Detect faces with the DNN detector.
        detections = detect_face_dnn(face_net, img, confidence_threshold)
        _, bboxes = process_face_detections(img, detections, confidence_threshold)
        self.face_count = len(bboxes)

        # Refresh metrics once every 5 frames to keep per-frame cost low.
        if self.frame_count % 5 == 0:
            elapsed = time.time() - self.start_time
            self.fps = 5 / elapsed if elapsed > 0 else 0
            self.start_time = time.time()
            # Best-effort mirror into session_state for legacy readers.
            # NOTE(review): session_state is not safe to touch from this
            # worker thread; failures are deliberately ignored.
            try:
                st.session_state.webrtc_face_count = self.face_count
                st.session_state.webrtc_fps = self.fps
            except Exception:
                pass

        result_frame = img.copy()

        # NOTE(review): face_database is read from the worker thread; assumes
        # it is only mutated between reruns — confirm against registration UI.
        for bbox in bboxes:
            # Embedding for this face crop; None when extraction fails.
            embedding = extract_face_embeddings(img, bbox, model_name=model_choice)
            if embedding is None:
                continue

            matches = []
            for name, info in st.session_state.face_database.items():
                if 'embeddings' in info:
                    # New DB format: several embeddings per person.
                    similarities = []
                    for idx, registered_embedding in enumerate(info['embeddings']):
                        # Prefer embeddings produced by the same model.
                        weight = 1.0 if info['models'][idx] == model_choice else 0.8
                        try:
                            similarity = cosine_similarity([embedding["embedding"]], [registered_embedding])[0][0] * 100 * weight
                            similarities.append(similarity)
                        except ValueError:
                            # Incompatible embedding dimensions: skip this pair.
                            continue

                    # Apply the selected voting method over the collected scores.
                    if similarities:
                        if voting_method == "Average":
                            final_similarity = sum(similarities) / len(similarities)
                        elif voting_method == "Best match":
                            final_similarity = max(similarities)
                        else:  # Weighted voting: higher similarities weigh more
                            weighted_sum = sum(s * (i + 1) for i, s in enumerate(sorted(similarities)))
                            weights_sum = sum(i + 1 for i in range(len(similarities)))
                            final_similarity = weighted_sum / weights_sum
                        matches.append({"name": name, "similarity": final_similarity})
                else:
                    # Old DB format: a single embedding per person.
                    registered_embedding = info['embedding']
                    try:
                        similarity = cosine_similarity([embedding["embedding"]], [registered_embedding])[0][0] * 100
                        matches.append({"name": name, "similarity": similarity})
                    except ValueError:
                        # Incompatible embedding dimensions: skip this person.
                        continue

            matches.sort(key=lambda x: x["similarity"], reverse=True)
            best_match = matches[0] if matches else None

            x1, y1, x2, y2, _ = bbox

            if best_match and best_match["similarity"] >= similarity_threshold:
                # Box colour encodes confidence level (BGR).
                if best_match["similarity"] >= 80:
                    color = (0, 255, 0)      # green: high similarity
                elif best_match["similarity"] >= 65:
                    color = (0, 255, 255)    # yellow: medium similarity
                else:
                    color = (0, 165, 255)    # orange: low similarity

                cv2.rectangle(result_frame, (x1, y1), (x2, y2), color, 2)

                if show_confidence:
                    label = f"{best_match['name']}: {best_match['similarity']:.1f}%"
                else:
                    label = f"{best_match['name']}"

                cv2.putText(result_frame, label, (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
            else:
                # No match above the threshold: red box, "Unknown" label.
                cv2.rectangle(result_frame, (x1, y1), (x2, y2), (0, 0, 255), 2)

                if best_match:
                    label = f"Unknown: {best_match['similarity']:.1f}%" if show_confidence else "Unknown"
                else:
                    label = "Unknown"

                cv2.putText(result_frame, label, (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)

        return av.VideoFrame.from_ndarray(result_frame, format="bgr24")
2276
+
2277
# Launch the WebRTC streamer: the browser sends camera frames to the server,
# VideoProcessor annotates them, and the processed stream is returned.
webrtc_ctx = webrtc_streamer(
    key="face-recognition",
    mode=WebRtcMode.SENDRECV,
    rtc_configuration=rtc_configuration,
    video_processor_factory=VideoProcessor,
    media_stream_constraints={"video": True, "audio": False},
    async_processing=True,
)

# Update metrics while the stream is live.
if webrtc_ctx.state.playing:
    # recv() runs on a worker thread without a ScriptRunContext, so
    # session_state writes from it are unreliable. Read metrics directly
    # from the processor instance, falling back to session_state for
    # processors that only mirror values there.
    processor = webrtc_ctx.video_processor
    face_count = getattr(processor, "face_count", None)
    if face_count is None:
        face_count = st.session_state.get('webrtc_face_count', 0)
    fps = getattr(processor, "fps", None)
    if fps is None:
        fps = st.session_state.get('webrtc_fps', 0)

    faces_metric.metric("Faces detected", face_count)
    fps_metric.metric("FPS", f"{fps:.1f}")
    time_metric.metric("Status", "Running")

    # User guidance while the camera is active.
    st.info("WebRTC camera is active. Face recognition is being processed in real-time.")
    st.warning("Note: For better performance, make sure you have good lighting and face the camera directly.")
else:
    faces_metric.metric("Faces detected", 0)
    fps_metric.metric("FPS", 0)
    time_metric.metric("Status", "Stopped")

    st.info("Click 'Start' to activate the camera and begin real-time face recognition.")
    st.warning("Note: If you're running this in Hugging Face Spaces, some browser permissions may be required.")

# Privacy disclosure for end users.
st.markdown("---")
st.markdown("**Privacy Note**: Video is processed in your browser and on the server. No video data is stored permanently.")
2308
 
2309
  # Si se ejecuta este archivo directamente, llamar a la función main
2310
  if __name__ == "__main__":