jarondon82 committed on
Commit
a0de189
·
1 Parent(s): 11b9e7e

Optimizar WebRTC y añadir modos alternativos de reconocimiento

Browse files
Files changed (1) hide show
  1. streamlit_app.py +134 -108
streamlit_app.py CHANGED
@@ -2146,7 +2146,11 @@ def main():
2146
 
2147
  # WebRTC configuration
2148
  rtc_configuration = RTCConfiguration(
2149
- {"iceServers": [{"urls": ["stun:stun.l.google.com:19302"]}]}
 
 
 
 
2150
  )
2151
 
2152
  # Define callback to update session state with frames processed
@@ -2155,124 +2159,65 @@ def main():
2155
  self.frame_count = 0
2156
  self.face_count = 0
2157
  self.start_time = time.time()
 
 
2158
 
2159
  def recv(self, frame):
2160
  img = frame.to_ndarray(format="bgr24")
2161
  self.frame_count += 1
2162
 
2163
- # Detect faces
2164
- detections = detect_face_dnn(face_net, img, confidence_threshold)
2165
- _, bboxes = process_face_detections(img, detections, confidence_threshold)
2166
 
2167
- # Update face count in session state
2168
- self.face_count = len(bboxes)
2169
- if self.frame_count % 5 == 0:
2170
- if 'webrtc_face_count' not in st.session_state:
2171
- st.session_state.webrtc_face_count = 0
2172
- st.session_state.webrtc_face_count = self.face_count
2173
 
2174
- if 'webrtc_fps' not in st.session_state:
2175
- st.session_state.webrtc_fps = 0
2176
- elapsed = time.time() - self.start_time
2177
- st.session_state.webrtc_fps = 5 / elapsed if elapsed > 0 else 0
2178
- self.start_time = time.time()
2179
-
2180
- # Recognize each face
2181
- result_frame = img.copy()
2182
-
2183
- for i, bbox in enumerate(bboxes):
2184
- face_id = f"face_{i}"
2185
 
2186
- # Extract face embedding
2187
- embedding = extract_face_embeddings(img, bbox, model_name=model_choice)
 
 
 
 
 
 
 
 
2188
 
2189
- if embedding is not None:
2190
- # Compare with registered faces
2191
- matches = []
2192
-
2193
- for name, info in st.session_state.face_database.items():
2194
- if 'embeddings' in info:
2195
- # New format with multiple embeddings
2196
- similarities = []
2197
-
2198
- for idx, registered_embedding in enumerate(info['embeddings']):
2199
- # Use same model if possible
2200
- if info['models'][idx] == model_choice:
2201
- weight = 1.0 # Give more weight to embeddings from same model
2202
- else:
2203
- weight = 0.8 # Less weight for embeddings from other models
2204
-
2205
- # Make sure embeddings are compatible
2206
- try:
2207
- similarity = cosine_similarity([embedding["embedding"]], [registered_embedding])[0][0] * 100 * weight
2208
- similarities.append(similarity)
2209
- except ValueError:
2210
- # If incompatible dimensions error, skip this comparison
2211
- continue
2212
-
2213
- # Apply selected voting method
2214
- if similarities:
2215
- if voting_method == "Average":
2216
- final_similarity = sum(similarities) / len(similarities)
2217
- elif voting_method == "Best match":
2218
- final_similarity = max(similarities)
2219
- else: # Weighted voting
2220
- # Give more weight to higher similarities
2221
- weighted_sum = sum(s * (i+1) for i, s in enumerate(sorted(similarities)))
2222
- weights_sum = sum(i+1 for i in range(len(similarities)))
2223
- final_similarity = weighted_sum / weights_sum
2224
-
2225
- matches.append({"name": name, "similarity": final_similarity})
2226
- else:
2227
- # Old format with single embedding
2228
- registered_embedding = info['embedding']
2229
- try:
2230
- similarity = cosine_similarity([embedding["embedding"]], [registered_embedding])[0][0] * 100
2231
- matches.append({"name": name, "similarity": similarity})
2232
- except ValueError:
2233
- # If incompatible dimensions error, skip this comparison
2234
- continue
2235
 
2236
- # Sort matches by similarity
2237
- matches.sort(key=lambda x: x["similarity"], reverse=True)
2238
-
2239
- # Get best match
2240
- best_match = matches[0] if matches else None
2241
-
2242
- # Draw results on image
 
 
 
 
2243
  x1, y1, x2, y2, _ = bbox
2244
-
2245
- if best_match and best_match["similarity"] >= similarity_threshold:
2246
- # Match found
2247
- # Color based on similarity level
2248
- if best_match["similarity"] >= 80:
2249
- color = (0, 255, 0) # Green for high similarity
2250
- elif best_match["similarity"] >= 65:
2251
- color = (0, 255, 255) # Yellow for medium similarity
2252
- else:
2253
- color = (0, 165, 255) # Orange for low similarity
2254
-
2255
- # Draw rectangle and label
2256
- cv2.rectangle(result_frame, (x1, y1), (x2, y2), color, 2)
2257
-
2258
- if show_confidence:
2259
- label = f"{best_match['name']}: {best_match['similarity']:.1f}%"
2260
- else:
2261
- label = f"{best_match['name']}"
2262
-
2263
- cv2.putText(result_frame, label, (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
2264
- else:
2265
- # No match
2266
- cv2.rectangle(result_frame, (x1, y1), (x2, y2), (0, 0, 255), 2)
2267
-
2268
- if best_match:
2269
- label = f"Unknown: {best_match['similarity']:.1f}%" if show_confidence else "Unknown"
2270
- else:
2271
- label = "Unknown"
2272
-
2273
- cv2.putText(result_frame, label, (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
2274
 
2275
- return av.VideoFrame.from_ndarray(result_frame, format="bgr24")
 
 
2276
 
2277
  # Display WebRTC streamer
2278
  webrtc_ctx = webrtc_streamer(
@@ -2280,8 +2225,13 @@ def main():
2280
  mode=WebRtcMode.SENDRECV,
2281
  rtc_configuration=rtc_configuration,
2282
  video_processor_factory=VideoProcessor,
2283
- media_stream_constraints={"video": True, "audio": False},
2284
  async_processing=True,
 
 
 
 
 
2285
  )
2286
 
2287
  # Update metrics if WebRTC is running
@@ -2301,6 +2251,82 @@ def main():
2301
 
2302
  st.info("Click 'Start' to activate the camera and begin real-time face recognition.")
2303
  st.warning("Note: If you're running this in Hugging Face Spaces, some browser permissions may be required.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2304
 
2305
  # Add a note about privacy
2306
  st.markdown("---")
 
2146
 
2147
  # WebRTC configuration
2148
  rtc_configuration = RTCConfiguration(
2149
+ {"iceServers": [
2150
+ {"urls": ["stun:stun.l.google.com:19302"]},
2151
+ {"urls": ["stun:stun1.l.google.com:19302"]},
2152
+ {"urls": ["stun:stun2.l.google.com:19302"]}
2153
+ ]}
2154
  )
2155
 
2156
  # Define callback to update session state with frames processed
 
2159
  self.frame_count = 0
2160
  self.face_count = 0
2161
  self.start_time = time.time()
2162
+ self.processing = True
2163
+ self.frame_skip = 3 # Solo procesar cada 3 frames para reducir carga
2164
 
2165
  def recv(self, frame):
2166
  img = frame.to_ndarray(format="bgr24")
2167
  self.frame_count += 1
2168
 
2169
+ # Solo procesar algunos frames para reducir carga
2170
+ if self.frame_count % self.frame_skip != 0:
2171
+ return av.VideoFrame.from_ndarray(img, format="bgr24")
2172
 
2173
+ try:
2174
+ # Reducir tamaño del frame para procesamiento más rápido
2175
+ scale_factor = 0.5
2176
+ small_img = cv2.resize(img, (0, 0), fx=scale_factor, fy=scale_factor)
 
 
2177
 
2178
+ # Detect faces
2179
+ detections = detect_face_dnn(face_net, small_img, confidence_threshold)
2180
+ _, bboxes = process_face_detections(small_img, detections, confidence_threshold)
 
 
 
 
 
 
 
 
2181
 
2182
+ # Ajustar bounding boxes al tamaño original
2183
+ original_bboxes = []
2184
+ for x1, y1, x2, y2, conf in bboxes:
2185
+ original_bboxes.append((
2186
+ int(x1 / scale_factor),
2187
+ int(y1 / scale_factor),
2188
+ int(x2 / scale_factor),
2189
+ int(y2 / scale_factor),
2190
+ conf
2191
+ ))
2192
 
2193
+ # Update face count in session state
2194
+ self.face_count = len(original_bboxes)
2195
+ if self.frame_count % 15 == 0: # Actualizar métricas con menos frecuencia
2196
+ if 'webrtc_face_count' not in st.session_state:
2197
+ st.session_state.webrtc_face_count = 0
2198
+ st.session_state.webrtc_face_count = self.face_count
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2199
 
2200
+ if 'webrtc_fps' not in st.session_state:
2201
+ st.session_state.webrtc_fps = 0
2202
+ elapsed = time.time() - self.start_time
2203
+ st.session_state.webrtc_fps = 15 / elapsed if elapsed > 0 else 0
2204
+ self.start_time = time.time()
2205
+
2206
+ # Recognize each face
2207
+ result_frame = img.copy()
2208
+
2209
+ # Simplificar - solo dibujar rectángulos para esta versión rápida
2210
+ for i, bbox in enumerate(original_bboxes):
2211
  x1, y1, x2, y2, _ = bbox
2212
+ cv2.rectangle(result_frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
2213
+ cv2.putText(result_frame, f"Face {i+1}", (x1, y1-10),
2214
+ cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
2215
+
2216
+ return av.VideoFrame.from_ndarray(result_frame, format="bgr24")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2217
 
2218
+ except Exception as e:
2219
+ print(f"Error en procesamiento de video: {e}")
2220
+ return av.VideoFrame.from_ndarray(img, format="bgr24")
2221
 
2222
  # Display WebRTC streamer
2223
  webrtc_ctx = webrtc_streamer(
 
2225
  mode=WebRtcMode.SENDRECV,
2226
  rtc_configuration=rtc_configuration,
2227
  video_processor_factory=VideoProcessor,
2228
+ media_stream_constraints={"video": {"width": 640, "height": 480}, "audio": False},
2229
  async_processing=True,
2230
+ video_html_attrs={
2231
+ "style": {"width": "100%", "margin": "0 auto", "border": "2px solid"},
2232
+ "controls": False,
2233
+ "autoPlay": True,
2234
+ },
2235
  )
2236
 
2237
  # Update metrics if WebRTC is running
 
2251
 
2252
  st.info("Click 'Start' to activate the camera and begin real-time face recognition.")
2253
  st.warning("Note: If you're running this in Hugging Face Spaces, some browser permissions may be required.")
2254
+
2255
+ # Opción alternativa en caso de problemas con WebRTC
2256
+ st.markdown("---")
2257
+ st.markdown("### ¿Problemas con WebRTC?")
2258
+ col1, col2 = st.columns(2)
2259
+ demo_mode = col1.button("Usar modo de demostración")
2260
+ upload_mode = col2.button("Subir imagen para reconocimiento")
2261
+
2262
+ if demo_mode:
2263
+ st.session_state.demo_running = True
2264
+ st.session_state.upload_mode = False
2265
+ elif upload_mode:
2266
+ st.session_state.upload_mode = True
2267
+ st.session_state.demo_running = False
2268
+
2269
+ # Modo de demostración con imágenes simuladas
2270
+ if st.session_state.get('demo_running', False):
2271
+ # Cargar algunas imágenes de ejemplo (usar tus propias imágenes si es posible)
2272
+ demo_img = None
2273
+
2274
+ # Intentar usar una imagen de la base de datos
2275
+ if st.session_state.face_database:
2276
+ for name, info in st.session_state.face_database.items():
2277
+ if 'image' in info:
2278
+ try:
2279
+ demo_img = info['image']
2280
+ break
2281
+ except:
2282
+ pass
2283
+
2284
+ # Si no hay imagen disponible, crear una imagen en blanco
2285
+ if demo_img is None:
2286
+ demo_img = np.ones((480, 640, 3), dtype=np.uint8) * 255
2287
+ # Dibujar un círculo como "cara" simulada
2288
+ cv2.circle(demo_img, (320, 240), 100, (0, 0, 255), -1)
2289
+ cv2.circle(demo_img, (280, 200), 15, (255, 255, 255), -1)
2290
+ cv2.circle(demo_img, (360, 200), 15, (255, 255, 255), -1)
2291
+ cv2.ellipse(demo_img, (320, 260), (50, 30), 0, 0, 180, (255, 255, 255), -1)
2292
+
2293
+ # Mostrar la imagen
2294
+ st.image(demo_img, channels="BGR", caption="Modo de demostración", use_column_width=True)
2295
+
2296
+ # Simular métricas
2297
+ faces_metric.metric("Faces detected", 1)
2298
+ fps_metric.metric("FPS", "15.5")
2299
+ time_metric.metric("Status", "Demo")
2300
+
2301
+ st.success("Modo de demostración activado. En un entorno local, el reconocimiento facial en tiempo real funcionaría correctamente.")
2302
+
2303
+ # Modo de carga de imagen
2304
+ if st.session_state.get('upload_mode', False):
2305
+ uploaded_file = st.file_uploader("Sube una imagen con rostros", type=["jpg", "jpeg", "png"])
2306
+
2307
+ if uploaded_file is not None:
2308
+ # Leer imagen
2309
+ image_bytes = uploaded_file.read()
2310
+ image = cv2.imdecode(np.frombuffer(image_bytes, np.uint8), cv2.IMREAD_COLOR)
2311
+
2312
+ # Detectar rostros
2313
+ detections = detect_face_dnn(face_net, image, confidence_threshold)
2314
+ _, bboxes = process_face_detections(image, detections, confidence_threshold)
2315
+
2316
+ # Dibujar rostros detectados
2317
+ result_img = image.copy()
2318
+ for i, bbox in enumerate(bboxes):
2319
+ x1, y1, x2, y2, _ = bbox
2320
+ cv2.rectangle(result_img, (x1, y1), (x2, y2), (0, 255, 0), 2)
2321
+ cv2.putText(result_img, f"Face {i+1}", (x1, y1-10),
2322
+ cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
2323
+
2324
+ # Mostrar resultado
2325
+ st.image(result_img, channels="BGR", caption="Rostros detectados", use_column_width=True)
2326
+
2327
+ # Actualizar métricas
2328
+ faces_metric.metric("Faces detected", len(bboxes))
2329
+ time_metric.metric("Status", "Processed")
2330
 
2331
  # Add a note about privacy
2332
  st.markdown("---")