Spaces:

Segizu
/

Face_Recognition

Build error

App Files Community

Segizu committed on May 12, 2025

Commit

8baf080

1 Parent(s): 9c5866b

ZeroGPU

Browse files

Files changed (2) hide show

README.md +23 -5
app.py +31 -67

README.md CHANGED Viewed

@@ -1,12 +1,30 @@
 ---
-title: Face Recognition
-emoji: ⚡
-colorFrom: red
-colorTo: blue
 sdk: gradio
-sdk_version: 5.23.0
 app_file: app.py
 pinned: false
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: Facial Recognition App
+emoji: 🔍
+colorFrom: blue
+colorTo: purple
 sdk: gradio
+sdk_version: 3.50.2
 app_file: app.py
 pinned: false
 ---
+# Facial Recognition App
+This application uses DeepFace and Facenet for facial recognition and similarity matching.
+## Hardware Requirements
+- GPU: Required
+- CPU: 4+ cores recommended
+- RAM: 8GB+ recommended
+## Environment Setup
+The application requires the following key dependencies:
+- deepface
+- gradio
+- huggingface_hub
+- datasets
+- Pillow
+- numpy
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py CHANGED Viewed

@@ -1,9 +1,9 @@
 import numpy as np
 from PIL import Image
 import gradio as gr
 from deepface import DeepFace
 from datasets import load_dataset
-import os
 import pickle
 from io import BytesIO
 from huggingface_hub import upload_file, hf_hub_download, list_repo_files
@@ -13,12 +13,15 @@ import requests
 import time
 import shutil
 import tarfile
 # 🔁 Limpiar almacenamiento temporal si existe
 def clean_temp_dirs():
     print("🧹 Limpiando carpetas temporales...")
     for folder in ["embeddings", "batches"]:
         path = Path(folder)
         if path.exists() and path.is_dir():
@@ -36,10 +39,9 @@ LOCAL_EMB_DIR.mkdir(exist_ok=True)
 HF_TOKEN = os.getenv("HF_TOKEN")
 headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
-# 💾 Configuración de control de almacenamiento
 MAX_TEMP_STORAGE_GB = 40
 UPLOAD_EVERY = 50
-embeddings_to_upload = []
 def get_folder_size(path):
     total = 0
@@ -47,30 +49,12 @@ def get_folder_size(path):
         for f in filenames:
             fp = os.path.join(dirpath, f)
             total += os.path.getsize(fp)
-    return total / (1024 ** 3)  # En GB
-def flush_embeddings():
-    global embeddings_to_upload
-    print("🚀 Subiendo lote de embeddings a Hugging Face...")
-    for emb_file in embeddings_to_upload:
-        try:
-            filename = emb_file.name
-            upload_file(
-                path_or_fileobj=str(emb_file),
-                path_in_repo=f"{EMBEDDINGS_SUBFOLDER}/{filename}",
-                repo_id=DATASET_ID,
-                repo_type="dataset",
-                token=HF_TOKEN
-            )
-            os.remove(emb_file)
-            print(f"✅ Subido y eliminado: {filename}")
-            time.sleep(1.2)  # Evita 429
-        except Exception as e:
-            print(f"❌ Error subiendo {filename}: {e}")
-            continue
-    embeddings_to_upload = []
 # ✅ Cargar CSV desde el dataset
 dataset = load_dataset(
@@ -81,19 +65,9 @@ dataset = load_dataset(
     header=0
 )
-print("✅ Validación post-carga")
-print(dataset[0])
-print("Columnas:", dataset.column_names)
-# 🔄 Preprocesamiento
-def preprocess_image(img: Image.Image) -> np.ndarray:
-    img_rgb = img.convert("RGB")
-    img_resized = img_rgb.resize((160, 160), Image.Resampling.LANCZOS)
-    return np.array(img_resized)
 def build_database():
-    print(f"📊 Uso actual de almacenamiento tempora _ INICIO_: {get_folder_size('.'):.2f} GB")
     print("🔄 Generando embeddings...")
     batch_size = 10
     archive_batch_size = 50
@@ -107,8 +81,7 @@ def build_database():
         print(f"📦 Lote {i // batch_size + 1}/{(len(dataset) + batch_size - 1) // batch_size}")
         for j in range(len(batch["image"])):
-            item = {"image": batch["image"][j]}
-            image_url = item["image"]
             if not isinstance(image_url, str) or not image_url.startswith("http") or image_url.strip().lower() == "image":
                 print(f"⚠️ Saltando {i + j} - URL inválida: {image_url}")
@@ -117,7 +90,7 @@ def build_database():
             name = f"image_{i + j}"
             filename = LOCAL_EMB_DIR / f"{name}.pkl"
-            # Verificar si ya existe en Hugging Face Hub
             try:
                 hf_hub_download(
                     repo_id=DATASET_ID,
@@ -149,8 +122,7 @@ def build_database():
                 del img_processed
                 gc.collect()
-                # Si llegamos al tamaño de archivo por lote o espacio es crítico
-                if len(batch_files) >= archive_batch_size or get_folder_size(".") > 40:
                     archive_path = ARCHIVE_DIR / f"batch_{batch_index:03}.tar.gz"
                     with tarfile.open(archive_path, "w:gz") as tar:
                         for file in batch_files:
@@ -158,7 +130,6 @@ def build_database():
                     print(f"📦 Empaquetado: {archive_path}")
-                    # Subida al Hub
                     upload_file(
                         path_or_fileobj=str(archive_path),
                         path_in_repo=f"{EMBEDDINGS_SUBFOLDER}/{archive_path.name}",
@@ -168,24 +139,20 @@ def build_database():
                     )
                     print(f"✅ Subido: {archive_path.name}")
-                    # Borrar .pkl y el .tar.gz local
                     for f in batch_files:
                         f.unlink()
                     archive_path.unlink()
                     print("🧹 Limpieza completada tras subida")
                     batch_files = []
                     batch_index += 1
-                    time.sleep(2)  # Pausa para evitar 429
-                    print(f"📊 Uso actual de almacenamiento tempora _ FINAL_: {get_folder_size('.'):.2f} GB")
             except Exception as e:
                 print(f"❌ Error en {name}: {e}")
                 continue
-    # Último lote si queda algo
     if batch_files:
         archive_path = ARCHIVE_DIR / f"batch_{batch_index:03}.tar.gz"
         with tarfile.open(archive_path, "w:gz") as tar:
@@ -207,8 +174,7 @@ def build_database():
         archive_path.unlink()
         print("✅ Subida y limpieza final")
-# 🔍 Buscar similitudes desde archivos remotos
 def find_similar_faces(uploaded_image: Image.Image):
     try:
         img_processed = preprocess_image(uploaded_image)
@@ -259,20 +225,18 @@ def find_similar_faces(uploaded_image: Image.Image):
     summary = "\n".join([f"{name} - Similitud: {sim:.2f}" for sim, name, _ in top])
     return gallery, summary
-# 🚀 Inicializar
-print("🚀 Iniciando app...")
-build_database()
 # 🎛️ Interfaz Gradio
-demo = gr.Interface(
-    fn=find_similar_faces,
-    inputs=gr.Image(label="📤 Sube una imagen", type="pil"),
-    outputs=[
-        gr.Gallery(label="📸 Rostros similares"),
-        gr.Textbox(label="🧠 Detalle", lines=6)
-    ],
-    title="🔍 Reconocimiento facial con DeepFace",
-    description="Sube una imagen y encuentra coincidencias en el dataset privado de Hugging Face usando embeddings Facenet."
-)
 demo.launch()

+import os
 import numpy as np
 from PIL import Image
 import gradio as gr
 from deepface import DeepFace
 from datasets import load_dataset
 import pickle
 from io import BytesIO
 from huggingface_hub import upload_file, hf_hub_download, list_repo_files
 import time
 import shutil
 import tarfile
+import tensorflow as tf
+from spaces import GPU
+# 🔍 Mostrar dispositivos disponibles
+print("🔍 Dispositivos disponibles:", tf.config.list_physical_devices())
 # 🔁 Limpiar almacenamiento temporal si existe
 def clean_temp_dirs():
     print("🧹 Limpiando carpetas temporales...")
     for folder in ["embeddings", "batches"]:
         path = Path(folder)
         if path.exists() and path.is_dir():
 HF_TOKEN = os.getenv("HF_TOKEN")
 headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
+# 💾 Configuración
 MAX_TEMP_STORAGE_GB = 40
 UPLOAD_EVERY = 50
 def get_folder_size(path):
     total = 0
         for f in filenames:
             fp = os.path.join(dirpath, f)
             total += os.path.getsize(fp)
+    return total / (1024 ** 3)
+def preprocess_image(img: Image.Image) -> np.ndarray:
+    img_rgb = img.convert("RGB")
+    img_resized = img_rgb.resize((160, 160), Image.Resampling.LANCZOS)
+    return np.array(img_resized)
 # ✅ Cargar CSV desde el dataset
 dataset = load_dataset(
     header=0
 )
+@GPU
 def build_database():
+    print(f"📊 Uso actual de almacenamiento temporal INICIO: {get_folder_size('.'):.2f} GB")
     print("🔄 Generando embeddings...")
     batch_size = 10
     archive_batch_size = 50
         print(f"📦 Lote {i // batch_size + 1}/{(len(dataset) + batch_size - 1) // batch_size}")
         for j in range(len(batch["image"])):
+            image_url = batch["image"][j]
             if not isinstance(image_url, str) or not image_url.startswith("http") or image_url.strip().lower() == "image":
                 print(f"⚠️ Saltando {i + j} - URL inválida: {image_url}")
             name = f"image_{i + j}"
             filename = LOCAL_EMB_DIR / f"{name}.pkl"
+            # Verificar si ya fue subido
             try:
                 hf_hub_download(
                     repo_id=DATASET_ID,
                 del img_processed
                 gc.collect()
+                if len(batch_files) >= archive_batch_size or get_folder_size(".") > MAX_TEMP_STORAGE_GB:
                     archive_path = ARCHIVE_DIR / f"batch_{batch_index:03}.tar.gz"
                     with tarfile.open(archive_path, "w:gz") as tar:
                         for file in batch_files:
                     print(f"📦 Empaquetado: {archive_path}")
                     upload_file(
                         path_or_fileobj=str(archive_path),
                         path_in_repo=f"{EMBEDDINGS_SUBFOLDER}/{archive_path.name}",
                     )
                     print(f"✅ Subido: {archive_path.name}")
                     for f in batch_files:
                         f.unlink()
                     archive_path.unlink()
                     print("🧹 Limpieza completada tras subida")
                     batch_files = []
                     batch_index += 1
+                    time.sleep(2)
+                    print(f"📊 Uso actual FINAL: {get_folder_size('.'):.2f} GB")
             except Exception as e:
                 print(f"❌ Error en {name}: {e}")
                 continue
     if batch_files:
         archive_path = ARCHIVE_DIR / f"batch_{batch_index:03}.tar.gz"
         with tarfile.open(archive_path, "w:gz") as tar:
         archive_path.unlink()
         print("✅ Subida y limpieza final")
+# 🔍 Buscar similitudes
 def find_similar_faces(uploaded_image: Image.Image):
     try:
         img_processed = preprocess_image(uploaded_image)
     summary = "\n".join([f"{name} - Similitud: {sim:.2f}" for sim, name, _ in top])
     return gallery, summary
 # 🎛️ Interfaz Gradio
+with gr.Blocks() as demo:
+    gr.Markdown("## 🔍 Reconocimiento facial con DeepFace + ZeroGPU")
+    with gr.Row():
+        image_input = gr.Image(label="📤 Sube una imagen", type="pil")
+        find_btn = gr.Button("🔎 Buscar similares")
+    gallery = gr.Gallery(label="📸 Rostros similares")
+    summary = gr.Textbox(label="🧠 Detalle", lines=6)
+    find_btn.click(fn=find_similar_faces, inputs=image_input, outputs=[gallery, summary])
+    with gr.Row():
+        build_btn = gr.Button("⚙️ Construir base de embeddings (usa GPU)")
+        build_btn.click(fn=build_database, inputs=[], outputs=[])
 demo.launch()