Spaces:

fabiosam
/

EnSenas

Sleeping

App Files Files Community

fabiosam committed on Nov 17, 2025

Commit

af4ab3b

verified ·

1 Parent(s): f52c4ac

Create app.py

Browse files

Files changed (1) hide show

app.py +183 -0

app.py ADDED Viewed

	@@ -0,0 +1,183 @@

+import os
+import cv2
+import json
+import numpy as np
+import mediapipe as mp
+import tensorflow as tf
+from tensorflow import keras
+import gradio as gr
# =========================
# BASIC CONFIGURATION
# =========================
MAX_FRAMES = 20  # must be the same value that was used at training time
MODEL_DIR = "models"
MODEL_PATH = os.path.join(MODEL_DIR, "sign_model_lstm_v1.keras")
LABELS_PATH = os.path.join(MODEL_DIR, "label_names.json")
print("TensorFlow version:", tf.__version__)
print("Cargando modelo desde:", MODEL_PATH)
# Load the LSTM model once at import time so every request reuses it.
model = keras.models.load_model(MODEL_PATH)
# Load the class names. The encoding is given explicitly because JSON files
# are UTF-8 and the platform default encoding may differ, which would corrupt
# or reject labels containing non-ASCII characters.
with open(LABELS_PATH, "r", encoding="utf-8") as f:
    label_names = json.load(f)
# Shortcut to the MediaPipe Holistic solution module.
mp_holistic = mp.solutions.holistic
+# =========================
+# EXTRACCIÓN DE LANDMARKS
+# =========================
def extract_landmarks_from_results(results):
    """Flatten a MediaPipe Holistic result into one 1-D feature vector.

    The vector is laid out as pose (33 points), left hand (21 points) and
    right hand (21 points), i.e. 75 points in total. Every point contributes
    (x, y, z), giving 75 * 3 = 225 float32 values.

    A landmark group that is missing (falsy) is filled with zeros. A group
    with fewer points than expected is zero-padded at the end, and a group
    with more points is truncated.
    """
    def _group_coords(group, count):
        # Start from zeros so absent or short groups are padded implicitly.
        coords = np.zeros((count, 3), dtype=np.float32)
        if group:
            # zip() stops at `count` rows, which truncates oversized groups.
            for row, point in zip(coords, group.landmark):
                row[:] = (point.x, point.y, point.z)
        return coords

    layout = (
        (results.pose_landmarks, 33),
        (results.left_hand_landmarks, 21),
        (results.right_hand_landmarks, 21),
    )
    stacked = np.concatenate(
        [_group_coords(group, count) for group, count in layout],
        axis=0,
    )
    return stacked.reshape(-1)  # (225,)
+# =========================
+# PROCESAR VIDEO -> SECUENCIA
+# =========================
def preprocess_video_to_sequence(video_path, max_frames=MAX_FRAMES):
    """Turn a video file into a fixed-length landmark sequence for the model.

    Frames are read in order from the start of the video. Each frame is run
    through MediaPipe Holistic and converted to a feature vector by
    ``extract_landmarks_from_results``. Reading stops once ``max_frames``
    frames have been collected; shorter videos are zero-padded at the end.

    Args:
        video_path: Path to a video file (.mp4, .mov, etc.) opened with
            ``cv2.VideoCapture``.
        max_frames: Number of time steps in the returned sequence.

    Returns:
        A float32 array of shape ``(1, max_frames, n_features)``, where
        ``n_features`` is the length of the per-frame vector (225 according
        to the extractor's docstring).

    Raises:
        ValueError: If no frame could be read from the video.
    """
    cap = cv2.VideoCapture(video_path)
    frames_feats = []
    try:
        with mp_holistic.Holistic(
            static_image_mode=False,
            model_complexity=1,
            enable_segmentation=False,
            refine_face_landmarks=False,
            min_detection_confidence=0.5,
            min_tracking_confidence=0.5
        ) as holistic:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                # OpenCV yields BGR frames; convert to RGB before handing
                # the frame to MediaPipe.
                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                results = holistic.process(frame_rgb)
                frames_feats.append(extract_landmarks_from_results(results))
                if len(frames_feats) >= max_frames:
                    break
    finally:
        # Release the capture even when decoding or landmark extraction
        # raises, so the underlying file handle is never leaked.
        cap.release()
    if not frames_feats:
        raise ValueError("El video no tiene frames válidos para procesar.")
    seq = np.array(frames_feats, dtype=np.float32)
    # Pad with zero frames or trim so the sequence has exactly max_frames steps.
    if seq.shape[0] < max_frames:
        pad_len = max_frames - seq.shape[0]
        pad = np.zeros((pad_len, seq.shape[1]), dtype=np.float32)
        seq = np.concatenate([seq, pad], axis=0)
    else:
        seq = seq[:max_frames, :]
    # Add the leading batch dimension expected by model.predict: (1, T, F).
    return seq.reshape(1, max_frames, seq.shape[1])
+# =========================
+# FUNCIÓN DE PREDICCIÓN PARA GRADIO
+# =========================
def predict_sign(video):
    """Gradio callback: classify the sign shown in a video.

    ``video`` is expected to be the path of the temporary file that Gradio
    recorded or received, or ``None`` when nothing was provided.

    Returns a ``(text, probabilities)`` pair: a human-readable result line
    and a ``{class name: probability}`` dict. When there is no input or the
    processing fails, the text carries the message and the dict is empty.
    """
    if video is None:
        return "Sube o graba un video primero.", {}

    try:
        sequence = preprocess_video_to_sequence(video, max_frames=MAX_FRAMES)
        # Batch of one: keep only the first (and only) row of scores.
        scores = model.predict(sequence, verbose=0)[0]  # (num_classes,)
        best = int(np.argmax(scores))
        label = label_names[best]
        conf = float(scores[best])
        # Full per-class distribution, shown by the Gradio Label component.
        distribution = {}
        for position, class_name in enumerate(label_names):
            distribution[class_name] = float(scores[position])
        return f"Seña predicha: {label} (confianza {conf:.2f})", distribution
    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing.
        return f"Error procesando el video: {e!s}", {}
+# =========================
+# INTERFAZ GRADIO
+# =========================
title = "LSP-EnSeñas - Demo LSTM"
description = """
Sube o graba un video corto haciendo una seña (por ejemplo, uno de los colores
que se usaron en el entrenamiento). El modelo LSTM analiza la secuencia de
landmarks (cuerpo y manos) usando MediaPipe Holistic y predice la clase más probable.
"""
# Gradio 4 replaced the singular `source="..."` argument of gr.Video with
# `sources=[...]`. Pick the keyword that matches the installed major version
# so the app starts on either API. On Gradio 4+ both webcam and upload are
# offered, as the label promises; on older releases the original
# webcam-only setting is kept.
_gradio_major = int(gr.__version__.split(".")[0])
if _gradio_major >= 4:
    _video_source_kwargs = {"sources": ["webcam", "upload"]}
else:
    _video_source_kwargs = {"source": "webcam"}
demo = gr.Interface(
    fn=predict_sign,
    inputs=gr.Video(
        label="Video de la seña (webcam o upload)",
        **_video_source_kwargs
    ),
    outputs=[
        gr.Textbox(label="Resultado"),
        gr.Label(label="Probabilidades por clase")
    ],
    title=title,
    description=description,
    allow_flagging="never"
)
if __name__ == "__main__":
    demo.launch()