# EnSenas / app.py — Gradio demo for LSTM sign-language classification (HF Spaces)
import os
import json
import cv2
import numpy as np
import mediapipe as mp
import tensorflow as tf
import gradio as gr
# Log the TF runtime version at startup (useful when debugging Space builds).
print("TensorFlow version:", tf.__version__)

# ==== MODEL PATHS ====
# Resolve artifacts relative to this file so the app works from any CWD.
BASE_DIR = os.path.dirname(__file__)
MODELS_DIR = os.path.join(BASE_DIR, "models")
MODEL_PATH = os.path.join(MODELS_DIR, "sign_model_lstm_v1.keras")
LABELS_PATH = os.path.join(MODELS_DIR, "label_names.json")

print("Cargando modelo desde:", MODEL_PATH)
model = tf.keras.models.load_model(MODEL_PATH)

# label_names: list of class names, index-aligned with the model's output layer.
with open(LABELS_PATH, "r") as f:
    label_names = json.load(f)

mp_holistic = mp.solutions.holistic

MAX_FRAMES = 20   # same value used at training time
N_FEATURES = 225  # 75 landmarks * 3 coords (x, y, z)
# ========= FUNCIONES DE PROCESADO =========
def extract_landmarks_from_results(results):
    """Flatten MediaPipe Holistic results into a single (225,) float32 vector.

    Concatenates pose (33 points), left hand (21 points) and right hand
    (21 points), each point as (x, y, z). Any landmark group that is missing
    from `results` is filled with zeros.
    """
    def _coords(group, expected):
        # Missing group -> all zeros; otherwise pad/truncate to `expected` points.
        if group is None:
            return [[0.0, 0.0, 0.0] for _ in range(expected)]
        pts = [[p.x, p.y, p.z] for p in group]
        while len(pts) < expected:
            pts.append([0.0, 0.0, 0.0])
        return pts[:expected]

    pose = _coords(
        results.pose_landmarks.landmark if results.pose_landmarks else None, 33
    )
    left = _coords(
        results.left_hand_landmarks.landmark if results.left_hand_landmarks else None, 21
    )
    right = _coords(
        results.right_hand_landmarks.landmark if results.right_hand_landmarks else None, 21
    )
    # (75, 3) -> flat (225,)
    return np.array(pose + left + right, dtype=np.float32).flatten()
def preprocess_video_to_sequence(video_path, max_frames=MAX_FRAMES):
    """Read a video, extract per-frame landmarks, and return a sequence
    shaped (1, max_frames, 225) ready for the LSTM.

    Only the first `max_frames` frames are processed; shorter clips are
    zero-padded at the end.

    Raises:
        ValueError: if `video_path` is None, the file cannot be opened,
            or no frames could be read.
    """
    if video_path is None:
        raise ValueError("No se recibi贸 ruta de video.")

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        # Fail fast with a clear error instead of looping over zero frames.
        cap.release()
        raise ValueError("El video no tiene frames v谩lidos para procesar.")

    frames_feats = []
    try:
        with mp_holistic.Holistic(
            static_image_mode=False,
            model_complexity=1,
            enable_segmentation=False,
            refine_face_landmarks=False,
            min_detection_confidence=0.5,
            min_tracking_confidence=0.5
        ) as holistic:
            while len(frames_feats) < max_frames:
                ret, frame = cap.read()
                if not ret:
                    break
                # MediaPipe expects RGB; OpenCV decodes frames as BGR.
                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                results = holistic.process(frame_rgb)
                frames_feats.append(extract_landmarks_from_results(results))  # (225,)
    finally:
        # Bug fix: release the capture even if MediaPipe/OpenCV raises mid-stream;
        # the original only released on the success path.
        cap.release()

    if not frames_feats:
        raise ValueError("El video no tiene frames v谩lidos para procesar.")

    seq = np.array(frames_feats, dtype=np.float32)

    # Zero-pad short clips / truncate long ones to exactly max_frames.
    if seq.shape[0] < max_frames:
        pad_len = max_frames - seq.shape[0]
        pad = np.zeros((pad_len, seq.shape[1]), dtype=np.float32)
        seq = np.concatenate([seq, pad], axis=0)
    else:
        seq = seq[:max_frames, :]

    # Add the batch dimension: (1, T, 225).
    return seq.reshape(1, max_frames, seq.shape[1])
def predict_video_lstm(video_path):
    """Gradio handler: classify the sign shown in a video.

    Args:
        video_path: filesystem path of the uploaded/recorded clip (or None).

    Returns:
        A (message, probabilities) pair: a human-readable result string and
        a dict mapping each class name to its probability.
    """
    if video_path is None:
        return "Sube o graba un video primero.", {}

    try:
        sequence = preprocess_video_to_sequence(video_path, max_frames=MAX_FRAMES)
        probs = model.predict(sequence, verbose=0)[0]  # (num_classes,)
    except Exception as e:
        # Surface processing errors to the UI instead of crashing the app.
        return f"Error procesando el video: {e}", {}

    best = int(np.argmax(probs))
    label = label_names[best]
    conf = float(probs[best])

    prob_dict = {}
    for i, name in enumerate(label_names):
        prob_dict[name] = float(probs[i])

    texto = f"Predicci贸n: {label} (confianza {conf:.2f})"
    return texto, prob_dict
# ========= GRADIO INTERFACE =========
demo = gr.Interface(
    fn=predict_video_lstm,
    inputs=gr.Video(
        sources=["upload", "webcam"],  # newer Gradio API: `sources`, not the removed `source` kwarg
        label="Video de se帽a (sube o graba)",
        format="mp4",
        type="filepath"  # Gradio passes the handler the file path, not raw frames
    ),
    outputs=[
        gr.Textbox(label="Resultado"),
        gr.Label(label="Probabilidades por clase")
    ],
    title="LSP-EnSe帽as - Demo LSTM",
    description=(
        "Sube un video corto o gr谩balo con la webcam haciendo una se帽a. "
        "El modelo LSTM procesa la secuencia de landmarks (pose + manos) y "
        "muestra la clase m谩s probable y la distribuci贸n de probabilidades."
    ),
)

if __name__ == "__main__":
    demo.launch()