import os
import json

import cv2
import numpy as np
import mediapipe as mp
import tensorflow as tf
import gradio as gr

print("TensorFlow version:", tf.__version__)

# ==== MODEL PATHS ====
BASE_DIR = os.path.dirname(__file__)
MODELS_DIR = os.path.join(BASE_DIR, "models")
MODEL_PATH = os.path.join(MODELS_DIR, "sign_model_lstm_v1.keras")
LABELS_PATH = os.path.join(MODELS_DIR, "label_names.json")

print("Cargando modelo desde:", MODEL_PATH)
model = tf.keras.models.load_model(MODEL_PATH)

# Explicit encoding: label names are Spanish and may contain accented
# characters; the platform default codec is not guaranteed to be UTF-8.
with open(LABELS_PATH, "r", encoding="utf-8") as f:
    label_names = json.load(f)

mp_holistic = mp.solutions.holistic

MAX_FRAMES = 20   # sequence length; must match the value used at training time
N_FEATURES = 225  # 75 landmarks * 3 coords (x, y, z)


# ========= PROCESSING FUNCTIONS =========

def extract_landmarks_from_results(results):
    """Flatten MediaPipe Holistic results into a 1D (225,) float32 vector.

    Concatenates pose (33 points), left hand (21) and right hand (21),
    each as (x, y, z). Any landmark group MediaPipe failed to detect is
    zero-filled so the output length is always deterministic.
    """
    def get_xyz(landmarks, n_points):
        # Zero-fill undetected groups; pad/trim detected ones to exactly
        # n_points so downstream shapes never vary frame to frame.
        if landmarks is None:
            data = [[0.0, 0.0, 0.0]] * n_points
        else:
            data = [[lm.x, lm.y, lm.z] for lm in landmarks]
        if len(data) < n_points:
            data += [[0.0, 0.0, 0.0]] * (n_points - len(data))
        return data[:n_points]

    pose = get_xyz(
        results.pose_landmarks.landmark if results.pose_landmarks else None,
        33,
    )
    left_hand = get_xyz(
        results.left_hand_landmarks.landmark if results.left_hand_landmarks else None,
        21,
    )
    right_hand = get_xyz(
        results.right_hand_landmarks.landmark if results.right_hand_landmarks else None,
        21,
    )

    all_points = pose + left_hand + right_hand
    return np.array(all_points, dtype=np.float32).flatten()  # (225,)


def preprocess_video_to_sequence(video_path, max_frames=MAX_FRAMES):
    """Read a video, extract per-frame landmarks, return a (1, max_frames, 225)
    float32 sequence ready for the LSTM.

    Only the first `max_frames` frames are used; shorter clips are
    zero-padded at the end to keep a fixed sequence length.

    Raises:
        ValueError: if `video_path` is None or no frame could be decoded.
    """
    if video_path is None:
        raise ValueError("No se recibió ruta de video.")

    cap = cv2.VideoCapture(video_path)
    frames_feats = []
    try:
        with mp_holistic.Holistic(
            static_image_mode=False,
            model_complexity=1,
            enable_segmentation=False,
            refine_face_landmarks=False,
            min_detection_confidence=0.5,
            min_tracking_confidence=0.5,
        ) as holistic:
            # Stop as soon as we have max_frames vectors or the video ends.
            while len(frames_feats) < max_frames:
                ret, frame = cap.read()
                if not ret:
                    break
                # OpenCV decodes BGR; MediaPipe expects RGB.
                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                results = holistic.process(frame_rgb)
                frames_feats.append(extract_landmarks_from_results(results))  # (225,)
    finally:
        # Always release the capture, even if MediaPipe/OpenCV raises
        # mid-video — otherwise the file handle leaks.
        cap.release()

    if not frames_feats:
        raise ValueError("El video no tiene frames válidos para procesar.")

    seq = np.array(frames_feats, dtype=np.float32)

    # Pad short clips with zero rows / truncate long ones to max_frames.
    if seq.shape[0] < max_frames:
        pad_len = max_frames - seq.shape[0]
        pad = np.zeros((pad_len, seq.shape[1]), dtype=np.float32)
        seq = np.concatenate([seq, pad], axis=0)
    else:
        seq = seq[:max_frames, :]

    return seq.reshape(1, max_frames, seq.shape[1])  # (1, T, 225)


def predict_video_lstm(video_path):
    """Gradio callback: classify the sign performed in a video.

    Args:
        video_path: filesystem path handed over by the Video component,
            or None when nothing was uploaded/recorded.

    Returns:
        A (text, prob_dict) tuple — a human-readable prediction summary
        and a {label: probability} mapping for the Label component.
        Errors are reported as text rather than raised, so the UI never
        crashes on a bad upload.
    """
    if video_path is None:
        return "Sube o graba un video primero.", {}

    try:
        seq = preprocess_video_to_sequence(video_path, max_frames=MAX_FRAMES)
        probs = model.predict(seq, verbose=0)[0]  # (num_classes,)
    except Exception as e:
        # Best-effort UI: surface the problem to the user instead of a 500.
        return f"Error procesando el video: {e}", {}

    idx = int(np.argmax(probs))
    label = label_names[idx]
    conf = float(probs[idx])

    prob_dict = {name: float(probs[i]) for i, name in enumerate(label_names)}
    texto = f"Predicción: {label} (confianza {conf:.2f})"
    return texto, prob_dict


# ========= GRADIO INTERFACE =========
demo = gr.Interface(
    fn=predict_video_lstm,
    inputs=gr.Video(
        sources=["upload", "webcam"],  # modern Gradio API: 'sources', not 'source'
        label="Video de seña (sube o graba)",
        format="mp4",
        type="filepath",  # the callback receives the file path, not raw frames
    ),
    outputs=[
        gr.Textbox(label="Resultado"),
        gr.Label(label="Probabilidades por clase"),
    ],
    title="LSP-EnSeñas - Demo LSTM",
    description=(
        "Sube un video corto o grábalo con la webcam haciendo una seña. "
        "El modelo LSTM procesa la secuencia de landmarks (pose + manos) y "
        "muestra la clase más probable y la distribución de probabilidades."
    ),
)

if __name__ == "__main__":
    demo.launch()