File size: 4,901 Bytes
af4ab3b 56da706 af4ab3b 56da706 af4ab3b 56da706 af4ab3b 56da706 af4ab3b 56da706 af4ab3b 56da706 af4ab3b 56da706 af4ab3b 56da706 af4ab3b 56da706 af4ab3b 56da706 af4ab3b 56da706 af4ab3b 56da706 af4ab3b 56da706 af4ab3b 56da706 af4ab3b 56da706 af4ab3b 56da706 af4ab3b 56da706 af4ab3b 56da706 af4ab3b 56da706 af4ab3b 56da706 af4ab3b 56da706 af4ab3b 56da706 af4ab3b 56da706 af4ab3b 56da706 af4ab3b 56da706 af4ab3b 56da706 af4ab3b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 |
import os
import json
import cv2
import numpy as np
import mediapipe as mp
import tensorflow as tf
import gradio as gr
print("TensorFlow version:", tf.__version__)

# ==== MODEL PATHS ====
BASE_DIR = os.path.dirname(__file__)
MODELS_DIR = os.path.join(BASE_DIR, "models")
MODEL_PATH = os.path.join(MODELS_DIR, "sign_model_lstm_v1.keras")
LABELS_PATH = os.path.join(MODELS_DIR, "label_names.json")

print("Cargando modelo desde:", MODEL_PATH)
model = tf.keras.models.load_model(MODEL_PATH)

# Explicit UTF-8 so accented class names (Spanish labels) decode correctly
# regardless of the platform's default locale encoding.
with open(LABELS_PATH, "r", encoding="utf-8") as f:
    label_names = json.load(f)

mp_holistic = mp.solutions.holistic

MAX_FRAMES = 20   # must match the sequence length used at training time
N_FEATURES = 225  # 75 landmarks * 3 coords (x, y, z)
# ========= FUNCIONES DE PROCESADO =========
def extract_landmarks_from_results(results):
    """
    Flatten a MediaPipe Holistic result into a 1-D feature vector (225,).

    Concatenates pose (33 points), left hand (21) and right hand (21),
    each point as (x, y, z). Any landmark group that was not detected is
    zero-filled; groups with too few/many points are padded/trimmed to
    their expected length.
    """
    def _coords(landmarks, expected):
        # Zero-fill an undetected group; otherwise normalize to exactly
        # `expected` points.
        if landmarks is None:
            return [[0.0, 0.0, 0.0] for _ in range(expected)]
        pts = [[p.x, p.y, p.z] for p in landmarks]
        while len(pts) < expected:
            pts.append([0.0, 0.0, 0.0])
        return pts[:expected]

    groups = [
        (results.pose_landmarks, 33),
        (results.left_hand_landmarks, 21),
        (results.right_hand_landmarks, 21),
    ]
    features = []
    for group, n_points in groups:
        features.extend(_coords(group.landmark if group else None, n_points))
    return np.array(features, dtype=np.float32).flatten()  # (225,)
def preprocess_video_to_sequence(video_path, max_frames=MAX_FRAMES):
    """
    Read a video, extract holistic landmarks per frame and return a
    sequence shaped (1, max_frames, 225) ready for the LSTM.

    Frames beyond `max_frames` are ignored; shorter videos are
    zero-padded at the end.

    Raises:
        ValueError: if no path is given or no frame could be processed.
    """
    if video_path is None:
        raise ValueError("No se recibió ruta de video.")
    cap = cv2.VideoCapture(video_path)
    frames_feats = []
    try:
        with mp_holistic.Holistic(
            static_image_mode=False,
            model_complexity=1,
            enable_segmentation=False,
            refine_face_landmarks=False,
            min_detection_confidence=0.5,
            min_tracking_confidence=0.5
        ) as holistic:
            while len(frames_feats) < max_frames:
                ret, frame = cap.read()
                if not ret:
                    break
                # MediaPipe expects RGB; OpenCV decodes frames as BGR.
                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                results = holistic.process(frame_rgb)
                frames_feats.append(extract_landmarks_from_results(results))  # (225,)
    finally:
        # Release the capture even if MediaPipe raises mid-video.
        cap.release()
    if not frames_feats:
        raise ValueError("El video no tiene frames válidos para procesar.")
    seq = np.array(frames_feats, dtype=np.float32)
    # Zero-pad short videos / trim long ones to exactly max_frames.
    if seq.shape[0] < max_frames:
        pad_len = max_frames - seq.shape[0]
        pad = np.zeros((pad_len, seq.shape[1]), dtype=np.float32)
        seq = np.concatenate([seq, pad], axis=0)
    else:
        seq = seq[:max_frames, :]
    return seq.reshape(1, max_frames, seq.shape[1])  # (1, T, 225)
def predict_video_lstm(video_path):
    """
    Gradio handler.

    - Receives the file path of an uploaded/recorded video.
    - Returns (result_text, probability_dict) for the Textbox and Label
      outputs: the top class with its confidence, plus the full
      per-class probability distribution.
    """
    if video_path is None:
        return "Sube o graba un video primero.", {}
    try:
        seq = preprocess_video_to_sequence(video_path, max_frames=MAX_FRAMES)
        probs = model.predict(seq, verbose=0)[0]  # (num_classes,)
    except Exception as e:
        # Surface the failure in the UI instead of crashing the app.
        return f"Error procesando el video: {e}", {}
    idx = int(np.argmax(probs))
    label = label_names[idx]
    conf = float(probs[idx])
    prob_dict = {name: float(probs[i]) for i, name in enumerate(label_names)}
    texto = f"Predicción: {label} (confianza {conf:.2f})"
    return texto, prob_dict
# ========= GRADIO INTERFACE =========
demo = gr.Interface(
    fn=predict_video_lstm,
    inputs=gr.Video(
        sources=["upload", "webcam"],  # Gradio 4.x API: 'sources' (plural), not 'source'
        label="Video de seña (sube o graba)",
        format="mp4",
        # The handler receives the uploaded file's path.
        # NOTE(review): confirm the installed Gradio version still accepts
        # a 'type' kwarg on gr.Video — newer releases always pass a filepath.
        type="filepath"
    ),
    outputs=[
        gr.Textbox(label="Resultado"),
        gr.Label(label="Probabilidades por clase")
    ],
    title="LSP-EnSeñas - Demo LSTM",
    description=(
        "Sube un video corto o grábalo con la webcam haciendo una seña. "
        "El modelo LSTM procesa la secuencia de landmarks (pose + manos) y "
        "muestra la clase más probable y la distribución de probabilidades."
    ),
)

if __name__ == "__main__":
    demo.launch()
|