import os
import json
import cv2
import numpy as np
import mediapipe as mp
import tensorflow as tf
import gradio as gr

print("TensorFlow version:", tf.__version__)

# ==== MODEL PATHS ====
BASE_DIR = os.path.dirname(__file__)
MODELS_DIR = os.path.join(BASE_DIR, "models")

MODEL_PATH = os.path.join(MODELS_DIR, "sign_model_lstm_v1.keras")
LABELS_PATH = os.path.join(MODELS_DIR, "label_names.json")

print("Cargando modelo desde:", MODEL_PATH)
model = tf.keras.models.load_model(MODEL_PATH)

with open(LABELS_PATH, "r", encoding="utf-8") as f:
    label_names = json.load(f)
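
# Consistency check (assumption: label_names.json stores a JSON array whose
# order matches the model's output units):
n_classes = model.output_shape[-1]
if len(label_names) != n_classes:
    raise ValueError(
        f"Got {len(label_names)} labels but the model outputs {n_classes} classes."
    )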

mp_holistic = mp.solutions.holistic

MAX_FRAMES = 20          # same value used during training
N_FEATURES = 225         # 75 points * 3 coords (x, y, z)
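
# Optional sanity check: warn early if these constants drift from the shape
# the model was trained with (assumes the model was saved with a fixed input
# shape; if the time dimension is None, this warning is expected and harmless).
if model.input_shape[1:] != (MAX_FRAMES, N_FEATURES):
    print("Warning: model input shape", model.input_shape[1:],
          "!= (MAX_FRAMES, N_FEATURES) =", (MAX_FRAMES, N_FEATURES))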


# ========= PROCESSING FUNCTIONS =========

def extract_landmarks_from_results(results):
    """
    Convierte los resultados de MediaPipe Holistic en un vector 1D (225,)
    con pose (33), mano izq (21) y mano der (21).
    """
    def get_xyz(landmarks, n_points):
        if landmarks is None:
            data = [[0.0, 0.0, 0.0]] * n_points
        else:
            data = [[lm.x, lm.y, lm.z] for lm in landmarks]
            if len(data) < n_points:
                data += [[0.0, 0.0, 0.0]] * (n_points - len(data))
            data = data[:n_points]
        return data

    pose = get_xyz(
        results.pose_landmarks.landmark if results.pose_landmarks else None,
        33
    )
    left_hand = get_xyz(
        results.left_hand_landmarks.landmark if results.left_hand_landmarks else None,
        21
    )
    right_hand = get_xyz(
        results.right_hand_landmarks.landmark if results.right_hand_landmarks else None,
        21
    )

    all_points = pose + left_hand + right_hand
    return np.array(all_points, dtype=np.float32).flatten()  # (225,)
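
# Layout of the 225-dim vector: indices 0..98 are pose (33*3), 99..161 the
# left hand (21*3), and 162..224 the right hand (21*3), in that order.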


def preprocess_video_to_sequence(video_path, max_frames=MAX_FRAMES):
    """
    Lee un video, extrae landmarks por frame y devuelve
    una secuencia (1, max_frames, 225) lista para el LSTM.
    """
    if video_path is None:
        raise ValueError("No se recibi贸 ruta de video.")

    cap = cv2.VideoCapture(video_path)
    frames_feats = []

    with mp_holistic.Holistic(
        static_image_mode=False,
        model_complexity=1,
        enable_segmentation=False,
        refine_face_landmarks=False,
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5
    ) as holistic:

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = holistic.process(frame_rgb)

            vec = extract_landmarks_from_results(results)  # (225,)
            frames_feats.append(vec)

            if len(frames_feats) >= max_frames:
                break

    cap.release()

    if len(frames_feats) == 0:
        raise ValueError("El video no tiene frames v谩lidos para procesar.")

    seq = np.array(frames_feats, dtype=np.float32)

    # pad / truncate to a fixed length
    if seq.shape[0] < max_frames:
        pad_len = max_frames - seq.shape[0]
        pad = np.zeros((pad_len, seq.shape[1]), dtype=np.float32)
        seq = np.concatenate([seq, pad], axis=0)
    else:
        seq = seq[:max_frames, :]

    seq = seq.reshape(1, max_frames, seq.shape[1])  # (1, T, 225)
    return seq
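
# Quick local smoke test (hypothetical file path) before wiring up the UI:
#   seq = preprocess_video_to_sequence("samples/example_sign.mp4")
#   print(seq.shape)  # -> (1, 20, 225)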


def predict_video_lstm(video_path):
    """
    Funci贸n que usa Gradio:
    - Recibe la ruta de un video
    - Devuelve la predicci贸n principal + distribuci贸n de probabilidades
    """
    if video_path is None:
        return "Sube o graba un video primero.", {}

    try:
        seq = preprocess_video_to_sequence(video_path, max_frames=MAX_FRAMES)
        probs = model.predict(seq, verbose=0)[0]  # (num_classes,)
    except Exception as e:
        return f"Error procesando el video: {e}", {}

    idx = int(np.argmax(probs))
    label = label_names[idx]
    conf = float(probs[idx])

    prob_dict = {name: float(probs[i]) for i, name in enumerate(label_names)}

    texto = f"Predicci贸n: {label} (confianza {conf:.2f})"
    return texto, prob_dict
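
# Note: gr.Label sorts the returned {class: probability} dict and highlights
# the top entries on its own, so prob_dict needs no pre-sorting here.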


# ========= GRADIO INTERFACE =========

demo = gr.Interface(
    fn=predict_video_lstm,
    inputs=gr.Video(
        sources=["upload", "webcam"],   # Gradio 4+ uses 'sources' (plural) instead of the old 'source' arg
        label="Sign video (upload or record)",
        format="mp4"                    # gr.Video passes the function the file path as a string
    ),
    outputs=[
        gr.Textbox(label="Resultado"),
        gr.Label(label="Probabilidades por clase")
    ],
    title="LSP-EnSe帽as - Demo LSTM",
    description=(
        "Upload a short video, or record one with the webcam, of a sign being "
        "performed. The LSTM model processes the sequence of landmarks (pose + "
        "hands) and shows the most likely class and the probability distribution."
    ),
)

if __name__ == "__main__":
    demo.launch()
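    # To expose a temporary public URL (e.g. for testing from a phone),
    # Gradio also supports: demo.launch(share=True)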