# signlang / model.py
# Varun Lellapalli
# push try
# 8274c81 unverified
import cv2
import numpy as np
import gradio as gr
import tensorflow as tf
import mediapipe as mp
# --- TFLite classifier -------------------------------------------------------
# Load the model once at import time and cache its tensor descriptors so the
# request handler only has to set the input, invoke, and read the output.
interpreter = tf.lite.Interpreter(model_path="asl_mlp_model1.tflite")
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# --- MediaPipe hand tracker --------------------------------------------------
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
hands = mp_hands.Hands(
    static_image_mode=True,
    max_num_hands=2,
    min_detection_confidence=0.5,
)

# --- Word labels -------------------------------------------------------------
# One label per line; the line position is used as the class index when the
# model output is decoded. Update the file as needed.
with open('label1.txt', 'r') as label_file:
    labels = [entry.strip() for entry in label_file]
def normalize_landmarks(landmarks):
    """Translate and scale a flat landmark vector into a normalized frame.

    The flat ``[x0, y0, z0, x1, y1, z1, ...]`` sequence is viewed as rows of
    (x, y, z) points. The first point is moved to the origin, then every
    coordinate is divided by the length of the bounding-box diagonal of the
    shifted points, so the result is independent of the points' absolute
    position and overall size.

    Args:
        landmarks: Flat sequence of numbers whose length is a multiple of 3.
            Integer input is accepted and promoted to float.

    Returns:
        A flat list of Python floats with the same length as ``landmarks``.

    Raises:
        ValueError: If the length of ``landmarks`` is not a multiple of 3.
        IndexError: If ``landmarks`` is empty.
    """
    # Force a float dtype: NumPy rejects the in-place division below when the
    # array is integer-typed (e.g. a list of ints was passed in).
    points = np.array(landmarks, dtype=np.float64).reshape(-1, 3)

    # Copy the origin row so the subtraction never reads from a view of the
    # memory it is overwriting.
    origin = points[0].copy()
    points -= origin

    extent = np.linalg.norm(points.max(axis=0) - points.min(axis=0))
    # A zero extent means all points coincide; skip scaling to avoid dividing
    # by zero and return the (all-zero) shifted points.
    if extent > 0:
        points /= extent
    return points.flatten().tolist()
def extract_landmarks(image):
    """Detect up to two hands in ``image`` and build a flat feature vector.

    Each detected hand contributes the (x, y, z) of every landmark, in the
    order MediaPipe reports them. When only one hand is found, a zero vector
    of the same length stands in for the second hand. The two vectors are
    concatenated and passed through ``normalize_landmarks`` as a single
    point set (so the zero padding is shifted and scaled along with the
    real points).

    The hand skeletons are drawn directly onto ``image``.

    Args:
        image: Image array to analyse, or ``None``. It is passed to
            MediaPipe without any colour conversion.
            NOTE(review): assumes the caller already supplies RGB data.

    Returns:
        A ``(image, features)`` tuple. ``features`` is a flat list of floats,
        or ``None`` when ``image`` is ``None`` or no hand was detected.
    """
    # Nothing to analyse (e.g. the UI submitted without a picture); report
    # "no landmarks" instead of crashing inside MediaPipe.
    if image is None:
        return image, None

    results = hands.process(image)
    if not results.multi_hand_landmarks:
        return image, None

    hand_vectors = []
    for hand in results.multi_hand_landmarks[:2]:
        hand_vectors.append(
            [coord for lm in hand.landmark for coord in (lm.x, lm.y, lm.z)]
        )
        # Annotates the caller's array in place.
        mp_drawing.draw_landmarks(image, hand, mp_hands.HAND_CONNECTIONS)

    if len(hand_vectors) == 1:
        # Pad the missing second hand so the feature length stays constant.
        hand_vectors.append([0.0] * len(hand_vectors[0]))

    flat = normalize_landmarks(hand_vectors[0] + hand_vectors[1])
    return image, flat
def predict_from_image(image):
    """Classify the sign shown in ``image`` and describe the result as text.

    Landmarks are extracted with ``extract_landmarks``, fed to the TFLite
    interpreter as a single-row float32 batch, and the highest-scoring
    output index is mapped to a word through ``labels``.

    Args:
        image: Image array from the UI, or ``None`` when nothing was
            submitted.

    Returns:
        A ``(image, message)`` tuple: the image returned by
        ``extract_landmarks`` (``None`` if no image was given) and a
        human-readable status or prediction string.
    """
    # The UI can invoke the handler with no picture at all.
    if image is None:
        return None, "❌ No image provided"

    image, landmarks = extract_landmarks(image)
    if landmarks is None:
        return image, "❌ No hands detected"

    input_data = np.array([landmarks], dtype=np.float32)
    interpreter.set_tensor(input_details[0]['index'], input_data)
    print("Input shape:", input_data.shape)
    print("Input sample (truncated):", input_data[0][:10])
    interpreter.invoke()

    output = interpreter.get_tensor(output_details[0]['index'])[0]
    class_id = int(np.argmax(output))
    confidence = output[class_id]

    # The label file may have fewer entries than the model has outputs; fall
    # back to the raw class index rather than raising IndexError.
    if class_id < len(labels):
        predicted_word = labels[class_id]
    else:
        predicted_word = f"class {class_id}"
    return image, f"✅ Predicted: {predicted_word} ({confidence:.2f})"
# Gradio UI: one image input (webcam or upload), two outputs (the image
# returned by the handler and the prediction text). The app is launched as
# soon as this module is executed.
demo = gr.Interface(
    fn=predict_from_image,
    inputs=gr.Image(label="Input Image", sources=["webcam", "upload"]),
    outputs=[
        gr.Image(label="Landmarks"),
        gr.Textbox(label="Prediction"),
    ],
    title="ASL Sign Prediction using TFLite Model",
)
demo.launch()