| import cv2 |
| import numpy as np |
| import gradio as gr |
| import tensorflow as tf |
| import mediapipe as mp |
|
|
# --- Model setup -------------------------------------------------------------
# Load the TFLite classifier and cache its tensor metadata once at startup.
interpreter = tf.lite.Interpreter(model_path="asl_mlp_model1.tflite")
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# --- MediaPipe hands setup ---------------------------------------------------
# static_image_mode=True: treat every frame independently (no tracking state),
# which suits one-shot uploads/webcam snapshots from the Gradio UI.
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
hands = mp_hands.Hands(static_image_mode=True, max_num_hands=2, min_detection_confidence=0.5)

# --- Labels ------------------------------------------------------------------
# One class name per line; the line index must match the model's output index,
# so blank lines are skipped rather than kept as empty labels.
with open('label1.txt', 'r', encoding='utf-8') as f:
    labels = [line.strip() for line in f if line.strip()]
|
|
def normalize_landmarks(landmarks):
    """Translate and scale a flat landmark vector into a normalized form.

    Args:
        landmarks: flat sequence of length 3*N laid out as (x, y, z) triples.

    Returns:
        A flat list of floats, same length as the input: all points shifted so
        the first landmark sits at the origin, then divided by the diagonal of
        the axis-aligned bounding box (left unscaled when that diagonal is 0,
        e.g. a single point or an all-zero padding vector).
    """
    # Explicit float dtype: guarantees a fresh float copy, so the in-place ops
    # below never mutate the caller's data and never hit integer casting errors.
    coords = np.array(landmarks, dtype=np.float64).reshape(-1, 3)
    # Copy the base point: coords[0] alone is a VIEW into coords, and
    # `coords -= view_of_coords` relies on NumPy's overlap buffering — fragile.
    base = coords[0].copy()
    coords -= base
    # Bounding-box diagonal length as a scale factor (translation-invariant).
    scale = np.linalg.norm(coords.max(axis=0) - coords.min(axis=0))
    if scale > 0:
        coords /= scale
    return coords.flatten().tolist()
|
|
def extract_landmarks(image):
    """Detect up to two hands in `image`, draw them, and build a feature vector.

    Returns a `(image, features)` pair: the image annotated in place with the
    detected landmark skeletons, and a normalized 126-float vector (two hands
    x 21 landmarks x 3 coords; the second hand is zero-padded when only one is
    found). `features` is None when no hand is detected.
    """
    detection = hands.process(image)
    if not detection.multi_hand_landmarks:
        return image, None

    vectors = []
    for hand_landmarks in detection.multi_hand_landmarks[:2]:
        flat_hand = []
        for point in hand_landmarks.landmark:
            flat_hand.extend((point.x, point.y, point.z))
        vectors.append(flat_hand)
        mp_drawing.draw_landmarks(image, hand_landmarks, mp_hands.HAND_CONNECTIONS)

    # Zero-pad the missing second hand so the vector length is always 126.
    if len(vectors) == 1:
        vectors.append([0.0] * 63)

    features = normalize_landmarks(vectors[0] + vectors[1])
    return image, features
|
|
def predict_from_image(image):
    """Run the full pipeline on one image: landmarks -> TFLite -> label.

    Args:
        image: RGB image array as delivered by the Gradio Image component.

    Returns:
        `(annotated_image, message)` — the image with landmark overlays, and
        either a formatted prediction string with confidence or a
        "no hands detected" message when MediaPipe finds nothing.
    """
    image, landmarks = extract_landmarks(image)
    if landmarks is None:
        return image, "❌ No hands detected"

    # Batch of one: model expects shape (1, len(landmarks)) float32 input.
    input_data = np.array([landmarks], dtype=np.float32)
    interpreter.set_tensor(input_details[0]['index'], input_data)
    interpreter.invoke()

    output = interpreter.get_tensor(output_details[0]['index'])[0]
    class_id = int(np.argmax(output))
    confidence = output[class_id]
    predicted_word = labels[class_id]
    return image, f"✅ Predicted: {predicted_word} ({confidence:.2f})"
|
|
| |
# Wire the prediction pipeline into a Gradio UI and start serving.
# Input: a single image (webcam capture or file upload).
# Outputs: the annotated image and the prediction text.
demo = gr.Interface(
    fn=predict_from_image,
    inputs=gr.Image(label="Input Image", sources=["webcam", "upload"]),
    outputs=[gr.Image(label="Landmarks"), gr.Textbox(label="Prediction")],
    title="ASL Sign Prediction using TFLite Model",
)
demo.launch()