| import cv2 |
| import numpy as np |
| import gradio as gr |
| import tensorflow as tf |
| import mediapipe as mp |
|
|
# --- Model setup -------------------------------------------------------------
# Load the TFLite classifier and cache its tensor metadata once at startup.
interpreter = tf.lite.Interpreter(model_path="asl_mlp_model1.tflite")
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# --- MediaPipe hands setup ---------------------------------------------------
# static_image_mode=True: treat every frame independently (no tracking state),
# which suits one-shot uploads/webcam snapshots from the Gradio UI.
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
hands = mp_hands.Hands(static_image_mode=True, max_num_hands=2, min_detection_confidence=0.5)

# --- Labels ------------------------------------------------------------------
# One class name per line; the line index must match the model's output index,
# so blank lines are skipped rather than kept as empty labels.
with open('label1.txt', 'r', encoding='utf-8') as f:
    labels = [line.strip() for line in f if line.strip()]
|
|
def normalize_landmarks(landmarks):
    """Translate and scale a flat landmark vector into a normalized form.

    Args:
        landmarks: flat sequence of length 3*N laid out as (x, y, z) triples.

    Returns:
        A flat list of floats, same length as the input: all points shifted so
        the first landmark sits at the origin, then divided by the diagonal of
        the axis-aligned bounding box (left unscaled when that diagonal is 0,
        e.g. a single point or an all-zero padding vector).
    """
    # Explicit float dtype: guarantees a fresh float copy, so the in-place ops
    # below never mutate the caller's data and never hit integer casting errors.
    coords = np.array(landmarks, dtype=np.float64).reshape(-1, 3)
    # Copy the base point: coords[0] alone is a VIEW into coords, and
    # `coords -= view_of_coords` relies on NumPy's overlap buffering — fragile.
    base = coords[0].copy()
    coords -= base
    # Bounding-box diagonal length as a scale factor (translation-invariant).
    scale = np.linalg.norm(coords.max(axis=0) - coords.min(axis=0))
    if scale > 0:
        coords /= scale
    return coords.flatten().tolist()
|
|
def extract_landmarks(image):
    """Detect up to two hands in `image`, draw them, and build a feature vector.

    Returns a `(image, features)` pair: the image annotated in place with the
    detected landmark skeletons, and a normalized 126-float vector (two hands
    x 21 landmarks x 3 coords; the second hand is zero-padded when only one is
    found). `features` is None when no hand is detected.
    """
    detection = hands.process(image)
    if not detection.multi_hand_landmarks:
        return image, None

    vectors = []
    for hand_landmarks in detection.multi_hand_landmarks[:2]:
        flat_hand = []
        for point in hand_landmarks.landmark:
            flat_hand.extend((point.x, point.y, point.z))
        vectors.append(flat_hand)
        mp_drawing.draw_landmarks(image, hand_landmarks, mp_hands.HAND_CONNECTIONS)

    # Zero-pad the missing second hand so the vector length is always 126.
    if len(vectors) == 1:
        vectors.append([0.0] * 63)

    features = normalize_landmarks(vectors[0] + vectors[1])
    return image, features
|
|
def predict_from_image(image):
    """Run the full pipeline on one image: landmarks -> TFLite -> label.

    Args:
        image: RGB image array as delivered by the Gradio Image component.

    Returns:
        `(annotated_image, message)` — the image with landmark overlays, and
        either a formatted prediction string with confidence or a
        "no hands detected" message when MediaPipe finds nothing.
    """
    image, landmarks = extract_landmarks(image)
    if landmarks is None:
        return image, "❌ No hands detected"

    # Batch of one: model expects shape (1, len(landmarks)) float32 input.
    input_data = np.array([landmarks], dtype=np.float32)
    interpreter.set_tensor(input_details[0]['index'], input_data)
    interpreter.invoke()

    output = interpreter.get_tensor(output_details[0]['index'])[0]
    class_id = int(np.argmax(output))
    confidence = output[class_id]
    predicted_word = labels[class_id]
    return image, f"✅ Predicted: {predicted_word} ({confidence:.2f})"
|
|
| |
# Wire the prediction pipeline into a Gradio UI and start serving.
# Input: a single image (webcam capture or file upload).
# Outputs: the annotated image and the prediction text.
demo = gr.Interface(
    fn=predict_from_image,
    inputs=gr.Image(label="Input Image", sources=["webcam", "upload"]),
    outputs=[gr.Image(label="Landmarks"), gr.Textbox(label="Prediction")],
    title="ASL Sign Prediction using TFLite Model",
)
demo.launch()