# Spaces: Emeritus-21 / itsekiri-Sign-Language-Interpreter
# Sleeping - File size: 7,049 Bytes

import cv2
import base64
import numpy as np
from flask import Flask, render_template, request, jsonify, send_from_directory
import time
import mediapipe as mp
from mediapipe.framework.formats import landmark_pb2
from mediapipe import solutions
from tflite_support.task import vision as vision2
from tflite_support.task import core, processor
from numpy.linalg import norm

# Flask app setup: the module-level application object that the @app.route
# decorators in this file register their handlers on.
app = Flask(__name__)

# Module-level detection state, rewritten by handle_video_frame():
#   letter_result   - label sent back to the client ('_' when nothing is recognised)
#   result_to_show  - top category name from the first classifier (cclassifier)
#   cresult_to_show - top category name from the second classifier (ccclassifier)
#   letterscore     - the higher of the two classifiers' top scores
#   no_hand_flag    - starts at 1; set to 0 once a frame reports handedness
letter_result = result_to_show = cresult_to_show = letterscore = 0
no_hand_flag = 1

# Short aliases for the MediaPipe Tasks classes used below to configure and
# type the hand landmarker (options, result type, running mode).
BaseOptions = mp.tasks.BaseOptions
HandLandmarker = mp.tasks.vision.HandLandmarker
HandLandmarkerOptions = mp.tasks.vision.HandLandmarkerOptions
HandLandmarkerResult = mp.tasks.vision.HandLandmarkerResult
VisionRunningMode = mp.tasks.vision.RunningMode

# Load the two TFLite image classifiers (adjust paths if needed).
# Both are built at import time, so a missing model file fails at startup.
cbase_options = core.BaseOptions(file_name="./exported/model.tflite")  # New model
ccbase_options = core.BaseOptions(file_name="./exported/word.tflite")  # Old model or word model

# Shared by both classifiers: each returns only its single best category.
cclassification_options = processor.ClassificationOptions(max_results=1)
coptions = vision2.ImageClassifierOptions(base_options=cbase_options, classification_options=cclassification_options)
ccoptions = vision2.ImageClassifierOptions(base_options=ccbase_options, classification_options=cclassification_options)

# cclassifier wraps model.tflite, ccclassifier wraps word.tflite.
cclassifier = vision2.ImageClassifier.create_from_options(coptions)
ccclassifier = vision2.ImageClassifier.create_from_options(ccoptions)

# Latest hand-landmark result handed to print_result(); stays None until the
# callback has been invoked at least once.
RESULT = None

def print_result(result: HandLandmarkerResult, output_image: mp.Image, timestamp_ms: int):
    """Store *result* in the module-level ``RESULT``.

    Passed as ``result_callback`` when the hand landmarker options are built.
    Despite its name this function prints nothing; ``output_image`` and
    ``timestamp_ms`` are accepted to match the callback signature but unused.
    """
    global RESULT
    RESULT = result

# Hand landmarker configuration: LIVE_STREAM mode, so results are not returned
# by the detect call but delivered to print_result(), which stores them in RESULT.
options = HandLandmarkerOptions(
    base_options=BaseOptions(model_asset_path='hand_landmarker.task'),
    running_mode=VisionRunningMode.LIVE_STREAM,
    result_callback=print_result)

# Single shared detector instance, created once at import time.
detector = mp.tasks.vision.HandLandmarker.create_from_options(options)

# Utility functions for image processing
def data_uri_to_image(data_uri):
    """Decode a base64 image data URI into an OpenCV image array.

    Args:
        data_uri: String such as ``data:image/jpeg;base64,<payload>``. A bare
            base64 payload without the ``<header>,`` prefix is also accepted.

    Returns:
        The array produced by ``cv2.imdecode(..., cv2.IMREAD_COLOR)``, or
        ``None`` when the payload is empty, is not valid base64, or cannot be
        decoded as an image.
    """
    _header, separator, payload = data_uri.partition(',')
    if not separator:
        # No "<header>," prefix present: treat the whole string as the payload
        # instead of failing on tuple unpacking.
        payload = data_uri

    try:
        raw_bytes = base64.b64decode(payload)
    except ValueError:
        # binascii.Error (bad padding) and the non-ASCII input error are both
        # ValueError subclasses; report them the same way as an unreadable image.
        return None

    pixel_buffer = np.frombuffer(raw_bytes, np.uint8)
    if pixel_buffer.size == 0:
        # cv2.imdecode raises on an empty buffer rather than returning None.
        return None
    return cv2.imdecode(pixel_buffer, cv2.IMREAD_COLOR)

def image_to_data_uri(image):
    """Encode an OpenCV image as a base64 JPEG data URI.

    Args:
        image: Image array accepted by ``cv2.imencode``.

    Returns:
        A string of the form ``data:image/jpeg;base64,<payload>``.

    Raises:
        ValueError: If OpenCV reports that JPEG encoding did not succeed.
    """
    encoded_ok, jpeg_buffer = cv2.imencode('.jpg', image)
    if not encoded_ok:
        # The status flag used to be discarded, so a failed encode would have
        # been returned to the client as if it were a valid image.
        raise ValueError("Could not encode image as JPEG")
    payload = base64.b64encode(jpeg_buffer.tobytes()).decode('utf-8')
    return f"data:image/jpeg;base64,{payload}"

def draw_landmarks_on_image(rgb_image, detection_result):
    """Return a copy of *rgb_image* with every detected hand drawn on it.

    Args:
        rgb_image: Image array; it is copied and never modified in place.
        detection_result: Object exposing ``hand_landmarks``, an iterable with
            one sequence of landmarks (objects with ``x``, ``y``, ``z``) per hand.

    Returns:
        The annotated copy. When ``hand_landmarks`` is empty the copy is
        returned unchanged.
    """
    annotated_image = np.copy(rgb_image)

    for hand_landmarks in detection_result.hand_landmarks:
        # draw_landmarks is given a NormalizedLandmarkList proto, so repackage
        # the landmarks of this hand into one before drawing.
        hand_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
        hand_landmarks_proto.landmark.extend([
            landmark_pb2.NormalizedLandmark(x=landmark.x, y=landmark.y, z=landmark.z)
            for landmark in hand_landmarks
        ])
        solutions.drawing_utils.draw_landmarks(
            annotated_image,
            hand_landmarks_proto,
            solutions.hands.HAND_CONNECTIONS,
            solutions.drawing_styles.get_default_hand_landmarks_style(),
            solutions.drawing_styles.get_default_hand_connections_style(),
        )
    return annotated_image

# Letter list (modify if needed): the uppercase letters A through Z, then '#'.
letter_list = [chr(code) for code in range(ord('A'), ord('Z') + 1)]
letter_list.append('#')

# Isẹ̀kiri dictionary (example mapping, update with real words).
# Keys are single uppercase letters plus '#'; translate_to_isekiri() looks up
# the upper-cased form of each input character here and leaves characters
# without an entry unchanged.
isekiri_dict = {
    'A': 'Àṣẹ',
    'B': 'Bí',
    'C': 'Ṣe',
    'D': 'Dá',
    'E': 'Ẹ̀',
    'F': 'Fẹ́',
    'G': 'Gba',
    'H': 'Hàn',
    'I': 'Ìyà',
    'J': 'Jẹ',
    'K': 'Kọ',
    'L': 'Lá',
    'M': 'Má',
    'N': 'Ná',
    'O': 'Ọ̀',
    'P': 'Pẹ̀',
    'Q': 'Kù',  # approximate since Q rarely used
    'R': 'Rà',
    'S': 'Ṣá',
    'T': 'Tẹ',
    'U': 'Ú',
    'V': 'Vẹ',
    'W': 'Wá',
    'X': 'Ẹ́s',
    'Y': 'Yá',
    'Z': 'Zà',
    '#': '#'  # '#' maps to itself
}

# Routes for web UI and models
@app.route('/')
def index():
    """Serve the web UI by rendering the ``index.html`` template."""
    page = render_template('index.html')
    return page

@app.route('/exported/<path:filename>')
def send_model(filename):
    """Serve the requested file from the ``exported`` directory.

    Args:
        filename: Path captured from the URL after ``/exported/``.
    """
    model_directory = 'exported'
    return send_from_directory(model_directory, filename)

# Video frame processing API (ASL detection)
@app.route('/api/data', methods=['POST'])
def handle_video_frame():
    """Run hand detection and sign classification on one posted video frame.

    Expects a JSON body ``{"key": "<image data URI>"}``.

    Returns:
        JSON ``{"result": <label>, "frame": <data URI>}`` where ``label`` is
        ``"_"`` when no right hand was classified or detection failed, and
        ``frame`` is the input frame with any hand landmarks drawn on it.
        Until the landmarker has delivered its first result, the posted data
        URI is echoed back unchanged. A body without a usable frame yields
        HTTP 400 with ``{"error": ...}``.
    """
    global letter_result, result_to_show, cresult_to_show, letterscore, no_hand_flag

    # silent=True: a missing or malformed JSON body becomes None instead of an
    # unhandled error, so it can be answered with the same 400 as a missing key.
    payload = request.get_json(silent=True)
    frame_data_uri = payload.get('key') if isinstance(payload, dict) else None
    if not frame_data_uri or not isinstance(frame_data_uri, str):
        return jsonify({'error': 'No frame data received'}), 400

    try:
        frame = data_uri_to_image(frame_data_uri)
    except (ValueError, cv2.error):
        # Malformed data URI, invalid base64, or an empty image buffer.
        frame = None
    if frame is None:
        return jsonify({'error': 'Frame data could not be decoded'}), 400

    label = '_'
    annotated_image = frame
    try:
        # OpenCV decodes to BGR, while the frame is declared to MediaPipe as
        # SRGB and handed to the classifiers as an RGB array: convert once.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)

        # detect_async returns nothing; the result arrives through the
        # callback. Its timestamp is in milliseconds and must keep increasing.
        detector.detect_async(mp_image, time.monotonic_ns() // 1_000_000)

        # Snapshot the shared result once: the callback may replace RESULT
        # from another thread while this request is still running. The
        # snapshot can belong to an earlier frame than the one just submitted.
        result = RESULT
        if result is None:
            return jsonify({'result': '_', 'frame': frame_data_uri})

        # Draw on the BGR frame so the JPEG encoder sees the channel order it expects.
        annotated_image = draw_landmarks_on_image(frame, result)

        if result.handedness:
            no_hand_flag = 0
            # Only a hand reported as 'Right' is classified.
            if result.handedness[0][0].display_name == 'Right':
                tf_image = vision2.TensorImage.create_from_array(rgb_frame)
                first_top = cclassifier.classify(tf_image).classifications[0].categories[0]
                second_top = ccclassifier.classify(tf_image).classifications[0].categories[0]

                result_to_show = first_top.category_name
                cresult_to_show = second_top.category_name

                # Keep the more confident prediction; ties go to the first model.
                best = second_top if second_top.score > first_top.score else first_top
                label = best.category_name
                letterscore = best.score
        else:
            # Reset the flag so it reflects the current frame, not a past one.
            no_hand_flag = 1

    except Exception:
        # Best effort: a detection failure still returns the plain frame.
        app.logger.exception("Detection error")
        label = '_'
        annotated_image = frame

    letter_result = label
    frame_out = image_to_data_uri(annotated_image)
    return jsonify({"result": label, "frame": frame_out})

# Isẹ̀kiri translation API
@app.route('/api/translate', methods=['POST'])
def translate_to_isekiri():
    """Translate posted text character by character using ``isekiri_dict``.

    Expects a JSON body ``{"text": "<string>"}``; a missing ``text`` key is
    treated as an empty string.

    Returns:
        JSON ``{"isekiri": "<words joined by single spaces>"}``. Whitespace
        characters are dropped and characters without a dictionary entry are
        kept as they are. A body that is not a JSON object, or a ``text``
        value that is not a string, yields HTTP 400 with ``{"error": ...}``.
    """
    # silent=True: a missing or malformed JSON body becomes None instead of an
    # unhandled error.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'error': 'Expected a JSON object'}), 400

    text = data.get('text', '')
    if not isinstance(text, str):
        return jsonify({'error': "'text' must be a string"}), 400

    # Look up each non-whitespace character by its upper-case form.
    words = [isekiri_dict.get(ch.upper(), ch) for ch in text if not ch.isspace()]
    return jsonify({'isekiri': ' '.join(words)})

# Start Flask's built-in server only when this file is executed directly;
# host 0.0.0.0 listens on all network interfaces, on port 7860.
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=7860)