Spaces:
Sleeping
Sleeping
“Jin-HoMichaelLee” committed on
Commit ·
e817788
1
Parent(s): 606339c
Add application file
Browse files- app.py +166 -0
- functions.py +221 -0
- params.py +111 -0
- requirements.txt +80 -0
app.py
ADDED
|
@@ -0,0 +1,166 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import streamlit as st
import mediapipe as mp
import cv2 as cv
import numpy as np

import tempfile
import time

# developer modules
from functions import draw_styled_landmarks, real_time_prediction
from params import LENGTH, SELECTED_SIGNS, TRANSITION_FRAMES, SELECTED_LABELS, MODEL


# ------------------------------
# Basic App Scaffolding
# ------------------------------

# Title
st.title('SignMeUp')

# Markdown styling: pin the sidebar width whether it is expanded or collapsed.
st.markdown(
    """
    <style>
    [data-testid="stSidebar"][aria-expanded="true"] > div:first-child{
        width: 350px
    }
    [data-testid="stSidebar"][aria-expanded="false"] > div:first-child{
        width: 350px
        margin-left: -350px
    }
    </style>
    """,
    unsafe_allow_html=True,
)

# Create Sidebar
st.sidebar.title('SignMeUp Sidebar')
st.sidebar.subheader('Parameter')

# Define available pages in selection box
app_mode = st.sidebar.selectbox(
    'App Mode',
    ['Video Recognition', 'About', 'Contact']
)


# ------------------------------
# About Page
# ------------------------------

if app_mode == 'About':
    st.markdown('''
    ## About \n
    In this application we are using **MediaPipe** landmark prediction for recognizing American Sign Language. **StreamLit** is used to create the Web Graphical User Interface (GUI) \n

    - [Github](https://github.com/vosmani36/Capstone_Project_SignMeUp/tree/main/notebooks) \n
    ''')

    ## Add Sidebar and Window style
    st.markdown(
        """
        <style>
        [data-testid="stSidebar"][aria-expanded="true"] > div:first-child{
            width: 350px
        }
        [data-testid="stSidebar"][aria-expanded="false"] > div:first-child{
            width: 350px
            margin-left: -350px
        }
        </style>
        """,
        unsafe_allow_html=True,
    )


# ------------------------------
# Video Recognition Page
# ------------------------------

elif app_mode == 'Video Recognition':

    # NOTE(review): this option is deprecated/removed in newer Streamlit
    # releases; kept because requirements pin streamlit==1.21.0.
    st.set_option('deprecation.showfileUploaderEncoding', False)

    use_webcam = st.sidebar.button('Use Webcam')

    ## Get Video
    stframe = st.empty()
    temp_file = tempfile.NamedTemporaryFile(delete=False)

    if use_webcam:
        video = cv.VideoCapture(0)
    else:
        # Fallback source is a remote placeholder (actually an image URL) --
        # TODO(review): replace with a real demo video.
        video = cv.VideoCapture('https://cdn.dribbble.com/users/17914/screenshots/4902225/video-placeholder.png')

    width = int(video.get(cv.CAP_PROP_FRAME_WIDTH))
    height = int(video.get(cv.CAP_PROP_FRAME_HEIGHT))
    fps_input = int(video.get(cv.CAP_PROP_FPS))

    ## Recording
    fps = 0
    sign_recognized = ' '
    prob_recognized = 0
    i = 0

    # Dashboard: three KPI columns (frame rate, recognized sign, probability)
    kpil, kpil2, kpil3 = st.columns(3)

    with kpil:
        st.markdown('**Frame Rate**')
        kpil_text = st.markdown('0')

    with kpil2:
        st.markdown('**Sign**')
        kpil2_text = st.markdown('0')

    with kpil3:
        st.markdown('**Probability**')
        kpil3_text = st.markdown('0')

    st.markdown('<hr/>', unsafe_allow_html=True)


    ## Live Video Mediapipe Holistic

    # New detection variables
    sequence = []       # rolling buffer of per-frame keypoints for prediction
    sentence = []       # history of all predictions (predicted words)
    predictions = []    # raw label-index history used to stabilize transitions
    threshold = 0.5     # only render prediction results if confidence is above threshold

    # Real-time prediction
    with mp.solutions.holistic.Holistic(
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5
    ) as holistic:

        # FIX: initialize to now rather than 0 so the first FPS sample is not
        # measured against the epoch (which produced a meaningless huge value).
        prevTime = time.time()

        while video.isOpened():
            i += 1
            ret, frame = video.read()
            if not ret:
                # NOTE(review): `continue` busy-loops forever once a video
                # file ends; original behaviour kept for webcam hiccups.
                continue

            # Make MediaPipe detections.
            # FIX: MediaPipe expects RGB input, but OpenCV delivers BGR
            # frames. Process a converted copy; drawing still happens on the
            # BGR frame, which st.image renders with channels='BGR'.
            results = holistic.process(cv.cvtColor(frame, cv.COLOR_BGR2RGB))

            # Draw detected landmarks
            draw_styled_landmarks(frame, results)

            # Real-time prediction
            sign_recognized, prob_recognized = real_time_prediction(results, sequence, predictions, threshold, LENGTH, MODEL, SELECTED_LABELS, TRANSITION_FRAMES, SELECTED_SIGNS)

            # FPS Counter
            currTime = time.time()
            elapsed = currTime - prevTime
            # FIX: guard against a zero time delta (ZeroDivisionError on very
            # fast consecutive reads); keep the previous FPS value instead.
            if elapsed > 0:
                fps = 1 / elapsed
            prevTime = currTime

            # Dashboard
            kpil_text.write(f"<h1 style='text-align: center; color:(52, 75, 102);'>{int(fps)}</h1>", unsafe_allow_html=True)
            kpil2_text.write(f"<h1 style='text-align: center; color:(52, 75, 102);'>{sign_recognized}</h1>", unsafe_allow_html=True)
            kpil3_text.write(f"<h1 style='text-align: center; color:(52, 75, 102);'>{prob_recognized}</h1>",
                             unsafe_allow_html=True)

            frame = cv.resize(frame, (0, 0), fx=0.8, fy=0.8)
            stframe.image(frame, channels='BGR', use_column_width=True)
functions.py
ADDED
|
@@ -0,0 +1,221 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import cv2 # for camera feed
|
| 4 |
+
import mediapipe as mp # for accessing and reading from webcam
|
| 5 |
+
import tensorflow as tf
|
| 6 |
+
|
| 7 |
+
# developer modules
|
| 8 |
+
from params import LENGTH, DROP_Z, averaging_sets, point_landmarks_left, point_landmarks_right, FLATTEN, INPUT_SHAPE, RIGHT_HAND, LEFT_HAND, PADDING, CONSTANT_VALUE
|
| 9 |
+
|
| 10 |
+
# Initiate mediapipe model and utils.
# These module-level aliases are shared by the drawing and extraction helpers below.
mp_holistic = mp.solutions.holistic # holistic model (face, pose, and hands in one graph)
mp_drawing = mp.solutions.drawing_utils # drawing utilities for rendering landmarks on frames
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
# ------------------------------
|
| 16 |
+
# Mediapipe
|
| 17 |
+
# ------------------------------
|
| 18 |
+
|
| 19 |
+
# Turns one MediaPipe Holistic result into a fixed-size keypoint array.
def extract_keypoints(results):
    """Flatten MediaPipe Holistic landmarks into one (543, 3) array.

    Groups are stacked in the fixed order face (468), left hand (21),
    pose (33), right hand (21). Any group MediaPipe did not detect is
    replaced by a zero block of the expected size, so the output shape
    is always stable.
    """
    def _coords(group, count):
        # One (x, y, z) row per landmark; zeros when the group is absent.
        if group:
            return np.array([[p.x, p.y, p.z] for p in group.landmark])
        return np.zeros([count, 3])

    face = _coords(results.face_landmarks, 468)
    left_hand = _coords(results.left_hand_landmarks, 21)
    pose = _coords(results.pose_landmarks, 33)  # NOTE: visibility value is intentionally not extracted
    right_hand = _coords(results.right_hand_landmarks, 21)
    return np.concatenate([face, left_hand, pose, right_hand])
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
# ------------------------------
|
| 31 |
+
# Visualization
|
| 32 |
+
# ------------------------------
|
| 33 |
+
|
| 34 |
+
# Renders landmark points and connection lines on top of an image (e.g. a camera frame).
def draw_styled_landmarks(image, results):
    """Draw face mesh, pose, and both hand landmark sets on *image* in place.

    Each group gets its own point/connection colors so they are easy to
    tell apart in the live feed. Groups absent from *results* are simply
    skipped by mp_drawing.
    """
    spec = mp_drawing.DrawingSpec

    # face mesh tesselation: thin dots and lines
    mp_drawing.draw_landmarks(
        image, results.face_landmarks, mp_holistic.FACEMESH_TESSELATION,
        spec(color=(80,110,10), thickness=1, circle_radius=1),
        spec(color=(224,208,64), thickness=1, circle_radius=1))

    # pose skeleton
    mp_drawing.draw_landmarks(
        image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
        spec(color=(80,22,10), thickness=2, circle_radius=4),
        spec(color=(224,208,64), thickness=2, circle_radius=2))

    # left hand
    mp_drawing.draw_landmarks(
        image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
        spec(color=(224,208,64), thickness=2, circle_radius=4),
        spec(color=(235,206,135), thickness=2, circle_radius=2))

    # right hand
    mp_drawing.draw_landmarks(
        image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
        spec(color=(224,208,64), thickness=2, circle_radius=4),
        spec(color=(128,128,240), thickness=2, circle_radius=2))
|
| 52 |
+
|
| 53 |
+
# Visualizes per-sign probabilities as a dynamic real-time bar chart.
def prob_viz(pred, SELECTED_SIGNS, input_frame):
    """Return a copy of *input_frame* with one horizontal probability bar
    and sign label drawn per entry of *pred*.

    Bar length scales with the probability (500 px at prob == 1.0);
    rows are stacked 50 px apart starting at y = 65.
    """
    canvas = input_frame.copy()
    bar_zero = 15  # x coordinate where every bar starts

    for row, p in enumerate(pred):
        top = 65 + row * 50
        # filled grey bar whose width encodes the probability
        cv2.rectangle(canvas,
                      pt1=(bar_zero, top),
                      pt2=(bar_zero + int(p * 100 * 5), top + 30),
                      color=(200, 200, 200), thickness=-1)
        # sign name drawn over the bar
        cv2.putText(img=canvas,
                    text=SELECTED_SIGNS[row],
                    org=(bar_zero, top + 25),
                    fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=1,
                    color=(50, 50, 50),
                    thickness=1, lineType=cv2.LINE_AA)
    return canvas
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
# ------------------------------
|
| 75 |
+
# Pre-processing
|
| 76 |
+
# ------------------------------
|
| 77 |
+
|
| 78 |
+
# helper function for pre-processing
def tf_nan_mean(x, axis=0):
    """Mean of tensor *x* along *axis*, treating NaN entries as absent.

    NaNs contribute neither to the sum nor to the element count; a slice
    that is entirely NaN therefore yields 0/0 = NaN, matching the
    original expression's behaviour.
    """
    nan_mask = tf.math.is_nan(x)
    total = tf.reduce_sum(tf.where(nan_mask, tf.zeros_like(x), x), axis=axis)
    count = tf.reduce_sum(tf.where(nan_mask, tf.zeros_like(x), tf.ones_like(x)), axis=axis)
    return total / count
|
| 82 |
+
|
| 83 |
+
# helper function for pre-processing
def right_hand_percentage(x):
    """Return the fraction of non-NaN hand values that belong to the
    right hand (0.0 = purely left-handed, 1.0 = purely right-handed).

    Counts are taken over the RIGHT_HAND / LEFT_HAND landmark columns
    defined in params.
    """
    def _valid_count(columns):
        # number of non-NaN entries among the selected landmark columns
        vals = tf.gather(x, columns, axis=1)
        return tf.reduce_sum(tf.where(tf.math.is_nan(vals), tf.zeros_like(vals), tf.ones_like(vals)))

    right_count = _valid_count(RIGHT_HAND)
    left_count = _valid_count(LEFT_HAND)
    return right_count / (left_count + right_count)
|
| 91 |
+
|
| 92 |
+
# Preprocessing layer that is prepended to the final model: converts a raw
# keypoint sequence into the fixed-shape tensor the classifier expects.
class FeatureGen(tf.keras.layers.Layer):
    """Custom Keras layer: select/merge landmarks, normalize sequence
    length via truncation or padding, interpolate short NaN gaps, and
    reshape to the model input layout defined in params."""

    def __init__(self):
        # No trainable state; just initialize the base Layer.
        super(FeatureGen, self).__init__()

    def call(self, x_in, MIRROR=False):
        # x_in: keypoint sequence, indexed (frame, landmark, coord)
        # -- assumed (frames, 543, 3) as produced by extract_keypoints;
        # TODO(review): confirm against caller.

        # Drop z coordinates if configured (keeps only x, y).
        if DROP_Z:
            x_in = x_in[:, :, 0:2]
        if MIRROR:
            # Data augmentation: mirror horizontally by flipping x around 1
            # (x' = 1 - x). Round-trips through numpy for item assignment.
            x_in = np.array(x_in)
            x_in[:, :, 0] = (x_in[:, :, 0]-1)*(-1)
            x_in = tf.convert_to_tensor(x_in)

        # Mean-merge each configured landmark range into a single landmark.
        # (averaging_sets is empty in the current params, so this is a no-op.)
        x_list = [tf.expand_dims(tf_nan_mean(x_in[:, av_set[0]:av_set[0]+av_set[1], :], axis=1), axis=1) for av_set in averaging_sets]

        # Keep only the dominant hand's landmark columns (plus lips):
        # whichever hand has more non-NaN observations in this sequence.
        handedness = right_hand_percentage(x_in)
        if handedness > 0.5:
            x_list.append(tf.gather(x_in, point_landmarks_right, axis=1))
        else:
            x_list.append(tf.gather(x_in, point_landmarks_left, axis=1))

        # Concatenate merged and selected landmarks along the landmark axis.
        x = tf.concat(x_list, 1)

        # Normalize the sequence to exactly LENGTH frames.
        x_padded = x
        current_rows = tf.shape(x_padded)[0]
        # Too long: truncate excess frames from the end.
        if current_rows > LENGTH:
            x_padded = x_padded[:LENGTH, :, :]

        # Too short: pad according to the configured PADDING mode.
        elif current_rows < LENGTH:
            pad_rows = LENGTH - current_rows

            if PADDING ==4: #copy first/last frame
                if pad_rows %2 == 0: #if pad_rows is even
                    padding_front = tf.repeat(x_padded[0:1, :], pad_rows//2, axis=0)
                    padding_back = tf.repeat(x_padded[-1:, :], pad_rows//2, axis=0)
                else: #if pad_rows is odd: front gets the extra frame
                    padding_front = tf.repeat(x_padded[0:1, :], (pad_rows//2)+1, axis=0)
                    padding_back = tf.repeat(x_padded[-1:, :], pad_rows//2, axis=0)
                x_padded = tf.concat([padding_front, x_padded, padding_back], axis=0)
            elif PADDING == 5: #copy last frame
                padding_back = tf.repeat(x_padded[-1:, :], pad_rows, axis=0)
                x_padded = tf.concat([x_padded, padding_back], axis=0)
            else:
                # Constant-value padding modes (value = CONSTANT_VALUE).
                if PADDING ==1: #padding at start and end
                    if pad_rows %2 == 0: #if pad_rows is even
                        paddings = [[pad_rows//2, pad_rows//2], [0, 0], [0, 0]]
                    else: #if pad_rows is odd
                        paddings = [[pad_rows//2+1, pad_rows//2], [0, 0], [0, 0]]
                elif PADDING ==2: #padding only at the end of sequence
                    paddings = [[0, pad_rows], [0, 0], [0, 0]]
                elif PADDING ==3: #no padding (leaves the sequence short)
                    paddings = [[0, 0], [0, 0], [0, 0]]
                x_padded = tf.pad(x_padded, paddings, mode='CONSTANT', constant_values=CONSTANT_VALUE)

        x = x_padded
        current_rows = tf.shape(x)[0]

        # Interpolate isolated missing values (gaps up to 2 samples).
        # NOTE(review): interpolation runs on the *flattened* tensor, so it
        # can bridge values across landmark/frame boundaries -- presumably
        # acceptable for short gaps, but worth confirming.
        x = pd.DataFrame(np.array(x).flatten()).interpolate(method='linear', limit=2, limit_direction='both')
        # Any NaNs that survive interpolation become zeros.
        x = tf.where(tf.math.is_nan(x), tf.zeros_like(x), x)

        # Reshape flat data back to the 2D or 3D model input layout
        # (leading batch dimension of 1).
        if FLATTEN:
            x = tf.reshape(x, (1, current_rows*INPUT_SHAPE[1]))
        else:
            x = tf.reshape(x, (1, current_rows, INPUT_SHAPE[1]))

        return x
|
| 173 |
+
|
| 174 |
+
# Module-level converter instance built from the preprocessing layer above;
# real_time_prediction calls it to turn a raw keypoint sequence into model input.
feature_converter = FeatureGen()
|
| 176 |
+
|
| 177 |
+
|
| 178 |
+
# ------------------------------
# Real-time prediction
# ------------------------------

def real_time_prediction(results, sequence, predictions, threshold, LENGTH, MODEL, SELECTED_LABELS, TRANSITION_FRAMES, SELECTED_SIGNS):
    """Buffer the current frame's keypoints and, once LENGTH frames are
    collected, classify the sequence and return the recognized sign.

    Args:
        results: MediaPipe Holistic output for the current frame.
        sequence: mutable rolling buffer of per-frame keypoint arrays;
            trimmed *in place* to the last LENGTH entries.
        predictions: mutable history of predicted label indices; trimmed
            in place to the window consulted below.
        threshold: minimum confidence required to report a sign.
        LENGTH: number of frames the model expects per sequence.
        MODEL: Keras model exposing .predict().
        SELECTED_LABELS: label indices kept from the raw prediction vector.
        TRANSITION_FRAMES: how many consecutive frames must agree before a
            sign is reported (stabilizes transitions between signs).
        SELECTED_SIGNS: sign names aligned with SELECTED_LABELS.

    Returns:
        (sign, prob): recognized sign name and rounded probability;
        ('', 0) while the buffer is filling, (' ', 0) below threshold.
    """
    sign = ''
    prob = 0

    # Extract this frame's keypoints, e.g. shape (543, 3), and buffer them.
    keypoints = extract_keypoints(results)
    sequence.append(keypoints)
    # FIX: trim in place instead of rebinding the local name. The original
    # `sequence = sequence[-LENGTH:]` only shortened the *local* reference,
    # so the caller's list grew without bound (memory leak).
    del sequence[:-LENGTH]

    # Predict only once a full sequence is buffered.
    if len(sequence) == LENGTH:
        # Pre-processing: convert raw keypoints into the model input layout.
        model_input = feature_converter(np.array(sequence))

        # Prediction, restricted to the selected subset of signs.
        pred = MODEL.predict(model_input)[0]
        pred = pred[SELECTED_LABELS]
        best = int(np.argmax(pred))
        predictions.append(best)
        # Same in-place trim as above: only the recent window is consulted.
        del predictions[:-TRANSITION_FRAMES]

        # Visualization logic: require the last TRANSITION_FRAMES frames to
        # agree on the same sign before reporting it.
        # FIX: the original `np.unique(recent)[0] == argmax` compared only
        # the *smallest* recent label, which could pass even when recent
        # predictions disagreed; check that all of them match instead.
        recent = predictions[-TRANSITION_FRAMES:]
        if all(p == best for p in recent):
            # Only report when the winning confidence clears the threshold.
            if pred[best] > threshold:
                sign = SELECTED_SIGNS[best]
                prob = np.round(float(pred[best]), 2)
            else:
                sign = ' '
                prob = 0

    return sign, prob
|
| 215 |
+
|
| 216 |
+
|
| 217 |
+
|
| 218 |
+
# ------------------------------
|
| 219 |
+
# Streamlit
|
| 220 |
+
# ------------------------------
|
| 221 |
+
|
params.py
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import tensorflow as tf
|
| 2 |
+
|
| 3 |
+
# load model
# NOTE(review): loading happens at import time, so importing params requires
# this model file to exist at the relative path below.
MODEL = tf.keras.models.load_model('models/LSTM_model_20signs_7.h5')

#------------------------------
# PRE-PROCESSING CONFIGURATION
#------------------------------

# Limit dataset size for a quick test run.
QUICK_TEST = True
QUICK_LIMIT = 500

# Target length of sequences for padding or cutting; 22 is the median length
# of all sequences in the training data.
LENGTH = 22

# Min/max allowed sequence lengths; sequences outside this range are dropped.
# The max value of 92 was derived from the interquartile range.
MIN_LENGTH = 10
MAX_LENGTH = 92

# If True the final data is flattened to 2D; if False it stays 3 dimensional.
FLATTEN = False

# Initialization of the numpy array (True = zeros, False = empty values).
ARRAY = False

# Padding mode:
# 1 = pad at start & end; 2 = pad at end; 3 = no padding;
# 4 = copy first/last frame; 5 = copy last frame.
# NOTE(review): mode 3 produces sequences of differing lengths and is known
# to cause errors downstream.
PADDING = 2
CONSTANT_VALUE = 0 # pad value; only used by modes 1 and 2 (use tf.constant(float('nan')) for NaN)

# Whether the z coordinate is dropped during preprocessing.
DROP_Z = True

# Mirror augmentation: flips the x coordinate.
MIRROR = True

# Whether the csv file should be filtered.
CSV_FILTER = False
# Number of participants held out for the test set
# (5 participants account for about 23% of the dataset).
TEST_COUNT = 5
# Generate train or test dataset (True = train, False = test);
# only takes effect when CSV_FILTER is activated.
TRAIN = True

# Filter for specific signs.
SIGN_FILTER = True
sign_list = [0,1,5,8]

# Filenames for the feature and label arrays.
feature_data = 'X' #x data
feature_labels = 'y' #y data

# Alternative filenames for the test dataset:
#feature_data = 'X_test_h6' #x data
#feature_labels = 'y_test_h6' #y data


RANDOM_STATE = 42

# Landmark index ranges inside the 543-landmark Holistic output.
# Do not change these -- they mirror MediaPipe's fixed layout.
FACE = list(range(0, 468))
LEFT_HAND = list(range(468, 489))
POSE = list(range(489, 522))
POSE_UPPER = list(range(489, 510))
RIGHT_HAND = list(range(522, 543))
# Face-mesh indices of the lip contours.
LIPS = [61, 185, 40, 39, 37, 0, 267, 269, 270, 409,
        291,146, 91,181, 84, 17, 314, 405, 321, 375,
        78, 191, 80, 81, 82, 13, 312, 311, 310, 415,
        95, 88, 178, 87, 14,317, 402, 318, 324, 308]
lipsUpperOuter= [61, 185, 40, 39, 37, 0, 267, 269, 270, 409, 291]
lipsLowerOuter= [146, 91, 181, 84, 17, 314, 405, 321, 375, 291]
lipsUpperInner= [78, 191, 80, 81, 82, 13, 312, 311, 310, 415, 308]
lipsLowerInner= [78, 95, 88, 178, 87, 14, 317, 402, 318, 324, 308]
# Landmark ranges that FeatureGen mean-merges; currently none.
averaging_sets = []

# Landmarks selected for preprocessing (change the landmark choice here):
# one hand plus the inner lip contours.
point_landmarks_right = RIGHT_HAND + lipsUpperInner + lipsLowerInner
point_landmarks_left = LEFT_HAND + lipsUpperInner + lipsLowerInner

# Total number of landmarks used per frame.
LANDMARKS = len(point_landmarks_right) + len(averaging_sets)
print(f'Total count of used landmarks: {LANDMARKS}')

# Model input shape: 2 coords per landmark without z, otherwise 3.
if DROP_Z:
    INPUT_SHAPE = (LENGTH,LANDMARKS*2)
else:
    INPUT_SHAPE = (LENGTH,LANDMARKS*3)
print(INPUT_SHAPE)


#------------------------------
# GAME MECHANICS
#------------------------------

COUNTDOWN = 0
# Mapping of sign name -> model output index for the 20 trained signs.
LABEL_MAP = {'brown': 0, 'callonphone': 1, 'cow': 2, 'cry': 3, 'dad': 4, 'fireman': 5, 'frog': 6, 'gum': 7, 'icecream': 8, 'minemy': 9, 'nose': 10, 'owl': 11, 'please': 12, 'radio': 13, 'shhh': 14, 'shirt': 15, 'tomorrow': 16, 'uncle': 17, 'water': 18, 'who': 19}
SELECTED_SIGNS = list(LABEL_MAP.keys())
SELECTED_LABELS = [LABEL_MAP[x] for x in SELECTED_SIGNS]

#------------------------------
# VISUALIZATION
#------------------------------
# Number of consecutive agreeing frames required before a sign is reported.
TRANSITION_FRAMES = LENGTH
|
requirements.txt
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
altair==4.2.2
|
| 2 |
+
appnope==0.1.3
|
| 3 |
+
asttokens==2.2.1
|
| 4 |
+
attrs==23.1.0
|
| 5 |
+
backcall==0.2.0
|
| 6 |
+
blinker==1.6.2
|
| 7 |
+
cachetools==5.3.0
|
| 8 |
+
certifi==2022.12.7
|
| 9 |
+
charset-normalizer==3.1.0
|
| 10 |
+
click==8.1.3
|
| 11 |
+
comm==0.1.2
|
| 12 |
+
customtkinter==5.1.2
|
| 13 |
+
darkdetect==0.8.0
|
| 14 |
+
debugpy==1.6.6
|
| 15 |
+
decorator==5.1.1
|
| 16 |
+
entrypoints==0.4
|
| 17 |
+
etils==1.2.0
|
| 18 |
+
executing==1.2.0
|
| 19 |
+
gitdb==4.0.10
|
| 20 |
+
GitPython==3.1.31
|
| 21 |
+
idna==3.4
|
| 22 |
+
importlib-metadata==6.0.0
|
| 23 |
+
ipykernel==6.21.2
|
| 24 |
+
ipython==8.10.0
|
| 25 |
+
jax==0.4.8
|
| 26 |
+
jaxlib==0.4.7
|
| 27 |
+
jedi==0.18.2
|
| 28 |
+
Jinja2==3.1.2
|
| 29 |
+
jsonschema==4.17.3
|
| 30 |
+
jupyter_client==8.0.3
|
| 31 |
+
jupyter_core==5.2.0
|
| 32 |
+
markdown-it-py==2.2.0
|
| 33 |
+
MarkupSafe==2.1.2
|
| 34 |
+
matplotlib-inline==0.1.6
|
| 35 |
+
mdurl==0.1.2
|
| 36 |
+
ml-dtypes==0.1.0
|
| 37 |
+
nest-asyncio==1.5.6
|
| 38 |
+
numpy==1.24.3
|
| 39 |
+
opt-einsum==3.3.0
|
| 40 |
+
packaging==23.0
|
| 41 |
+
pandas==1.5.3
|
| 42 |
+
parso==0.8.3
|
| 43 |
+
pexpect==4.8.0
|
| 44 |
+
pickleshare==0.7.5
|
| 45 |
+
Pillow==9.5.0
|
| 46 |
+
platformdirs==3.0.0
|
| 47 |
+
prompt-toolkit==3.0.36
|
| 48 |
+
protobuf==3.20.3
|
| 49 |
+
psutil==5.9.4
|
| 50 |
+
ptyprocess==0.7.0
|
| 51 |
+
pure-eval==0.2.2
|
| 52 |
+
pyarrow==11.0.0
|
| 53 |
+
pydeck==0.8.1b0
|
| 54 |
+
Pygments==2.14.0
|
| 55 |
+
Pympler==1.0.1
|
| 56 |
+
pyrsistent==0.19.3
|
| 57 |
+
python-dateutil==2.8.2
|
| 58 |
+
pytz==2023.3
|
| 59 |
+
pytz-deprecation-shim==0.1.0.post0
|
| 60 |
+
pyzmq==25.0.0
|
| 61 |
+
requests==2.28.2
|
| 62 |
+
rich==13.3.4
|
| 63 |
+
scipy==1.10.1
|
| 64 |
+
six==1.16.0
|
| 65 |
+
smmap==5.0.0
|
| 66 |
+
stack-data==0.6.2
|
| 67 |
+
streamlit==1.21.0
|
| 68 |
+
tensorflow-hub==0.13.0
|
| 69 |
+
tensorflowjs==4.4.0
|
| 70 |
+
toml==0.10.2
|
| 71 |
+
toolz==0.12.0
|
| 72 |
+
tornado==6.2
|
| 73 |
+
traitlets==5.9.0
|
| 74 |
+
typing_extensions==4.5.0
|
| 75 |
+
tzdata==2023.3
|
| 76 |
+
tzlocal==4.3
|
| 77 |
+
urllib3==1.26.15
|
| 78 |
+
validators==0.20.0
|
| 79 |
+
wcwidth==0.2.6
|
| 80 |
+
zipp==3.13.0
# NOTE(review): the application code imports these packages but they were
# missing from the pin list above; versions should be pinned once verified.
mediapipe
opencv-python
tensorflow
|