Spaces:
Sleeping
Sleeping
“Jin-HoMichaelLee” committed on
Commit ·
e817788
1
Parent(s): 606339c
Add application file
Browse files- app.py +166 -0
- functions.py +221 -0
- params.py +111 -0
- requirements.txt +80 -0
app.py
ADDED
|
@@ -0,0 +1,166 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import streamlit as st
import mediapipe as mp
import cv2 as cv
import numpy as np

import tempfile
import time

# developer modules
from functions import draw_styled_landmarks, real_time_prediction
from params import LENGTH, SELECTED_SIGNS, TRANSITION_FRAMES, SELECTED_LABELS, MODEL


# ------------------------------
# Basic App Scaffolding
# ------------------------------

# Title
st.title('SignMeUp')

# Markdown styling: pin the sidebar width whether it is expanded or collapsed.
st.markdown(
    """
    <style>
    [data-testid="stSidebar"][aria-expanded="true"] > div:first-child{
        width: 350px
    }
    [data-testid="stSidebar"][aria-expanded="false"] > div:first-child{
        width: 350px
        margin-left: -350px
    }
    </style>
    """,
    unsafe_allow_html=True,
)

# Create Sidebar
st.sidebar.title('SignMeUp Sidebar')
st.sidebar.subheader('Parameter')

# Define available pages in selection box
app_mode = st.sidebar.selectbox(
    'App Mode',
    ['Video Recognition', 'About', 'Contact']
)


# ------------------------------
# About Page
# ------------------------------

if app_mode == 'About':
    st.markdown('''
    ## About \n
    In this application we are using **MediaPipe** landmark prediction for recognizing American Sign Language. **StreamLit** is used to create the Web Graphical User Interface (GUI) \n

    - [Github](https://github.com/vosmani36/Capstone_Project_SignMeUp/tree/main/notebooks) \n
    ''')

    ## Add Sidebar and Window style
    st.markdown(
        """
        <style>
        [data-testid="stSidebar"][aria-expanded="true"] > div:first-child{
            width: 350px
        }
        [data-testid="stSidebar"][aria-expanded="false"] > div:first-child{
            width: 350px
            margin-left: -350px
        }
        </style>
        """,
        unsafe_allow_html=True,
    )


# ------------------------------
# Video Recognition Page
# ------------------------------

elif app_mode == 'Video Recognition':

    # NOTE(review): this option is deprecated/removed in newer Streamlit
    # releases; kept because requirements pin streamlit==1.21.0.
    st.set_option('deprecation.showfileUploaderEncoding', False)

    use_webcam = st.sidebar.button('Use Webcam')

    ## Get Video
    stframe = st.empty()
    temp_file = tempfile.NamedTemporaryFile(delete=False)

    if use_webcam:
        video = cv.VideoCapture(0)
    else:
        # Fallback source is a remote placeholder (actually an image URL) --
        # TODO(review): replace with a real demo video.
        video = cv.VideoCapture('https://cdn.dribbble.com/users/17914/screenshots/4902225/video-placeholder.png')

    width = int(video.get(cv.CAP_PROP_FRAME_WIDTH))
    height = int(video.get(cv.CAP_PROP_FRAME_HEIGHT))
    fps_input = int(video.get(cv.CAP_PROP_FPS))

    ## Recording
    fps = 0
    sign_recognized = ' '
    prob_recognized = 0
    i = 0

    # Dashboard: three KPI columns (frame rate, recognized sign, probability)
    kpil, kpil2, kpil3 = st.columns(3)

    with kpil:
        st.markdown('**Frame Rate**')
        kpil_text = st.markdown('0')

    with kpil2:
        st.markdown('**Sign**')
        kpil2_text = st.markdown('0')

    with kpil3:
        st.markdown('**Probability**')
        kpil3_text = st.markdown('0')

    st.markdown('<hr/>', unsafe_allow_html=True)


    ## Live Video Mediapipe Holistic

    # New detection variables
    sequence = []       # rolling buffer of per-frame keypoints for prediction
    sentence = []       # history of all predictions (predicted words)
    predictions = []    # raw label-index history used to stabilize transitions
    threshold = 0.5     # only render prediction results if confidence is above threshold

    # Real-time prediction
    with mp.solutions.holistic.Holistic(
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5
    ) as holistic:

        # FIX: initialize to now rather than 0 so the first FPS sample is not
        # measured against the epoch (which produced a meaningless huge value).
        prevTime = time.time()

        while video.isOpened():
            i += 1
            ret, frame = video.read()
            if not ret:
                # NOTE(review): `continue` busy-loops forever once a video
                # file ends; original behaviour kept for webcam hiccups.
                continue

            # Make MediaPipe detections.
            # FIX: MediaPipe expects RGB input, but OpenCV delivers BGR
            # frames. Process a converted copy; drawing still happens on the
            # BGR frame, which st.image renders with channels='BGR'.
            results = holistic.process(cv.cvtColor(frame, cv.COLOR_BGR2RGB))

            # Draw detected landmarks
            draw_styled_landmarks(frame, results)

            # Real-time prediction
            sign_recognized, prob_recognized = real_time_prediction(results, sequence, predictions, threshold, LENGTH, MODEL, SELECTED_LABELS, TRANSITION_FRAMES, SELECTED_SIGNS)

            # FPS Counter
            currTime = time.time()
            elapsed = currTime - prevTime
            # FIX: guard against a zero time delta (ZeroDivisionError on very
            # fast consecutive reads); keep the previous FPS value instead.
            if elapsed > 0:
                fps = 1 / elapsed
            prevTime = currTime

            # Dashboard
            kpil_text.write(f"<h1 style='text-align: center; color:(52, 75, 102);'>{int(fps)}</h1>", unsafe_allow_html=True)
            kpil2_text.write(f"<h1 style='text-align: center; color:(52, 75, 102);'>{sign_recognized}</h1>", unsafe_allow_html=True)
            kpil3_text.write(f"<h1 style='text-align: center; color:(52, 75, 102);'>{prob_recognized}</h1>",
                             unsafe_allow_html=True)

            frame = cv.resize(frame, (0, 0), fx=0.8, fy=0.8)
            stframe.image(frame, channels='BGR', use_column_width=True)
functions.py
ADDED
|
@@ -0,0 +1,221 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import cv2 # for camera feed
|
| 4 |
+
import mediapipe as mp # for accessing and reading from webcam
|
| 5 |
+
import tensorflow as tf
|
| 6 |
+
|
| 7 |
+
# developer modules
|
| 8 |
+
from params import LENGTH, DROP_Z, averaging_sets, point_landmarks_left, point_landmarks_right, FLATTEN, INPUT_SHAPE, RIGHT_HAND, LEFT_HAND, PADDING, CONSTANT_VALUE
|
| 9 |
+
|
| 10 |
+
# Initiate mediapipe model and utils.
# These module-level aliases are shared by the drawing and extraction helpers below.
mp_holistic = mp.solutions.holistic # holistic model (face, pose, and hands in one graph)
mp_drawing = mp.solutions.drawing_utils # drawing utilities for rendering landmarks on frames
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
# ------------------------------
|
| 16 |
+
# Mediapipe
|
| 17 |
+
# ------------------------------
|
| 18 |
+
|
| 19 |
+
# Turns one MediaPipe Holistic result into a fixed-size keypoint array.
def extract_keypoints(results):
    """Flatten MediaPipe Holistic landmarks into one (543, 3) array.

    Groups are stacked in the fixed order face (468), left hand (21),
    pose (33), right hand (21). Any group MediaPipe did not detect is
    replaced by a zero block of the expected size, so the output shape
    is always stable.
    """
    def _coords(group, count):
        # One (x, y, z) row per landmark; zeros when the group is absent.
        if group:
            return np.array([[p.x, p.y, p.z] for p in group.landmark])
        return np.zeros([count, 3])

    face = _coords(results.face_landmarks, 468)
    left_hand = _coords(results.left_hand_landmarks, 21)
    pose = _coords(results.pose_landmarks, 33)  # NOTE: visibility value is intentionally not extracted
    right_hand = _coords(results.right_hand_landmarks, 21)
    return np.concatenate([face, left_hand, pose, right_hand])
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
# ------------------------------
|
| 31 |
+
# Visualization
|
| 32 |
+
# ------------------------------
|
| 33 |
+
|
| 34 |
+
# Renders landmark points and connection lines on top of an image (e.g. a camera frame).
def draw_styled_landmarks(image, results):
    """Draw face mesh, pose, and both hand landmark sets on *image* in place.

    Each group gets its own point/connection colors so they are easy to
    tell apart in the live feed. Groups absent from *results* are simply
    skipped by mp_drawing.
    """
    spec = mp_drawing.DrawingSpec

    # face mesh tesselation: thin dots and lines
    mp_drawing.draw_landmarks(
        image, results.face_landmarks, mp_holistic.FACEMESH_TESSELATION,
        spec(color=(80,110,10), thickness=1, circle_radius=1),
        spec(color=(224,208,64), thickness=1, circle_radius=1))

    # pose skeleton
    mp_drawing.draw_landmarks(
        image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
        spec(color=(80,22,10), thickness=2, circle_radius=4),
        spec(color=(224,208,64), thickness=2, circle_radius=2))

    # left hand
    mp_drawing.draw_landmarks(
        image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
        spec(color=(224,208,64), thickness=2, circle_radius=4),
        spec(color=(235,206,135), thickness=2, circle_radius=2))

    # right hand
    mp_drawing.draw_landmarks(
        image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
        spec(color=(224,208,64), thickness=2, circle_radius=4),
        spec(color=(128,128,240), thickness=2, circle_radius=2))
|
| 52 |
+
|
| 53 |
+
# Visualizes per-sign probabilities as a dynamic real-time bar chart.
def prob_viz(pred, SELECTED_SIGNS, input_frame):
    """Return a copy of *input_frame* with one horizontal probability bar
    and sign label drawn per entry of *pred*.

    Bar length scales with the probability (500 px at prob == 1.0);
    rows are stacked 50 px apart starting at y = 65.
    """
    canvas = input_frame.copy()
    bar_zero = 15  # x coordinate where every bar starts

    for row, p in enumerate(pred):
        top = 65 + row * 50
        # filled grey bar whose width encodes the probability
        cv2.rectangle(canvas,
                      pt1=(bar_zero, top),
                      pt2=(bar_zero + int(p * 100 * 5), top + 30),
                      color=(200, 200, 200), thickness=-1)
        # sign name drawn over the bar
        cv2.putText(img=canvas,
                    text=SELECTED_SIGNS[row],
                    org=(bar_zero, top + 25),
                    fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=1,
                    color=(50, 50, 50),
                    thickness=1, lineType=cv2.LINE_AA)
    return canvas
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
# ------------------------------
|
| 75 |
+
# Pre-processing
|
| 76 |
+
# ------------------------------
|
| 77 |
+
|
| 78 |
+
# helper function for pre-processing
def tf_nan_mean(x, axis=0):
    """Mean of tensor *x* along *axis*, treating NaN entries as absent.

    NaNs contribute neither to the sum nor to the element count; a slice
    that is entirely NaN therefore yields 0/0 = NaN, matching the
    original expression's behaviour.
    """
    nan_mask = tf.math.is_nan(x)
    total = tf.reduce_sum(tf.where(nan_mask, tf.zeros_like(x), x), axis=axis)
    count = tf.reduce_sum(tf.where(nan_mask, tf.zeros_like(x), tf.ones_like(x)), axis=axis)
    return total / count
|
| 82 |
+
|
| 83 |
+
# helper function for pre-processing
def right_hand_percentage(x):
    """Return the fraction of non-NaN hand values that belong to the
    right hand (0.0 = purely left-handed, 1.0 = purely right-handed).

    Counts are taken over the RIGHT_HAND / LEFT_HAND landmark columns
    defined in params.
    """
    def _valid_count(columns):
        # number of non-NaN entries among the selected landmark columns
        vals = tf.gather(x, columns, axis=1)
        return tf.reduce_sum(tf.where(tf.math.is_nan(vals), tf.zeros_like(vals), tf.ones_like(vals)))

    right_count = _valid_count(RIGHT_HAND)
    left_count = _valid_count(LEFT_HAND)
    return right_count / (left_count + right_count)
|
| 91 |
+
|
| 92 |
+
# Preprocessing layer that is prepended to the final model: converts a raw
# keypoint sequence into the fixed-shape tensor the classifier expects.
class FeatureGen(tf.keras.layers.Layer):
    """Custom Keras layer: select/merge landmarks, normalize sequence
    length via truncation or padding, interpolate short NaN gaps, and
    reshape to the model input layout defined in params."""

    def __init__(self):
        # No trainable state; just initialize the base Layer.
        super(FeatureGen, self).__init__()

    def call(self, x_in, MIRROR=False):
        # x_in: keypoint sequence, indexed (frame, landmark, coord)
        # -- assumed (frames, 543, 3) as produced by extract_keypoints;
        # TODO(review): confirm against caller.

        # Drop z coordinates if configured (keeps only x, y).
        if DROP_Z:
            x_in = x_in[:, :, 0:2]
        if MIRROR:
            # Data augmentation: mirror horizontally by flipping x around 1
            # (x' = 1 - x). Round-trips through numpy for item assignment.
            x_in = np.array(x_in)
            x_in[:, :, 0] = (x_in[:, :, 0]-1)*(-1)
            x_in = tf.convert_to_tensor(x_in)

        # Mean-merge each configured landmark range into a single landmark.
        # (averaging_sets is empty in the current params, so this is a no-op.)
        x_list = [tf.expand_dims(tf_nan_mean(x_in[:, av_set[0]:av_set[0]+av_set[1], :], axis=1), axis=1) for av_set in averaging_sets]

        # Keep only the dominant hand's landmark columns (plus lips):
        # whichever hand has more non-NaN observations in this sequence.
        handedness = right_hand_percentage(x_in)
        if handedness > 0.5:
            x_list.append(tf.gather(x_in, point_landmarks_right, axis=1))
        else:
            x_list.append(tf.gather(x_in, point_landmarks_left, axis=1))

        # Concatenate merged and selected landmarks along the landmark axis.
        x = tf.concat(x_list, 1)

        # Normalize the sequence to exactly LENGTH frames.
        x_padded = x
        current_rows = tf.shape(x_padded)[0]
        # Too long: truncate excess frames from the end.
        if current_rows > LENGTH:
            x_padded = x_padded[:LENGTH, :, :]

        # Too short: pad according to the configured PADDING mode.
        elif current_rows < LENGTH:
            pad_rows = LENGTH - current_rows

            if PADDING ==4: #copy first/last frame
                if pad_rows %2 == 0: #if pad_rows is even
                    padding_front = tf.repeat(x_padded[0:1, :], pad_rows//2, axis=0)
                    padding_back = tf.repeat(x_padded[-1:, :], pad_rows//2, axis=0)
                else: #if pad_rows is odd: front gets the extra frame
                    padding_front = tf.repeat(x_padded[0:1, :], (pad_rows//2)+1, axis=0)
                    padding_back = tf.repeat(x_padded[-1:, :], pad_rows//2, axis=0)
                x_padded = tf.concat([padding_front, x_padded, padding_back], axis=0)
            elif PADDING == 5: #copy last frame
                padding_back = tf.repeat(x_padded[-1:, :], pad_rows, axis=0)
                x_padded = tf.concat([x_padded, padding_back], axis=0)
            else:
                # Constant-value padding modes (value = CONSTANT_VALUE).
                if PADDING ==1: #padding at start and end
                    if pad_rows %2 == 0: #if pad_rows is even
                        paddings = [[pad_rows//2, pad_rows//2], [0, 0], [0, 0]]
                    else: #if pad_rows is odd
                        paddings = [[pad_rows//2+1, pad_rows//2], [0, 0], [0, 0]]
                elif PADDING ==2: #padding only at the end of sequence
                    paddings = [[0, pad_rows], [0, 0], [0, 0]]
                elif PADDING ==3: #no padding (leaves the sequence short)
                    paddings = [[0, 0], [0, 0], [0, 0]]
                x_padded = tf.pad(x_padded, paddings, mode='CONSTANT', constant_values=CONSTANT_VALUE)

        x = x_padded
        current_rows = tf.shape(x)[0]

        # Interpolate isolated missing values (gaps up to 2 samples).
        # NOTE(review): interpolation runs on the *flattened* tensor, so it
        # can bridge values across landmark/frame boundaries -- presumably
        # acceptable for short gaps, but worth confirming.
        x = pd.DataFrame(np.array(x).flatten()).interpolate(method='linear', limit=2, limit_direction='both')
        # Any NaNs that survive interpolation become zeros.
        x = tf.where(tf.math.is_nan(x), tf.zeros_like(x), x)

        # Reshape flat data back to the 2D or 3D model input layout
        # (leading batch dimension of 1).
        if FLATTEN:
            x = tf.reshape(x, (1, current_rows*INPUT_SHAPE[1]))
        else:
            x = tf.reshape(x, (1, current_rows, INPUT_SHAPE[1]))

        return x
|
| 173 |
+
|
| 174 |
+
# Module-level converter instance built from the preprocessing layer above;
# real_time_prediction calls it to turn a raw keypoint sequence into model input.
feature_converter = FeatureGen()
|
| 176 |
+
|
| 177 |
+
|
| 178 |
+
# ------------------------------
# Real-time prediction
# ------------------------------

def real_time_prediction(results, sequence, predictions, threshold, LENGTH, MODEL, SELECTED_LABELS, TRANSITION_FRAMES, SELECTED_SIGNS):
    """Buffer the current frame's keypoints and, once LENGTH frames are
    collected, classify the sequence and return the recognized sign.

    Args:
        results: MediaPipe Holistic output for the current frame.
        sequence: mutable rolling buffer of per-frame keypoint arrays;
            trimmed *in place* to the last LENGTH entries.
        predictions: mutable history of predicted label indices; trimmed
            in place to the window consulted below.
        threshold: minimum confidence required to report a sign.
        LENGTH: number of frames the model expects per sequence.
        MODEL: Keras model exposing .predict().
        SELECTED_LABELS: label indices kept from the raw prediction vector.
        TRANSITION_FRAMES: how many consecutive frames must agree before a
            sign is reported (stabilizes transitions between signs).
        SELECTED_SIGNS: sign names aligned with SELECTED_LABELS.

    Returns:
        (sign, prob): recognized sign name and rounded probability;
        ('', 0) while the buffer is filling, (' ', 0) below threshold.
    """
    sign = ''
    prob = 0

    # Extract this frame's keypoints, e.g. shape (543, 3), and buffer them.
    keypoints = extract_keypoints(results)
    sequence.append(keypoints)
    # FIX: trim in place instead of rebinding the local name. The original
    # `sequence = sequence[-LENGTH:]` only shortened the *local* reference,
    # so the caller's list grew without bound (memory leak).
    del sequence[:-LENGTH]

    # Predict only once a full sequence is buffered.
    if len(sequence) == LENGTH:
        # Pre-processing: convert raw keypoints into the model input layout.
        model_input = feature_converter(np.array(sequence))

        # Prediction, restricted to the selected subset of signs.
        pred = MODEL.predict(model_input)[0]
        pred = pred[SELECTED_LABELS]
        best = int(np.argmax(pred))
        predictions.append(best)
        # Same in-place trim as above: only the recent window is consulted.
        del predictions[:-TRANSITION_FRAMES]

        # Visualization logic: require the last TRANSITION_FRAMES frames to
        # agree on the same sign before reporting it.
        # FIX: the original `np.unique(recent)[0] == argmax` compared only
        # the *smallest* recent label, which could pass even when recent
        # predictions disagreed; check that all of them match instead.
        recent = predictions[-TRANSITION_FRAMES:]
        if all(p == best for p in recent):
            # Only report when the winning confidence clears the threshold.
            if pred[best] > threshold:
                sign = SELECTED_SIGNS[best]
                prob = np.round(float(pred[best]), 2)
            else:
                sign = ' '
                prob = 0

    return sign, prob
|
| 215 |
+
|
| 216 |
+
|
| 217 |
+
|
| 218 |
+
# ------------------------------
|
| 219 |
+
# Streamlit
|
| 220 |
+
# ------------------------------
|
| 221 |
+
|
params.py
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import tensorflow as tf
|
| 2 |
+
|
| 3 |
+
# load model
# NOTE(review): loading happens at import time, so importing params requires
# this model file to exist at the relative path below.
MODEL = tf.keras.models.load_model('models/LSTM_model_20signs_7.h5')

#------------------------------
# PRE-PROCESSING CONFIGURATION
#------------------------------

# Limit dataset size for a quick test run.
QUICK_TEST = True
QUICK_LIMIT = 500

# Target length of sequences for padding or cutting; 22 is the median length
# of all sequences in the training data.
LENGTH = 22

# Min/max allowed sequence lengths; sequences outside this range are dropped.
# The max value of 92 was derived from the interquartile range.
MIN_LENGTH = 10
MAX_LENGTH = 92

# If True the final data is flattened to 2D; if False it stays 3 dimensional.
FLATTEN = False

# Initialization of the numpy array (True = zeros, False = empty values).
ARRAY = False

# Padding mode:
# 1 = pad at start & end; 2 = pad at end; 3 = no padding;
# 4 = copy first/last frame; 5 = copy last frame.
# NOTE(review): mode 3 produces sequences of differing lengths and is known
# to cause errors downstream.
PADDING = 2
CONSTANT_VALUE = 0 # pad value; only used by modes 1 and 2 (use tf.constant(float('nan')) for NaN)

# Whether the z coordinate is dropped during preprocessing.
DROP_Z = True

# Mirror augmentation: flips the x coordinate.
MIRROR = True

# Whether the csv file should be filtered.
CSV_FILTER = False
# Number of participants held out for the test set
# (5 participants account for about 23% of the dataset).
TEST_COUNT = 5
# Generate train or test dataset (True = train, False = test);
# only takes effect when CSV_FILTER is activated.
TRAIN = True

# Filter for specific signs.
SIGN_FILTER = True
sign_list = [0,1,5,8]

# Filenames for the feature and label arrays.
feature_data = 'X' #x data
feature_labels = 'y' #y data

# Alternative filenames for the test dataset:
#feature_data = 'X_test_h6' #x data
#feature_labels = 'y_test_h6' #y data


RANDOM_STATE = 42

# Landmark index ranges inside the 543-landmark Holistic output.
# Do not change these -- they mirror MediaPipe's fixed layout.
FACE = list(range(0, 468))
LEFT_HAND = list(range(468, 489))
POSE = list(range(489, 522))
POSE_UPPER = list(range(489, 510))
RIGHT_HAND = list(range(522, 543))
# Face-mesh indices of the lip contours.
LIPS = [61, 185, 40, 39, 37, 0, 267, 269, 270, 409,
        291,146, 91,181, 84, 17, 314, 405, 321, 375,
        78, 191, 80, 81, 82, 13, 312, 311, 310, 415,
        95, 88, 178, 87, 14,317, 402, 318, 324, 308]
lipsUpperOuter= [61, 185, 40, 39, 37, 0, 267, 269, 270, 409, 291]
lipsLowerOuter= [146, 91, 181, 84, 17, 314, 405, 321, 375, 291]
lipsUpperInner= [78, 191, 80, 81, 82, 13, 312, 311, 310, 415, 308]
lipsLowerInner= [78, 95, 88, 178, 87, 14, 317, 402, 318, 324, 308]
# Landmark ranges that FeatureGen mean-merges; currently none.
averaging_sets = []

# Landmarks selected for preprocessing (change the landmark choice here):
# one hand plus the inner lip contours.
point_landmarks_right = RIGHT_HAND + lipsUpperInner + lipsLowerInner
point_landmarks_left = LEFT_HAND + lipsUpperInner + lipsLowerInner

# Total number of landmarks used per frame.
LANDMARKS = len(point_landmarks_right) + len(averaging_sets)
print(f'Total count of used landmarks: {LANDMARKS}')

# Model input shape: 2 coords per landmark without z, otherwise 3.
if DROP_Z:
    INPUT_SHAPE = (LENGTH,LANDMARKS*2)
else:
    INPUT_SHAPE = (LENGTH,LANDMARKS*3)
print(INPUT_SHAPE)


#------------------------------
# GAME MECHANICS
#------------------------------

COUNTDOWN = 0
# Mapping of sign name -> model output index for the 20 trained signs.
LABEL_MAP = {'brown': 0, 'callonphone': 1, 'cow': 2, 'cry': 3, 'dad': 4, 'fireman': 5, 'frog': 6, 'gum': 7, 'icecream': 8, 'minemy': 9, 'nose': 10, 'owl': 11, 'please': 12, 'radio': 13, 'shhh': 14, 'shirt': 15, 'tomorrow': 16, 'uncle': 17, 'water': 18, 'who': 19}
SELECTED_SIGNS = list(LABEL_MAP.keys())
SELECTED_LABELS = [LABEL_MAP[x] for x in SELECTED_SIGNS]

#------------------------------
# VISUALIZATION
#------------------------------
# Number of consecutive agreeing frames required before a sign is reported.
TRANSITION_FRAMES = LENGTH
|
requirements.txt
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
altair==4.2.2
|
| 2 |
+
appnope==0.1.3
|
| 3 |
+
asttokens==2.2.1
|
| 4 |
+
attrs==23.1.0
|
| 5 |
+
backcall==0.2.0
|
| 6 |
+
blinker==1.6.2
|
| 7 |
+
cachetools==5.3.0
|
| 8 |
+
certifi==2022.12.7
|
| 9 |
+
charset-normalizer==3.1.0
|
| 10 |
+
click==8.1.3
|
| 11 |
+
comm==0.1.2
|
| 12 |
+
customtkinter==5.1.2
|
| 13 |
+
darkdetect==0.8.0
|
| 14 |
+
debugpy==1.6.6
|
| 15 |
+
decorator==5.1.1
|
| 16 |
+
entrypoints==0.4
|
| 17 |
+
etils==1.2.0
|
| 18 |
+
executing==1.2.0
|
| 19 |
+
gitdb==4.0.10
|
| 20 |
+
GitPython==3.1.31
|
| 21 |
+
idna==3.4
|
| 22 |
+
importlib-metadata==6.0.0
|
| 23 |
+
ipykernel==6.21.2
|
| 24 |
+
ipython==8.10.0
|
| 25 |
+
jax==0.4.8
|
| 26 |
+
jaxlib==0.4.7
|
| 27 |
+
jedi==0.18.2
|
| 28 |
+
Jinja2==3.1.2
|
| 29 |
+
jsonschema==4.17.3
|
| 30 |
+
jupyter_client==8.0.3
|
| 31 |
+
jupyter_core==5.2.0
|
| 32 |
+
markdown-it-py==2.2.0
|
| 33 |
+
MarkupSafe==2.1.2
|
| 34 |
+
matplotlib-inline==0.1.6
|
| 35 |
+
mdurl==0.1.2
|
| 36 |
+
ml-dtypes==0.1.0
|
| 37 |
+
nest-asyncio==1.5.6
|
| 38 |
+
numpy==1.24.3
|
| 39 |
+
opt-einsum==3.3.0
|
| 40 |
+
packaging==23.0
|
| 41 |
+
pandas==1.5.3
|
| 42 |
+
parso==0.8.3
|
| 43 |
+
pexpect==4.8.0
|
| 44 |
+
pickleshare==0.7.5
|
| 45 |
+
Pillow==9.5.0
|
| 46 |
+
platformdirs==3.0.0
|
| 47 |
+
prompt-toolkit==3.0.36
|
| 48 |
+
protobuf==3.20.3
|
| 49 |
+
psutil==5.9.4
|
| 50 |
+
ptyprocess==0.7.0
|
| 51 |
+
pure-eval==0.2.2
|
| 52 |
+
pyarrow==11.0.0
|
| 53 |
+
pydeck==0.8.1b0
|
| 54 |
+
Pygments==2.14.0
|
| 55 |
+
Pympler==1.0.1
|
| 56 |
+
pyrsistent==0.19.3
|
| 57 |
+
python-dateutil==2.8.2
|
| 58 |
+
pytz==2023.3
|
| 59 |
+
pytz-deprecation-shim==0.1.0.post0
|
| 60 |
+
pyzmq==25.0.0
|
| 61 |
+
requests==2.28.2
|
| 62 |
+
rich==13.3.4
|
| 63 |
+
scipy==1.10.1
|
| 64 |
+
six==1.16.0
|
| 65 |
+
smmap==5.0.0
|
| 66 |
+
stack-data==0.6.2
|
| 67 |
+
streamlit==1.21.0
|
| 68 |
+
tensorflow-hub==0.13.0
|
| 69 |
+
tensorflowjs==4.4.0
|
| 70 |
+
toml==0.10.2
|
| 71 |
+
toolz==0.12.0
|
| 72 |
+
tornado==6.2
|
| 73 |
+
traitlets==5.9.0
|
| 74 |
+
typing_extensions==4.5.0
|
| 75 |
+
tzdata==2023.3
|
| 76 |
+
tzlocal==4.3
|
| 77 |
+
urllib3==1.26.15
|
| 78 |
+
validators==0.20.0
|
| 79 |
+
wcwidth==0.2.6
|
| 80 |
+
zipp==3.13.0
# NOTE(review): the application code imports these packages but they were
# missing from the pin list above; versions should be pinned once verified.
mediapipe
opencv-python
tensorflow
|