Update app.py
app.py CHANGED
@@ -1,189 +1,290 @@
import streamlit as st
import cv2
import mediapipe as mp
- import math
- from PIL import Image
import numpy as np

-
def attention_block(inputs, time_steps):
-     """
-     Attention layer for deep neural network
-
-     """
-     # Attention weights
    a = Permute((2, 1))(inputs)
    a = Dense(time_steps, activation='softmax')(a)
-
-     # Attention vector
    a_probs = Permute((2, 1), name='attention_vec')(a)
-
-     # Luong's multiplicative score
    output_attention_mul = multiply([inputs, a_probs], name='attention_mul')
-
    return output_attention_mul

@st.cache(allow_output_mutation=True)
def build_model(HIDDEN_UNITS=256, sequence_length=30, num_input_values=33*4, num_classes=3):
-
-     # Input
    inputs = Input(shape=(sequence_length, num_input_values))
-     # Bi-LSTM
    lstm_out = Bidirectional(LSTM(HIDDEN_UNITS, return_sequences=True))(inputs)
-     # Attention
    attention_mul = attention_block(lstm_out, sequence_length)
    attention_mul = Flatten()(attention_mul)
-     # Fully Connected Layer
    x = Dense(2*HIDDEN_UNITS, activation='relu')(attention_mul)
    x = Dropout(0.5)(x)
-     # Output
    x = Dense(num_classes, activation='softmax')(x)
-     # Bring it all together
    model = Model(inputs=[inputs], outputs=x)
-
-     ## Load Model Weights
    load_dir = "./models/LSTM_Attention.h5"
    model.load_weights(load_dir)
-
    return model
-
-
- threshold3 = st.slider("Minimum Activity Classification Confidence", 0.00, 1.00, 0.50)
- ## Real Time Machine Learning and Computer Vision Processes
class VideoProcessor:
    def __init__(self):
-         # Parameters
        self.actions = np.array(['curl', 'press', 'squat'])
        self.sequence_length = 30
        self.colors = [(245,117,16), (117,245,16), (16,117,245)]
-         self.
-
-         # Detection variables
-         self.sequence = []
-         self.current_action = ''
-
-         # Initialize pose model
-         self.mp_pose = mp.solutions.pose
-         self.mp_drawing = mp.solutions.drawing_utils
-         self.pose = self.mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5)

-
-     def draw_landmarks(self, image, results):
-         """
-         This function draws keypoints and landmarks detected by the human pose estimation model
-
-         """
-         self.mp_drawing.draw_landmarks(image, results.pose_landmarks, self.mp_pose.POSE_CONNECTIONS,
-                                 self.mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=2),
-                                 self.mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
-                                 )
-         return image
-
-     @st.cache()
-     def extract_keypoints(self, results):
-         """
-         Processes and organizes the keypoints detected from the pose estimation model
-         to be used as inputs for the exercise decoder models
-
-         """
-         pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
-         return pose
-
-     @st.cache()
-     def calculate_angle(self, a, b, c):
-         """
-         Computes 3D joint angle inferred by 3 keypoints and their relative positions to one another
-
-         """
-         a = np.array(a) # First
-         b = np.array(b) # Mid
-         c = np.array(c) # End
-
-         radians = np.arctan2(c[1]-b[1], c[0]-b[0]) - np.arctan2(a[1]-b[1], a[0]-b[0])
-         angle = np.abs(radians*180.0/np.pi)
-
-         if angle > 180.0:
-             angle = 360-angle
-
-         return angle
-
-     @st.cache()
-     def get_coordinates(self, landmarks, side, joint):
-         """
-         Retrieves x and y coordinates of a particular keypoint from the pose estimation model
-
-         Args:
-             landmarks: processed keypoints from the pose estimation model
-             side: 'left' or 'right'. Denotes the side of the body of the landmark of interest.
-             joint: 'shoulder', 'elbow', 'wrist', 'hip', 'knee', or 'ankle'. Denotes which body joint is associated with the landmark of interest.
-
-         """
-         coord = getattr(self.mp_pose.PoseLandmark, side.upper() + "_" + joint.upper())
-         x_coord_val = landmarks[coord.value].x
-         y_coord_val = landmarks[coord.value].y
-         return [x_coord_val, y_coord_val]
-
-     @st.cache()
-     def viz_joint_angle(self, image, angle, joint):
-         """
-         Displays the joint angle value near the joint within the image frame
-
-         """
-         cv2.putText(image, str(int(angle)),
-                     tuple(np.multiply(joint, [640, 480]).astype(int)),
-                     cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2, cv2.LINE_AA
-                     )
-         return
-
-     @st.cache()
-     def process_video_input(self, threshold1, threshold2, threshold3):
-         """
-         Processes the video input and performs real-time action recognition and rep counting.
-
-         """
-         video_file = st.file_uploader("Upload Video", type=["mp4", "avi"])
-         if video_file is None:
-             st.warning("Please upload a video file.")
-             return
-
        cap = cv2.VideoCapture(video_file)
-
-             st.error("Error opening video stream or file.")
-             return
-
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
-
-             # Convert frame to RGB (Mediapipe requires RGB input)
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-
-             # Pose estimation
            results = self.pose.process(frame_rgb)
-
-             # Draw landmarks
-             self.draw_landmarks(frame, results)
-
-             # Extract keypoints
-             keypoints = self.extract_keypoints(results)
-
-             # Visualize probabilities
-             if len(self.sequence) == self.sequence_length:
-                 sequence = np.array([self.sequence])
-                 res = model.predict(sequence)
-                 frame = self.prob_viz(res[0], frame)
-
-             # Append frame to output frames
            out_frames.append(frame)
-
-             # Release video capture
        cap.release()
-
-

- # Call the process_video_input method
- video_processor.process_video_input(threshold1, threshold2, threshold3)
# import streamlit as st
# import cv2

+ # import streamlit as st
+ # import cv2
+ # import mediapipe as mp
+ # import math
+ # from PIL import Image
+ # import numpy as np
+
+ # ## Build and Load Model
+ # def attention_block(inputs, time_steps):
+ #     """
+ #     Attention layer for deep neural network
+
+ #     """
+ #     # Attention weights
+ #     a = Permute((2, 1))(inputs)
+ #     a = Dense(time_steps, activation='softmax')(a)
+
+ #     # Attention vector
+ #     a_probs = Permute((2, 1), name='attention_vec')(a)
+
+ #     # Luong's multiplicative score
+ #     output_attention_mul = multiply([inputs, a_probs], name='attention_mul')
+
+ #     return output_attention_mul
+
+ # @st.cache(allow_output_mutation=True)
+ # def build_model(HIDDEN_UNITS=256, sequence_length=30, num_input_values=33*4, num_classes=3):
+
+ #     # Input
+ #     inputs = Input(shape=(sequence_length, num_input_values))
+ #     # Bi-LSTM
+ #     lstm_out = Bidirectional(LSTM(HIDDEN_UNITS, return_sequences=True))(inputs)
+ #     # Attention
+ #     attention_mul = attention_block(lstm_out, sequence_length)
+ #     attention_mul = Flatten()(attention_mul)
+ #     # Fully Connected Layer
+ #     x = Dense(2*HIDDEN_UNITS, activation='relu')(attention_mul)
+ #     x = Dropout(0.5)(x)
+ #     # Output
+ #     x = Dense(num_classes, activation='softmax')(x)
+ #     # Bring it all together
+ #     model = Model(inputs=[inputs], outputs=x)
+
+ #     ## Load Model Weights
+ #     load_dir = "./models/LSTM_Attention.h5"
+ #     model.load_weights(load_dir)
+
+ #     return model
+ # threshold1 = st.slider("Minimum Keypoint Detection Confidence", 0.00, 1.00, 0.50)
+ # threshold2 = st.slider("Minimum Tracking Confidence", 0.00, 1.00, 0.50)
+ # threshold3 = st.slider("Minimum Activity Classification Confidence", 0.00, 1.00, 0.50)
+ # ## Real Time Machine Learning and Computer Vision Processes
+ # class VideoProcessor:
+ #     def __init__(self):
+ #         # Parameters
+ #         self.actions = np.array(['curl', 'press', 'squat'])
+ #         self.sequence_length = 30
+ #         self.colors = [(245,117,16), (117,245,16), (16,117,245)]
+ #         self.threshold = 0.50 # Default threshold for activity classification confidence
+
+ #         # Detection variables
+ #         self.sequence = []
+ #         self.current_action = ''
+
+ #         # Initialize pose model
+ #         self.mp_pose = mp.solutions.pose
+ #         self.mp_drawing = mp.solutions.drawing_utils
+ #         self.pose = self.mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5)
+
+ #     @st.cache()
+ #     def draw_landmarks(self, image, results):
+ #         """
+ #         This function draws keypoints and landmarks detected by the human pose estimation model
+
+ #         """
+ #         self.mp_drawing.draw_landmarks(image, results.pose_landmarks, self.mp_pose.POSE_CONNECTIONS,
+ #                                 self.mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=2),
+ #                                 self.mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
+ #                                 )
+ #         return image
+
+ #     @st.cache()
+ #     def extract_keypoints(self, results):
+ #         """
+ #         Processes and organizes the keypoints detected from the pose estimation model
+ #         to be used as inputs for the exercise decoder models
+
+ #         """
+ #         pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
+ #         return pose
+
+ #     @st.cache()
+ #     def calculate_angle(self, a, b, c):
+ #         """
+ #         Computes 3D joint angle inferred by 3 keypoints and their relative positions to one another
+
+ #         """
+ #         a = np.array(a) # First
+ #         b = np.array(b) # Mid
+ #         c = np.array(c) # End
+
+ #         radians = np.arctan2(c[1]-b[1], c[0]-b[0]) - np.arctan2(a[1]-b[1], a[0]-b[0])
+ #         angle = np.abs(radians*180.0/np.pi)
+
+ #         if angle > 180.0:
+ #             angle = 360-angle
+
+ #         return angle
+
+ #     @st.cache()
+ #     def get_coordinates(self, landmarks, side, joint):
+ #         """
+ #         Retrieves x and y coordinates of a particular keypoint from the pose estimation model
+
+ #         Args:
+ #             landmarks: processed keypoints from the pose estimation model
+ #             side: 'left' or 'right'. Denotes the side of the body of the landmark of interest.
+ #             joint: 'shoulder', 'elbow', 'wrist', 'hip', 'knee', or 'ankle'. Denotes which body joint is associated with the landmark of interest.
+
+ #         """
+ #         coord = getattr(self.mp_pose.PoseLandmark, side.upper() + "_" + joint.upper())
+ #         x_coord_val = landmarks[coord.value].x
+ #         y_coord_val = landmarks[coord.value].y
+ #         return [x_coord_val, y_coord_val]
+
+ #     @st.cache()
+ #     def viz_joint_angle(self, image, angle, joint):
+ #         """
+ #         Displays the joint angle value near the joint within the image frame
+
+ #         """
+ #         cv2.putText(image, str(int(angle)),
+ #                     tuple(np.multiply(joint, [640, 480]).astype(int)),
+ #                     cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2, cv2.LINE_AA
+ #                     )
+ #         return
+
+ #     @st.cache()
+ #     def process_video_input(self, threshold1, threshold2, threshold3):
+ #         """
+ #         Processes the video input and performs real-time action recognition and rep counting.
+
+ #         """
+ #         video_file = st.file_uploader("Upload Video", type=["mp4", "avi"])
+ #         if video_file is None:
+ #             st.warning("Please upload a video file.")
+ #             return
+
+ #         cap = cv2.VideoCapture(video_file)
+ #         if not cap.isOpened():
+ #             st.error("Error opening video stream or file.")
+ #             return
+
+ #         while cap.isOpened():
+ #             ret, frame = cap.read()
+ #             if not ret:
+ #                 break
+
+ #             # Convert frame to RGB (Mediapipe requires RGB input)
+ #             frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+
+ #             # Pose estimation
+ #             results = self.pose.process(frame_rgb)
+
+ #             # Draw landmarks
+ #             self.draw_landmarks(frame, results)
+
+ #             # Extract keypoints
+ #             keypoints = self.extract_keypoints(results)
+
+ #             # Visualize probabilities
+ #             if len(self.sequence) == self.sequence_length:
+ #                 sequence = np.array([self.sequence])
+ #                 res = model.predict(sequence)
+ #                 frame = self.prob_viz(res[0], frame)
+
+ #             # Append frame to output frames
+ #             out_frames.append(frame)
+
+ #             # Release video capture
+ #             cap.release()
+ # # # Create an instance of VideoProcessor
+ # # video_processor = VideoProcessor()
+
+ # # # Call the process_video_input method
+ # # video_processor.process_video_input(threshold1, threshold2, threshold3)
+
+ # # Define Streamlit app
+ # def main():
+ #     st.title("Real-time Exercise Detection")
+ #     video_file = st.file_uploader("Upload a video file", type=["mp4", "avi"])
+ #     if video_file is not None:
+ #         st.video(video_file)
+ #         video_processor = VideoProcessor()
+ #         frames = video_processor.process_video(video_file)
+ #         for frame in frames:
+ #             st.image(frame, channels="BGR")
+
+ # if __name__ == "__main__":
+ #     main()
+
+
+
import streamlit as st
import cv2
import mediapipe as mp
import numpy as np
+ import math
+ from tensorflow.keras.models import Model
+ from tensorflow.keras.layers import (LSTM, Dense, Dropout, Input, Flatten,
+                                      Bidirectional, Permute, multiply)

+ # Load the pose estimation model from Mediapipe
+ mp_pose = mp.solutions.pose
+ mp_drawing = mp.solutions.drawing_utils
+ pose = mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5)
+
+ # Define the attention block for the LSTM model
def attention_block(inputs, time_steps):
    a = Permute((2, 1))(inputs)
    a = Dense(time_steps, activation='softmax')(a)
    a_probs = Permute((2, 1), name='attention_vec')(a)
    output_attention_mul = multiply([inputs, a_probs], name='attention_mul')
    return output_attention_mul
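The attention block leaves the Bi-LSTM output shape unchanged: it learns a softmax weighting over the 30 time steps and multiplies it back onto the inputs. A toy shape check, not part of this commit, assuming TensorFlow 2.x eager execution and a random dummy input:

import numpy as np
dummy = np.random.rand(1, 30, 2 * 256).astype("float32")  # (batch, time_steps, 2*HIDDEN_UNITS)
weighted = attention_block(dummy, time_steps=30)           # per-feature softmax over the 30 frames
print(weighted.shape)                                       # (1, 30, 512): same shape, rescaled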

+ # Build and load the LSTM model
@st.cache(allow_output_mutation=True)
def build_model(HIDDEN_UNITS=256, sequence_length=30, num_input_values=33*4, num_classes=3):
    inputs = Input(shape=(sequence_length, num_input_values))
    lstm_out = Bidirectional(LSTM(HIDDEN_UNITS, return_sequences=True))(inputs)
    attention_mul = attention_block(lstm_out, sequence_length)
    attention_mul = Flatten()(attention_mul)
    x = Dense(2*HIDDEN_UNITS, activation='relu')(attention_mul)
    x = Dropout(0.5)(x)
    x = Dense(num_classes, activation='softmax')(x)
    model = Model(inputs=[inputs], outputs=x)
    load_dir = "./models/LSTM_Attention.h5"
    model.load_weights(load_dir)
    return model
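A usage sketch, not part of this commit and assuming the weights file ./models/LSTM_Attention.h5 ships with the Space: the model expects windows of 30 frames, each flattened to 132 values (33 Mediapipe landmarks x x, y, z, visibility), and returns one probability per action.

model = build_model()                                  # Bi-LSTM + attention, weights loaded from disk
window = np.zeros((1, 30, 33 * 4), dtype=np.float32)   # dummy 30-frame keypoint sequence
probs = model.predict(window)[0]                       # three class probabilities
print(dict(zip(['curl', 'press', 'squat'], probs)))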
+
+ # Define the VideoProcessor class for real-time video processing
class VideoProcessor:
    def __init__(self):
        self.actions = np.array(['curl', 'press', 'squat'])
        self.sequence_length = 30
        self.colors = [(245,117,16), (117,245,16), (16,117,245)]
+         self.pose = mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5)
+         self.model = build_model()

+     def process_video(self, video_file):
        cap = cv2.VideoCapture(video_file)
+         out_frames = []
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = self.pose.process(frame_rgb)
+             frame = self.draw_landmarks(frame, results)
            out_frames.append(frame)
        cap.release()
+         return out_frames
+
+     def draw_landmarks(self, image, results):
+         mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS,
+                                   mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=2),
+                                   mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2))
+         return image
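Note that this revision only draws landmarks; the classification step from the commented-out version (buffering keypoints and calling the model) is no longer wired in. A hedged sketch of how it could be reattached inside process_video, using hypothetical helper names and mirroring the earlier extract_keypoints logic:

def flatten_landmarks(results):
    # x, y, z, visibility for each of the 33 landmarks, as in the old extract_keypoints
    if results.pose_landmarks:
        return np.array([[lm.x, lm.y, lm.z, lm.visibility]
                         for lm in results.pose_landmarks.landmark]).flatten()
    return np.zeros(33 * 4)

def classify_window(processor, keypoint_window):
    # keypoint_window: list of 30 flattened frames collected while reading the video
    probs = processor.model.predict(np.array([keypoint_window]))[0]
    best = int(np.argmax(probs))
    return processor.actions[best], float(probs[best])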
+
+ # Define Streamlit app
+ def main():
+     st.title("Real-time Exercise Detection")
+     video_file = st.file_uploader("Upload a video file", type=["mp4", "avi"])
+     if video_file is not None:
+         st.video(video_file)
+         video_processor = VideoProcessor()
+         frames = video_processor.process_video(video_file)
+         for frame in frames:
+             st.image(frame, channels="BGR")
+
+ if __name__ == "__main__":
+     main()
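One caveat: st.file_uploader returns an in-memory UploadedFile, while cv2.VideoCapture expects a filename or device index, so process_video would typically be given a path to a temporary copy of the upload. A minimal sketch of that hand-off, with a hypothetical helper name, not part of this commit:

import tempfile

def uploaded_video_to_path(uploaded_file):
    # Write the Streamlit upload to disk so OpenCV can open it by path
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
    tmp.write(uploaded_file.read())
    tmp.close()
    return tmp.name

# e.g. in main(): frames = video_processor.process_video(uploaded_video_to_path(video_file))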
+
+
+
+

# import streamlit as st
# import cv2