Update app.py

app.py CHANGED
@@ -35,18 +35,7 @@ def attention_block(inputs, time_steps):

@st.cache(allow_output_mutation=True)
def build_model(HIDDEN_UNITS=256, sequence_length=30, num_input_values=33*4, num_classes=3):
-    """
-    Function used to build the deep neural network model on startup
-
-    Args:
-        HIDDEN_UNITS (int, optional): Number of hidden units for each neural network hidden layer. Defaults to 256.
-        sequence_length (int, optional): Input sequence length (i.e., number of frames). Defaults to 30.
-        num_input_values (int, optional): Input size of the neural network model. Defaults to 33*4 (i.e., number of keypoints x number of metrics).
-        num_classes (int, optional): Number of classification categories (i.e., model output size). Defaults to 3.
-
-    Returns:
-        keras model: neural network with pre-trained weights
-    """
+
    # Input
    inputs = Input(shape=(sequence_length, num_input_values))
    # Bi-LSTM
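Note: the commit drops `build_model`'s docstring but not the architecture — a cached Keras model whose elided body runs the 30-frame, 33*4-feature keypoint sequence through a Bi-LSTM and the `attention_block` named in the hunk header. For orientation, a minimal sketch of such a model, assuming a standard soft-attention block; everything beyond the signature and the two lines visible in the diff is illustrative, not this app's actual code:

```python
import tensorflow.keras.backend as K
from tensorflow.keras.layers import (LSTM, Bidirectional, Dense, Input,
                                     Lambda, Multiply, Permute)
from tensorflow.keras.models import Model

def attention_block(inputs, time_steps):
    # Soft attention over time steps (illustrative stand-in for the app's own
    # attention_block): score each frame, softmax, reweight, pool over time.
    a = Permute((2, 1))(inputs)                     # (batch, features, time)
    a = Dense(time_steps, activation="softmax")(a)  # per-feature frame weights
    a = Permute((2, 1))(a)                          # back to (batch, time, features)
    weighted = Multiply()([inputs, a])
    return Lambda(lambda x: K.sum(x, axis=1))(weighted)  # (batch, features)

def build_model(HIDDEN_UNITS=256, sequence_length=30, num_input_values=33*4, num_classes=3):
    inputs = Input(shape=(sequence_length, num_input_values))
    # Bi-LSTM over the keypoint sequence, as the diff's comment indicates
    x = Bidirectional(LSTM(HIDDEN_UNITS, return_sequences=True))(inputs)
    x = attention_block(x, sequence_length)
    outputs = Dense(num_classes, activation="softmax")(x)
    return Model(inputs=inputs, outputs=outputs)
```

The `@st.cache(allow_output_mutation=True)` decorator keeps Streamlit from rebuilding (and re-loading weights into) the model on every rerun of the script.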
@@ -70,24 +59,10 @@ def build_model(HIDDEN_UNITS=256, sequence_length=30, num_input_values=33*4, num_classes=3):

HIDDEN_UNITS = 256
model = build_model(HIDDEN_UNITS)
-
-## App
-st.write("# AI Personal Fitness Trainer Web App")
-
-st.markdown("**Development Note**")
-st.markdown("Currently, the exercise recognition model uses the x, y, and z coordinates of each anatomical landmark from the MediaPipe Pose model. These coordinates are normalized with respect to the image frame (e.g., the top left corner represents (x=0,y=0) and the bottom right corner represents (x=1,y=1)).")
-st.markdown("I'm currently developing and testing two new feature engineering strategies:")
-st.markdown("- Normalizing coordinates by the detected bounding box of the user")
-st.markdown("- Using joint angles rather than keypoint coordinates as features")
-st.write("Stay Tuned!")
-
-st.write("## Settings")
threshold1 = st.slider("Minimum Keypoint Detection Confidence", 0.00, 1.00, 0.50)
threshold2 = st.slider("Minimum Tracking Confidence", 0.00, 1.00, 0.50)
threshold3 = st.slider("Minimum Activity Classification Confidence", 0.00, 1.00, 0.50)

-st.write("## Activate the AI 🤖🏋️‍♂️")
-
## Mediapipe
mp_pose = mp.solutions.pose # Pre-trained pose estimation model from Google Mediapipe
mp_drawing = mp.solutions.drawing_utils # Supported Mediapipe visualization tools
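Note: the three sliders feed two different consumers. The first two correspond to the standard `min_detection_confidence` and `min_tracking_confidence` arguments of MediaPipe's `Pose` constructor, while `threshold3` gates the activity classifier's softmax output downstream. A minimal sketch of the wiring — the `Pose()` call is real MediaPipe API; the surrounding lines mirror the diff:

```python
import mediapipe as mp
import streamlit as st

threshold1 = st.slider("Minimum Keypoint Detection Confidence", 0.00, 1.00, 0.50)
threshold2 = st.slider("Minimum Tracking Confidence", 0.00, 1.00, 0.50)
threshold3 = st.slider("Minimum Activity Classification Confidence", 0.00, 1.00, 0.50)

mp_pose = mp.solutions.pose
pose = mp_pose.Pose(
    min_detection_confidence=threshold1,  # keypoint detection cutoff
    min_tracking_confidence=threshold2,   # landmark tracking cutoff
)
# threshold3 is applied later, to the classifier's predicted probabilities,
# not to MediaPipe itself.
```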
@@ -182,8 +157,62 @@ class VideoProcessor:
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2, cv2.LINE_AA
        )
        return
-
    @st.cache()
+    def process_video(self, video_file):
+        """
+        Processes each frame of the input video, performs pose estimation,
+        and counts repetitions of each exercise.
+
+        Args:
+            video_file (BytesIO): Input video file.
+
+        Returns:
+            tuple: A tuple containing the processed video frames with annotations
+            and the final count of repetitions for each exercise.
+        """
+        cap = cv2.VideoCapture(video_file)
+        out_frames = []
+        # Initialize repetition counters
+        self.curl_counter = 0
+        self.press_counter = 0
+        self.squat_counter = 0
+
+        while cap.isOpened():
+            ret, frame = cap.read()
+            if not ret:
+                break
+
+            # Convert frame to RGB (Mediapipe requires RGB input)
+            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+
+            # Pose estimation
+            results = pose.process(frame_rgb)
+
+            # Draw landmarks
+            self.draw_landmarks(frame, results)
+
+            # Extract keypoints
+            keypoints = self.extract_keypoints(results)
+
+            # Count repetitions
+            self.count_reps(frame, results.pose_landmarks, mp_pose)
+
+            # Visualize probabilities
+            if len(self.sequence) == self.sequence_length:
+                sequence = np.array([self.sequence])
+                res = model.predict(sequence)
+                frame = self.prob_viz(res[0], frame)
+
+            # Append frame to output frames
+            out_frames.append(frame)
+
+        # Release video capture
+        cap.release()
+
+        # Return annotated frames and repetition counts
+        return out_frames, {'curl': self.curl_counter, 'press': self.press_counter, 'squat': self.squat_counter}
+    @st.cache()
+
    def count_reps(self, image, landmarks, mp_pose):
        """
        Counts repetitions of each exercise. Global count and stage (i.e., state) variables are updated within this function.
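Two caveats on the added `process_video`. First, `cv2.VideoCapture()` accepts a filesystem path or camera index, not the BytesIO the docstring promises, so a Streamlit upload would typically be spooled to a temporary file first. Second, `self.sequence` is only read here; the `if len(self.sequence) == self.sequence_length:` branch can only fire if the extracted `keypoints` are appended to `self.sequence` (and trimmed to the last `sequence_length` entries) somewhere, and that bookkeeping is not visible in this hunk. A minimal sketch of the file handling, with `open_uploaded_video` a hypothetical helper name and only standard-library/OpenCV calls:

```python
import tempfile

import cv2

def open_uploaded_video(uploaded_file):
    # Hypothetical helper: write the uploaded BytesIO to a temp file so that
    # cv2.VideoCapture(), which wants a path, can open it.
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
    tmp.write(uploaded_file.read())
    tmp.close()
    return cv2.VideoCapture(tmp.name)
```

Separately, `@st.cache()` on instance methods that mutate `self` (the rep counters) is likely to behave surprisingly; Streamlit's cache keys on the function arguments, which here include the processor instance itself.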
@@ -288,155 +317,82 @@ class VideoProcessor:
            cv2.putText(output_frame, self.actions[num], (0, 85+num*40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2, cv2.LINE_AA)

        return output_frame
-
-# @st.cache()
-# def process(self, image):
-#     """
-#     Function to process the video frame from the user's webcam and run the fitness trainer AI
-
-#     Args:
-#         image (numpy array): input image from the webcam
-
-#     Returns:
-#         numpy array: processed image with keypoint detection and fitness activity classification visualized
-#     """
-#     # Pose detection model
-#     image.flags.writeable = False
-#     image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
-#     results = pose.process(image)

-#     # Draw the hand annotations on the image.
-#     image.flags.writeable = True
-#     image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
-#     self.draw_landmarks(image, results)

-
-
-
-
-
-
-
-
-
-
-
-#     self.current_action = self.actions[np.argmax(res)]
-#     confidence = np.max(res)
-
-#     # Erase current action variable if no probability is above threshold
-#     if confidence < self.threshold:
-#         self.current_action = ''
+video_processor.process_video_input(threshold1, threshold2, threshold3)
+# def process_uploaded_file(self, file):
+#     """
+#     Function to process an uploaded image or video file and run the fitness trainer AI
+#     Args:
+#         file (BytesIO): uploaded image or video file
+#     Returns:
+#         numpy array: processed image with keypoint detection and fitness activity classification visualized
+#     """
+#     # Initialize an empty list to store processed frames
+#     processed_frames = []

-
-
-#     # Count reps
-#     try:
-#         landmarks = results.pose_landmarks.landmark
-#         self.count_reps(
-#             image, landmarks, mp_pose)
-#     except:
-#         pass
-
-#     # Display graphical information
-#     cv2.rectangle(image, (0,0), (640, 40), self.colors[np.argmax(res)], -1)
-#     cv2.putText(image, 'curl ' + str(self.curl_counter), (3,30),
-#                 cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
-#     cv2.putText(image, 'press ' + str(self.press_counter), (240,30),
-#                 cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
-#     cv2.putText(image, 'squat ' + str(self.squat_counter), (490,30),
-#                 cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
-
-#     # return cv2.flip(image, 1)
-#     return image
-
-# def recv(self, frame):
-#     """
-#     Receive and process video stream from webcam
+# # Check if the uploaded file is a video
+# is_video = hasattr(file, 'name') and file.name.endswith(('.mp4', '.avi', '.mov'))

-
-
-
-
-
-#     img = frame.to_ndarray(format="bgr24")
-#     img = self.process(img)
-#     return av.VideoFrame.from_ndarray(img, format="bgr24")
-    def process_uploaded_file(self, file):
-        """
-        Function to process an uploaded image or video file and run the fitness trainer AI
-        Args:
-            file (BytesIO): uploaded image or video file
-        Returns:
-            numpy array: processed image with keypoint detection and fitness activity classification visualized
-        """
-        # Initialize an empty list to store processed frames
-        processed_frames = []
-
-        # Check if the uploaded file is a video
-        is_video = hasattr(file, 'name') and file.name.endswith(('.mp4', '.avi', '.mov'))
-
-        if is_video:
-            container = av.open(file)
-            for frame in container.decode(video=0):
-                # Convert the frame to OpenCV format
-                image = frame.to_image().convert("RGB")
-                image = np.array(image)
+# if is_video:
+#     container = av.open(file)
+#     for frame in container.decode(video=0):
+#         # Convert the frame to OpenCV format
+#         image = frame.to_image().convert("RGB")
+#         image = np.array(image)

-                # Process the frame
-                processed_frame = self.process(image)
+#         # Process the frame
+#         processed_frame = self.process(image)

-                # Append the processed frame to the list
-                processed_frames.append(processed_frame)
+#         # Append the processed frame to the list
+#         processed_frames.append(processed_frame)

-            # Close the video file container
-            container.close()
-        else:
-            # If the uploaded file is an image
-            # Load the image from the BytesIO object
-            image = Image.open(file)
-            image = np.array(image)
+#     # Close the video file container
+#     container.close()
+# else:
+#     # If the uploaded file is an image
+#     # Load the image from the BytesIO object
+#     image = Image.open(file)
+#     image = np.array(image)

-            # Process the image
-            processed_frame = self.process(image)
+#     # Process the image
+#     processed_frame = self.process(image)

-            # Append the processed frame to the list
-            processed_frames.append(processed_frame)
+#     # Append the processed frame to the list
+#     processed_frames.append(processed_frame)

-        return processed_frames
+# return processed_frames

-    def recv_uploaded_file(self, file):
-        """
-        Receive and process an uploaded video file
-        Args:
-            file (BytesIO): uploaded video file
-        Returns:
-            List[av.VideoFrame]: list of processed video frames
-        """
-        # Process the uploaded file
-        processed_frames = self.process_uploaded_file(file)
+# def recv_uploaded_file(self, file):
+#     """
+#     Receive and process an uploaded video file
+#     Args:
+#         file (BytesIO): uploaded video file
+#     Returns:
+#         List[av.VideoFrame]: list of processed video frames
+#     """
+#     # Process the uploaded file
+#     processed_frames = self.process_uploaded_file(file)

-        # Convert processed frames to av.VideoFrame objects
-        av_frames = []
-        for frame in processed_frames:
-            av_frame = av.VideoFrame.from_ndarray(frame, format="bgr24")
-            av_frames.append(av_frame)
+# # Convert processed frames to av.VideoFrame objects
+# av_frames = []
+# for frame in processed_frames:
+#     av_frame = av.VideoFrame.from_ndarray(frame, format="bgr24")
+#     av_frames.append(av_frame)

-        return av_frames
+# return av_frames

-# Options
-RTC_CONFIGURATION = RTCConfiguration(
-    {"iceServers": [{"urls": ["stun:stun.l.google.com:19302"]}]}
-)
+# # Options
+# RTC_CONFIGURATION = RTCConfiguration(
+#     {"iceServers": [{"urls": ["stun:stun.l.google.com:19302"]}]}
+# )

-# Streamer
-webrtc_ctx = webrtc_streamer(
-    key="AI trainer",
-    mode=WebRtcMode.SENDRECV,
-    rtc_configuration=RTC_CONFIGURATION,
-    media_stream_constraints={"video": True, "audio": False},
-    video_processor_factory=VideoProcessor,
-    async_processing=True,
-)
+# # Streamer
+# webrtc_ctx = webrtc_streamer(
+#     key="AI trainer",
+#     mode=WebRtcMode.SENDRECV,
+#     rtc_configuration=RTC_CONFIGURATION,
+#     media_stream_constraints={"video": True, "audio": False},
+#     video_processor_factory=VideoProcessor,
+#     async_processing=True,
+# )
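This last hunk swaps the live WebRTC path (streamlit-webrtc's `webrtc_streamer`, now commented out) for an upload-and-process flow. One thing worth verifying: the new module-level call is `video_processor.process_video_input(threshold1, threshold2, threshold3)`, but the method added in this commit is named `process_video` and takes a video file, so unless `process_video_input` is defined elsewhere in the file this looks like a name mismatch. Since `process_video` returns `(frames, counts)`, the app still has to render them; a sketch of one way to do that, where `show_processed_frames` is a hypothetical helper and the codec/fps are illustrative choices:

```python
import cv2
import streamlit as st

def show_processed_frames(frames, counts, fps=30):
    # Hypothetical display helper for the (frames, counts) pair returned by
    # process_video: stitch the annotated frames into a clip and show it.
    if not frames:
        st.warning("No frames were processed.")
        return
    height, width = frames[0].shape[:2]
    writer = cv2.VideoWriter("annotated.mp4",
                             cv2.VideoWriter_fourcc(*"mp4v"), fps, (width, height))
    for frame in frames:
        writer.write(frame)  # frames are BGR ndarrays straight from cv2
    writer.release()
    st.video("annotated.mp4")
    st.write(counts)  # e.g. {'curl': 10, 'press': 8, 'squat': 12}
```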