Spaces:

thienphuc12339
/

SignLanguage-pro

Runtime error

App Files Files Community

thienphuc12339 committed on Dec 20, 2024

Commit

0bfec51

verified ·

1 Parent(s): 74050d9

Update inference.py

Browse files

Files changed (1) hide show

inference.py +48 -75

inference.py CHANGED Viewed

@@ -1,4 +1,5 @@
-#inference.py
 import shutil
 import logging
 from time import time
@@ -8,8 +9,7 @@ import pandas as pd
 import cv2
 from traceback import format_exc
 from argparse import Namespace
-from transformers import Pipeline
-from simple_parsing import ArgumentParser
 import mediapipe as mp
 from mediapipe.python.solutions.pose import PoseLandmark
 from mediapipe.python.solutions.hands import HandLandmark
@@ -19,8 +19,16 @@ from visualization import draw_text_on_image
 from configs import ModelConfig, InferenceConfig
 from utils import config_logger, POSE_BASED_MODELS
 from data import Arm, get_sample_timestamp, ok_to_get_frame
-from tools import load_pipeline, Predictions
 SPOTER_POSE_LANDMARKS = [
     PoseLandmark.NOSE,
@@ -31,7 +39,8 @@ SPOTER_POSE_LANDMARKS = [
     PoseLandmark.RIGHT_ELBOW,
     PoseLandmark.LEFT_ELBOW,
     PoseLandmark.RIGHT_WRIST,
-    PoseLandmark.LEFT_WRIST ]
 SPOTER_HAND_LANDMARKS = [
     HandLandmark.WRIST,
@@ -42,6 +51,7 @@ SPOTER_HAND_LANDMARKS = [
     HandLandmark.THUMB_TIP, HandLandmark.THUMB_IP, HandLandmark.THUMB_MCP, HandLandmark.THUMB_CMC,
 ]
 def get_args() -> Namespace:
     parser = ArgumentParser(
         description="Train a model on VSL",
@@ -52,13 +62,13 @@ def get_args() -> Namespace:
     return parser.parse_args()
-def inference(model_config, inference_config: InferenceConfig, pipeline: Pipeline) -> None:
     # Load video
-    source = str(inference_config.source) if inference_config.source.is_file() else 0
     cap = cv2.VideoCapture(source)
     if inference_config.output_dir is not None:
         writer = cv2.VideoWriter(
-            str(inference_config.output_dir / "output.mp4"),
             cv2.VideoWriter_fourcc(*"mp4v"),
             cap.get(cv2.CAP_PROP_FPS),
             (int(cap.get(3)), int(cap.get(4))),
@@ -69,7 +79,6 @@ def inference(model_config, inference_config: InferenceConfig, pipeline: Pipelin
     mp_drawing = mp.solutions.drawing_utils
     mp_drawing_styles = mp.solutions.drawing_styles
     custom_pose_style = mp_drawing_styles.get_default_pose_landmarks_style()
     custom_right_hand_style = mp_drawing_styles.get_default_hand_landmarks_style()
     custom_left_hand_style = mp_drawing_styles.get_default_hand_landmarks_style()
@@ -77,30 +86,27 @@ def inference(model_config, inference_config: InferenceConfig, pipeline: Pipelin
     custom_hand_connections = list(mp_holistic.HAND_CONNECTIONS)
     if inference_config.show_skeleton:
-        # if model_config.arch == 'spoter':
         pose_landmarks = SPOTER_POSE_LANDMARKS
         hand_landmarks = SPOTER_HAND_LANDMARKS
         for landmark in PoseLandmark:
             if landmark in pose_landmarks:
-                custom_pose_style[landmark] = DrawingSpec(color=(0,255,0), thickness=2, circle_radius=2)
             else:
-                custom_pose_style[landmark] = DrawingSpec(color=(0,0,0), thickness=0, circle_radius=0)
-                for connection_tuple in custom_pose_connections:
-                    if landmark.value in connection_tuple:
-                        custom_pose_connections.remove(connection_tuple)
         for landmark in HandLandmark:
             if landmark in hand_landmarks:
-                custom_right_hand_style[landmark] = DrawingSpec(color=(0,0,255), thickness=2, circle_radius=2)
-                custom_left_hand_style[landmark] = DrawingSpec(color=(255,0,0), thickness=2, circle_radius=2)
             else:
-                custom_right_hand_style[HandLandmark[landmark.name]] = DrawingSpec(color=(0,0,0), thickness=0, circle_radius=0)
-                custom_left_hand_style[HandLandmark[landmark.name]] = DrawingSpec(color=(0,0,0), thickness=0, circle_radius=0)
-                for connection_tuple in custom_hand_connections:
-                    if landmark.value in connection_tuple:
-                        custom_hand_connections.remove(connection_tuple)
     # Init variables
     right_arm = Arm("right", inference_config.visibility)
@@ -155,19 +161,15 @@ def inference(model_config, inference_config: InferenceConfig, pipeline: Pipelin
             if left_arm_ok_to_get_frame or right_arm_ok_to_get_frame:
                 # logging.info("Frame added to the list")
                 predictions = Predictions()
-                data.append(detection_results if inference_config.use_pose_model else frame)
             # Calculate the start and end time of sign
             start_time, end_time = get_sample_timestamp(left_arm, right_arm)
-            # Convert from miliseconds to seconds
             start_time /= 1_000
             end_time /= 1_000
-            # logging.info(f"start_time: {start_time} - end_time: {end_time}")
-            # logging.info(f"\tLeft arm: {left_arm.start_time} - {left_arm.end_time} - {left_arm.is_up}")
-            # logging.info(f"\tRight arm: {right_arm.start_time} - {right_arm.end_time} - {right_arm.is_up}")
             if start_time != 0 and end_time != 0:
                 # Render waiting screen
                 if inference_config.visualize:
@@ -183,7 +185,9 @@ def inference(model_config, inference_config: InferenceConfig, pipeline: Pipelin
                         break
                 start_inference_time = time()
-                predictions = Predictions(predictions=pipeline(np.array(data)))
                 predictions.inference_time = time() - start_inference_time
                 predictions.start_time = start_time
@@ -206,20 +210,23 @@ def inference(model_config, inference_config: InferenceConfig, pipeline: Pipelin
                 mp_drawing.draw_landmarks(
                     frame,
                     detection_results.pose_landmarks,
-                    connections = custom_pose_connections, #  passing the modified connections list
-                    landmark_drawing_spec=custom_pose_style) # and drawing style
                 mp_drawing.draw_landmarks(
                     frame,
                     detection_results.right_hand_landmarks,
-                    connections = custom_hand_connections, #  passing the modified connections list
-                    landmark_drawing_spec=custom_right_hand_style) # and drawing style
                 mp_drawing.draw_landmarks(
                     frame,
                     detection_results.left_hand_landmarks,
-                    connections = custom_hand_connections, #  passing the modified connections list
-                    landmark_drawing_spec=custom_left_hand_style) # and drawing style
             if inference_config.output_dir is not None:
                 writer.write(frame)
@@ -234,42 +241,8 @@ def inference(model_config, inference_config: InferenceConfig, pipeline: Pipelin
     if inference_config.output_dir is not None:
         writer.release()
-        logging.info(f"Video is recorded and saved to {inference_config.output_dir / 'output.avi'}")
-        pd.DataFrame(results).to_csv(inference_config.output_dir / "results.csv", index=False)
-        logging.info(f"Results saved to {inference_config.output_dir / 'results.csv'}")
-# inference.py
-def main(args: Namespace) -> None:
-    model_config = args.model
-    logging.info(model_config)
-    inference_config = args.inference
-    logging.info(inference_config)
-    if model_config.arch in POSE_BASED_MODELS:
-        inference_config.use_pose_model = True
-    else:
-        inference_config.use_pose_model = False
-    pipeline_or_session = load_pipeline(model_config, inference_config)
-    logging.info("Pipeline loaded")
-    inference(model_config, inference_config, pipeline_or_session)
-    logging.info("Inference completed")
-if __name__ == "__main__":
-    try:
-        args = get_args()
-        config_logger(args.inference.output_dir / "inference.log")
-        logging.info(f"Config file loaded from {args.config_path[0]}")
-        shutil.copy(args.config_path[0], args.inference.output_dir / "inference.yaml")
-        logging.info(f"Config file saved to {args.inference.output_dir}")
-        main(args=args)
-    except Exception:
-        print(format_exc())

+# inference.py
 import shutil
 import logging
 from time import time
 import cv2
 from traceback import format_exc
 from argparse import Namespace
+from pydantic import BaseModel
 import mediapipe as mp
 from mediapipe.python.solutions.pose import PoseLandmark
 from mediapipe.python.solutions.hands import HandLandmark
 from configs import ModelConfig, InferenceConfig
 from utils import config_logger, POSE_BASED_MODELS
 from data import Arm, get_sample_timestamp, ok_to_get_frame
+from tools.models import load_pipeline, get_predictions, Predictions
+# Define id2gloss mapping
+# Bạn cần thay thế bản đồ này với bản đồ thực tế của bạn
+id2gloss = {
+    "0": "hello",
+    "1": "thanks",
+    "2": "yes",
+    # Thêm các ánh xạ cần thiết
+}
 SPOTER_POSE_LANDMARKS = [
     PoseLandmark.NOSE,
     PoseLandmark.RIGHT_ELBOW,
     PoseLandmark.LEFT_ELBOW,
     PoseLandmark.RIGHT_WRIST,
+    PoseLandmark.LEFT_WRIST
+]
 SPOTER_HAND_LANDMARKS = [
     HandLandmark.WRIST,
     HandLandmark.THUMB_TIP, HandLandmark.THUMB_IP, HandLandmark.THUMB_MCP, HandLandmark.THUMB_CMC,
 ]
 def get_args() -> Namespace:
     parser = ArgumentParser(
         description="Train a model on VSL",
     return parser.parse_args()
+def inference(model_config, inference_config: InferenceConfig, session: ort.InferenceSession) -> dict:
     # Load video
+    source = str(inference_config.source) if Path(inference_config.source).is_file() else 0
     cap = cv2.VideoCapture(source)
     if inference_config.output_dir is not None:
         writer = cv2.VideoWriter(
+            str(Path(inference_config.output_dir) / "output.mp4"),
             cv2.VideoWriter_fourcc(*"mp4v"),
             cap.get(cv2.CAP_PROP_FPS),
             (int(cap.get(3)), int(cap.get(4))),
     mp_drawing = mp.solutions.drawing_utils
     mp_drawing_styles = mp.solutions.drawing_styles
     custom_pose_style = mp_drawing_styles.get_default_pose_landmarks_style()
     custom_right_hand_style = mp_drawing_styles.get_default_hand_landmarks_style()
     custom_left_hand_style = mp_drawing_styles.get_default_hand_landmarks_style()
     custom_hand_connections = list(mp_holistic.HAND_CONNECTIONS)
     if inference_config.show_skeleton:
+        # Định dạng đặc biệt cho 'spoter'
         pose_landmarks = SPOTER_POSE_LANDMARKS
         hand_landmarks = SPOTER_HAND_LANDMARKS
         for landmark in PoseLandmark:
             if landmark in pose_landmarks:
+                custom_pose_style[landmark] = DrawingSpec(color=(0, 255, 0), thickness=2, circle_radius=2)
             else:
+                custom_pose_style[landmark] = DrawingSpec(color=(0, 0, 0), thickness=0, circle_radius=0)
+                # Loại bỏ các kết nối liên quan
+                custom_pose_connections = [conn for conn in custom_pose_connections if landmark.value not in conn]
         for landmark in HandLandmark:
             if landmark in hand_landmarks:
+                custom_right_hand_style[landmark] = DrawingSpec(color=(0, 0, 255), thickness=2, circle_radius=2)
+                custom_left_hand_style[landmark] = DrawingSpec(color=(255, 0, 0), thickness=2, circle_radius=2)
             else:
+                # Loại bỏ các kết nối liên quan
+                custom_hand_connections = [conn for conn in custom_hand_connections if landmark.value not in conn]
+                custom_right_hand_style[landmark] = DrawingSpec(color=(0, 0, 0), thickness=0, circle_radius=0)
+                custom_left_hand_style[landmark] = DrawingSpec(color=(0, 0, 0), thickness=0, circle_radius=0)
     # Init variables
     right_arm = Arm("right", inference_config.visibility)
             if left_arm_ok_to_get_frame or right_arm_ok_to_get_frame:
                 # logging.info("Frame added to the list")
                 predictions = Predictions()
+                data.append(frame)  # Chỉ sử dụng frame vì bạn đang dùng .onnx
             # Calculate the start and end time of sign
             start_time, end_time = get_sample_timestamp(left_arm, right_arm)
+            # Convert from milliseconds to seconds
             start_time /= 1_000
             end_time /= 1_000
             if start_time != 0 and end_time != 0:
                 # Render waiting screen
                 if inference_config.visualize:
                         break
                 start_inference_time = time()
+                # Chuyển data thành np.ndarray phù hợp với mô hình ONNX
+                data_np = np.stack(data, axis=0)  # Giả sử mô hình nhận dạng theo batch
+                predictions = get_predictions(data_np, session, id2gloss=id2gloss, k=inference_config.top_k)
                 predictions.inference_time = time() - start_inference_time
                 predictions.start_time = start_time
                 mp_drawing.draw_landmarks(
                     frame,
                     detection_results.pose_landmarks,
+                    connections=custom_pose_connections,
+                    landmark_drawing_spec=custom_pose_style
+                )
                 mp_drawing.draw_landmarks(
                     frame,
                     detection_results.right_hand_landmarks,
+                    connections=custom_hand_connections,
+                    landmark_drawing_spec=custom_right_hand_style
+                )
                 mp_drawing.draw_landmarks(
                     frame,
                     detection_results.left_hand_landmarks,
+                    connections=custom_hand_connections,
+                    landmark_drawing_spec=custom_left_hand_style
+                )
             if inference_config.output_dir is not None:
                 writer.write(frame)
     if inference_config.output_dir is not None:
         writer.release()
+        logging.info(f"Video is recorded and saved to {Path(inference_config.output_dir) / 'output.mp4'}")
+        pd.DataFrame(results).to_csv(Path(inference_config.output_dir) / "results.csv", index=False)
+        logging.info(f"Results saved to {Path(inference_config.output_dir) / 'results.csv'}")
+    return {"results": results}