Upload folder using huggingface_hub

Browse files

Files changed (7)

.gitattributes +1 -0
config.yml +3 -2
hrnetv2_w48.yaml +35 -0
keypoint +3 -0
miner.py +63 -3
pitch.py +15 -25
player.py +2 -1

.gitattributes CHANGED Viewed

@@ -35,3 +35,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 SV_kp.engine filter=lfs diff=lfs merge=lfs -text
 osnet_model.pth.tar-100 filter=lfs diff=lfs merge=lfs -text

 *tfevents* filter=lfs diff=lfs merge=lfs -text
 SV_kp.engine filter=lfs diff=lfs merge=lfs -text
 osnet_model.pth.tar-100 filter=lfs diff=lfs merge=lfs -text
+keypoint filter=lfs diff=lfs merge=lfs -text

config.yml CHANGED Viewed

@@ -2,14 +2,15 @@ Image:
   from_base: parachutes/python:3.12
   run_command:
     - pip install --upgrade setuptools wheel
-    - pip install ultralytics==8.3.222 opencv-python-headless numpy pydantic
     - pip install scikit-learn
     - pip install onnxruntime-gpu
   set_workdir: /app
 NodeSelector:
   gpu_count: 1
-  min_vram_gb_per_gpu: 16
   exclude:
     - "5090"
     - b200

   from_base: parachutes/python:3.12
   run_command:
     - pip install --upgrade setuptools wheel
+    - pip install "torch==2.7.1" "torchvision==0.22.1"
+    - pip install "ultralytics==8.3.222" "opencv-python-headless" "numpy" "pydantic"
     - pip install scikit-learn
     - pip install onnxruntime-gpu
   set_workdir: /app
 NodeSelector:
   gpu_count: 1
+  min_vram_gb_per_gpu: 24
   exclude:
     - "5090"
     - b200

hrnetv2_w48.yaml ADDED Viewed

	@@ -0,0 +1,35 @@

+MODEL:
+  IMAGE_SIZE: [960, 540]
+  NUM_JOINTS: 58
+  PRETRAIN: ''
+  EXTRA:
+    FINAL_CONV_KERNEL: 1
+    STAGE1:
+      NUM_MODULES: 1
+      NUM_BRANCHES: 1
+      BLOCK: BOTTLENECK
+      NUM_BLOCKS: [4]
+      NUM_CHANNELS: [64]
+      FUSE_METHOD: SUM
+    STAGE2:
+      NUM_MODULES: 1
+      NUM_BRANCHES: 2
+      BLOCK: BASIC
+      NUM_BLOCKS: [4, 4]
+      NUM_CHANNELS: [48, 96]
+      FUSE_METHOD: SUM
+    STAGE3:
+      NUM_MODULES: 4
+      NUM_BRANCHES: 3
+      BLOCK: BASIC
+      NUM_BLOCKS: [4, 4, 4]
+      NUM_CHANNELS: [48, 96, 192]
+      FUSE_METHOD: SUM
+    STAGE4:
+      NUM_MODULES: 3
+      NUM_BRANCHES: 4
+      BLOCK: BASIC
+      NUM_BLOCKS: [4, 4, 4, 4]
+      NUM_CHANNELS: [48, 96, 192, 384]
+      FUSE_METHOD: SUM

keypoint ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7ea78fa76aaf94976a8eca428d6e3c59697a93430cba1a4603e20284b61f5113
+size 264964645

miner.py CHANGED Viewed

@@ -71,7 +71,19 @@ class Miner:
             self.team_classifier_fitted = False
             self.player_crops_for_fit = []
-            self.keypoints_model = YOLO(path_hf_repo / "keypoint.pt")
             print("Keypoints Model (keypoint.pt) Loaded")
             self.kp_threshold = 0.1
@@ -495,10 +507,58 @@ class Miner:
         print(f"Team classify time: {end - start}")
         # Phase 3: Keypoint Detection
-        keypoints: Dict[int, List[Tuple[int, int]]] = {}
-        keypoints = self._detect_keypoints_batch(batch_images, offset, n_keypoints)
         results: List[TVFrameResult] = []
         for frame_number in range(offset, offset + len(batch_images)):

             self.team_classifier_fitted = False
             self.player_crops_for_fit = []
+            # self.keypoints_model = YOLO(path_hf_repo / "keypoint.pt")
+            model_kp_path = path_hf_repo / 'keypoint'
+            config_kp_path = path_hf_repo / 'hrnetv2_w48.yaml'
+            cfg_kp = yaml.safe_load(open(config_kp_path, 'r'))
+            loaded_state_kp = torch.load(model_kp_path, map_location=device)
+            model = get_cls_net(cfg_kp)
+            model.load_state_dict(loaded_state_kp)
+            model.to(device)
+            model.eval()
+            self.keypoints_model = model
             print("Keypoints Model (keypoint.pt) Loaded")
             self.kp_threshold = 0.1
         print(f"Team classify time: {end - start}")
         # Phase 3: Keypoint Detection
+        # keypoints: Dict[int, List[Tuple[int, int]]] = {}
+        # keypoints = self._detect_keypoints_batch(batch_images, offset, n_keypoints)
+        pitch_batch_size = min(self.pitch_batch_size, len(batch_images))
+        keypoints: Dict[int, List[Tuple[int, int]]] = {}
+        start = time.time()
+        while True:
+            gc.collect()
+            if torch.cuda.is_available():
+                torch.cuda.empty_cache()
+                torch.cuda.synchronize()
+            device_str = "cuda"
+            keypoints_result = process_batch_input(
+                batch_images,
+                self.keypoints_model,
+                self.kp_threshold,
+                device_str,
+                batch_size=pitch_batch_size,
+            )
+            if keypoints_result is not None and len(keypoints_result) > 0:
+                for frame_number_in_batch, kp_dict in enumerate(keypoints_result):
+                    if frame_number_in_batch >= len(batch_images):
+                        break
+                    frame_keypoints: List[Tuple[int, int]] = []
+                    try:
+                        height, width = batch_images[frame_number_in_batch].shape[:2]
+                        if kp_dict is not None and isinstance(kp_dict, dict):
+                            for idx in range(32):
+                                x, y = 0, 0
+                                kp_idx = idx + 1
+                                if kp_idx in kp_dict:
+                                    try:
+                                        kp_data = kp_dict[kp_idx]
+                                        if isinstance(kp_data, dict) and "x" in kp_data and "y" in kp_data:
+                                            x = int(kp_data["x"] * width)
+                                            y = int(kp_data["y"] * height)
+                                    except (KeyError, TypeError, ValueError):
+                                        pass
+                                frame_keypoints.append((x, y))
+                    except (IndexError, ValueError, AttributeError):
+                        frame_keypoints = [(0, 0)] * 32
+                    if len(frame_keypoints) < n_keypoints:
+                        frame_keypoints.extend([(0, 0)] * (n_keypoints - len(frame_keypoints)))
+                    else:
+                        frame_keypoints = frame_keypoints[:n_keypoints]
+                    keypoints[offset + frame_number_in_batch] = frame_keypoints
+            break
+        end = time.time()
+        print(f"Keypoint time: {end - start}")
         results: List[TVFrameResult] = []
         for frame_number in range(offset, offset + len(batch_images)):

pitch.py CHANGED Viewed

@@ -520,7 +520,7 @@ def run_inference(model, input_tensor: torch.Tensor, device):
     output = model.module().forward(input_tensor)
     return output
-def preprocess_batch_fast(frames, device):
     """Ultra-fast batch preprocessing using optimized tensor operations"""
     target_size = (540, 960)  # H, W format for model input
     batch = []
@@ -530,7 +530,7 @@ def preprocess_batch_fast(frames, device):
         img = img.astype(np.float32) / 255.0
         img = np.transpose(img, (2, 0, 1))  # HWC -> CHW
         batch.append(img)
-    batch = torch.tensor(np.stack(batch), dtype=torch.float32)
     return batch
@@ -610,16 +610,24 @@ def inference_batch(frames, model, kp_threshold, device, batch_size=8):
     results = []
     num_frames = len(frames)
     # Process all frames in optimally-sized batches
     for i in range(0, num_frames, batch_size):
         current_batch_size = min(batch_size, num_frames - i)
         batch_frames = frames[i:i + current_batch_size]
-        # Fast preprocessing
-        batch = preprocess_batch_fast(batch_frames, device)
-        heatmaps = run_inference(model, batch, device)
         # Ultra-fast keypoint extraction
         kp_coords = extract_keypoints_from_heatmap_fast(heatmaps[:,:-1,:,:], scale=2, max_keypoints=1)
@@ -652,28 +660,10 @@ def get_mapped_keypoints(kp_points):
             # mapped_points[key] = value
     return mapped_points
-def process_batch_input(frames, model, kp_threshold, device, batch_size=8):
     """Process multiple input images in batch"""
     # Batch inference
     kp_results = inference_batch(frames, model, kp_threshold, device, batch_size)
     kp_results = [get_mapped_keypoints(kp) for kp in kp_results]
-    # Draw results and save
-    # for i, (frame, kp_points, input_path) in enumerate(zip(frames, kp_results, valid_paths)):
-    #     height, width = frame.shape[:2]
-    #     # Apply mapping to get standard keypoint IDs
-    #     mapped_kp_points = get_mapped_keypoints(kp_points)
-    #     for key, value in mapped_kp_points.items():
-    #         x = int(value['x'] * width)
-    #         y = int(value['y'] * height)
-    #         cv2.circle(frame, (x, y), 5, (0, 255, 0), -1)  # Green circles
-    #         cv2.putText(frame, str(key), (x+10, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)
-    #     # Save result
-    #     output_path = input_path.replace('.png', '_result.png').replace('.jpg', '_result.jpg')
-    #     cv2.imwrite(output_path, frame)
-    # print(f"Batch processing complete. Processed {len(frames)} images.")
     return kp_results

     output = model.module().forward(input_tensor)
     return output
+def preprocess_batch_fast(frames):
     """Ultra-fast batch preprocessing using optimized tensor operations"""
     target_size = (540, 960)  # H, W format for model input
     batch = []
         img = img.astype(np.float32) / 255.0
         img = np.transpose(img, (2, 0, 1))  # HWC -> CHW
         batch.append(img)
+    batch = torch.from_numpy(np.stack(batch)).float()
     return batch
     results = []
     num_frames = len(frames)
+    # Get the device from the model itself
+    model_device = next(model.parameters()).device
     # Process all frames in optimally-sized batches
     for i in range(0, num_frames, batch_size):
         current_batch_size = min(batch_size, num_frames - i)
         batch_frames = frames[i:i + current_batch_size]
+        # Fast preprocessing - create on CPU first
+        batch = preprocess_batch_fast(batch_frames)
+        b, c, h, w = batch.size()
+        # Move batch to model device
+        batch = batch.to(model_device)
+        with torch.no_grad():
+            heatmaps = model(batch)
         # Ultra-fast keypoint extraction
         kp_coords = extract_keypoints_from_heatmap_fast(heatmaps[:,:-1,:,:], scale=2, max_keypoints=1)
             # mapped_points[key] = value
     return mapped_points
+def process_batch_input(frames, model, kp_threshold, device, batch_size=16):
     """Process multiple input images in batch"""
     # Batch inference
     kp_results = inference_batch(frames, model, kp_threshold, device, batch_size)
     kp_results = [get_mapped_keypoints(kp) for kp in kp_results]
     return kp_results

player.py CHANGED Viewed

@@ -240,7 +240,8 @@ def process_team_identification_batch(frames, results, kits_clf, left_team_label
                 final_label = 3  # Referee
             else:
-                final_label = int(label)  # Keep original label, ensure it's int
             frame_results.append({
                 "id": int(id),

                 final_label = 3  # Referee
             else:
+                continue
+                # final_label = int(label)  # Keep original label, ensure it's int
             frame_results.append({
                 "id": int(id),