tarto2 committed on
Commit
9e29e4c
·
verified ·
1 Parent(s): d0a126a

Upload folder using huggingface_hub

Browse files
Files changed (7) hide show
  1. .gitattributes +1 -0
  2. config.yml +3 -2
  3. hrnetv2_w48.yaml +35 -0
  4. keypoint +3 -0
  5. miner.py +63 -3
  6. pitch.py +15 -25
  7. player.py +2 -1
.gitattributes CHANGED
@@ -35,3 +35,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  SV_kp.engine filter=lfs diff=lfs merge=lfs -text
37
  osnet_model.pth.tar-100 filter=lfs diff=lfs merge=lfs -text
 
 
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  SV_kp.engine filter=lfs diff=lfs merge=lfs -text
37
  osnet_model.pth.tar-100 filter=lfs diff=lfs merge=lfs -text
38
+ keypoint filter=lfs diff=lfs merge=lfs -text
config.yml CHANGED
@@ -2,14 +2,15 @@ Image:
2
  from_base: parachutes/python:3.12
3
  run_command:
4
  - pip install --upgrade setuptools wheel
5
- - pip install ultralytics==8.3.222 opencv-python-headless numpy pydantic
 
6
  - pip install scikit-learn
7
  - pip install onnxruntime-gpu
8
  set_workdir: /app
9
 
10
  NodeSelector:
11
  gpu_count: 1
12
- min_vram_gb_per_gpu: 16
13
  exclude:
14
  - "5090"
15
  - b200
 
2
  from_base: parachutes/python:3.12
3
  run_command:
4
  - pip install --upgrade setuptools wheel
5
+ - pip install "torch==2.7.1" "torchvision==0.22.1"
6
+ - pip install "ultralytics==8.3.222" "opencv-python-headless" "numpy" "pydantic"
7
  - pip install scikit-learn
8
  - pip install onnxruntime-gpu
9
  set_workdir: /app
10
 
11
  NodeSelector:
12
  gpu_count: 1
13
+ min_vram_gb_per_gpu: 24
14
  exclude:
15
  - "5090"
16
  - b200
hrnetv2_w48.yaml ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MODEL:
2
+ IMAGE_SIZE: [960, 540]
3
+ NUM_JOINTS: 58
4
+ PRETRAIN: ''
5
+ EXTRA:
6
+ FINAL_CONV_KERNEL: 1
7
+ STAGE1:
8
+ NUM_MODULES: 1
9
+ NUM_BRANCHES: 1
10
+ BLOCK: BOTTLENECK
11
+ NUM_BLOCKS: [4]
12
+ NUM_CHANNELS: [64]
13
+ FUSE_METHOD: SUM
14
+ STAGE2:
15
+ NUM_MODULES: 1
16
+ NUM_BRANCHES: 2
17
+ BLOCK: BASIC
18
+ NUM_BLOCKS: [4, 4]
19
+ NUM_CHANNELS: [48, 96]
20
+ FUSE_METHOD: SUM
21
+ STAGE3:
22
+ NUM_MODULES: 4
23
+ NUM_BRANCHES: 3
24
+ BLOCK: BASIC
25
+ NUM_BLOCKS: [4, 4, 4]
26
+ NUM_CHANNELS: [48, 96, 192]
27
+ FUSE_METHOD: SUM
28
+ STAGE4:
29
+ NUM_MODULES: 3
30
+ NUM_BRANCHES: 4
31
+ BLOCK: BASIC
32
+ NUM_BLOCKS: [4, 4, 4, 4]
33
+ NUM_CHANNELS: [48, 96, 192, 384]
34
+ FUSE_METHOD: SUM
35
+
keypoint ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ea78fa76aaf94976a8eca428d6e3c59697a93430cba1a4603e20284b61f5113
3
+ size 264964645
miner.py CHANGED
@@ -71,7 +71,19 @@ class Miner:
71
  self.team_classifier_fitted = False
72
  self.player_crops_for_fit = []
73
 
74
- self.keypoints_model = YOLO(path_hf_repo / "keypoint.pt")
 
 
 
 
 
 
 
 
 
 
 
 
75
  print("Keypoints Model (keypoint.pt) Loaded")
76
 
77
  self.kp_threshold = 0.1
@@ -495,10 +507,58 @@ class Miner:
495
  print(f"Team classify time: {end - start}")
496
 
497
  # Phase 3: Keypoint Detection
498
- keypoints: Dict[int, List[Tuple[int, int]]] = {}
 
 
499
 
500
- keypoints = self._detect_keypoints_batch(batch_images, offset, n_keypoints)
501
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
502
 
503
  results: List[TVFrameResult] = []
504
  for frame_number in range(offset, offset + len(batch_images)):
 
71
  self.team_classifier_fitted = False
72
  self.player_crops_for_fit = []
73
 
74
+ # self.keypoints_model = YOLO(path_hf_repo / "keypoint.pt")
75
+
76
+ model_kp_path = path_hf_repo / 'keypoint'
77
+ config_kp_path = path_hf_repo / 'hrnetv2_w48.yaml'
78
+ cfg_kp = yaml.safe_load(open(config_kp_path, 'r'))
79
+
80
+ loaded_state_kp = torch.load(model_kp_path, map_location=device)
81
+ model = get_cls_net(cfg_kp)
82
+ model.load_state_dict(loaded_state_kp)
83
+ model.to(device)
84
+ model.eval()
85
+
86
+ self.keypoints_model = model
87
  print("Keypoints Model (keypoint.pt) Loaded")
88
 
89
  self.kp_threshold = 0.1
 
507
  print(f"Team classify time: {end - start}")
508
 
509
  # Phase 3: Keypoint Detection
510
+ # keypoints: Dict[int, List[Tuple[int, int]]] = {}
511
+
512
+ # keypoints = self._detect_keypoints_batch(batch_images, offset, n_keypoints)
513
 
 
514
 
515
+ pitch_batch_size = min(self.pitch_batch_size, len(batch_images))
516
+ keypoints: Dict[int, List[Tuple[int, int]]] = {}
517
+
518
+ start = time.time()
519
+ while True:
520
+ gc.collect()
521
+ if torch.cuda.is_available():
522
+ torch.cuda.empty_cache()
523
+ torch.cuda.synchronize()
524
+ device_str = "cuda"
525
+ keypoints_result = process_batch_input(
526
+ batch_images,
527
+ self.keypoints_model,
528
+ self.kp_threshold,
529
+ device_str,
530
+ batch_size=pitch_batch_size,
531
+ )
532
+ if keypoints_result is not None and len(keypoints_result) > 0:
533
+ for frame_number_in_batch, kp_dict in enumerate(keypoints_result):
534
+ if frame_number_in_batch >= len(batch_images):
535
+ break
536
+ frame_keypoints: List[Tuple[int, int]] = []
537
+ try:
538
+ height, width = batch_images[frame_number_in_batch].shape[:2]
539
+ if kp_dict is not None and isinstance(kp_dict, dict):
540
+ for idx in range(32):
541
+ x, y = 0, 0
542
+ kp_idx = idx + 1
543
+ if kp_idx in kp_dict:
544
+ try:
545
+ kp_data = kp_dict[kp_idx]
546
+ if isinstance(kp_data, dict) and "x" in kp_data and "y" in kp_data:
547
+ x = int(kp_data["x"] * width)
548
+ y = int(kp_data["y"] * height)
549
+ except (KeyError, TypeError, ValueError):
550
+ pass
551
+ frame_keypoints.append((x, y))
552
+ except (IndexError, ValueError, AttributeError):
553
+ frame_keypoints = [(0, 0)] * 32
554
+ if len(frame_keypoints) < n_keypoints:
555
+ frame_keypoints.extend([(0, 0)] * (n_keypoints - len(frame_keypoints)))
556
+ else:
557
+ frame_keypoints = frame_keypoints[:n_keypoints]
558
+ keypoints[offset + frame_number_in_batch] = frame_keypoints
559
+ break
560
+ end = time.time()
561
+ print(f"Keypoint time: {end - start}")
562
 
563
  results: List[TVFrameResult] = []
564
  for frame_number in range(offset, offset + len(batch_images)):
pitch.py CHANGED
@@ -520,7 +520,7 @@ def run_inference(model, input_tensor: torch.Tensor, device):
520
  output = model.module().forward(input_tensor)
521
  return output
522
 
523
- def preprocess_batch_fast(frames, device):
524
  """Ultra-fast batch preprocessing using optimized tensor operations"""
525
  target_size = (540, 960) # H, W format for model input
526
  batch = []
@@ -530,7 +530,7 @@ def preprocess_batch_fast(frames, device):
530
  img = img.astype(np.float32) / 255.0
531
  img = np.transpose(img, (2, 0, 1)) # HWC -> CHW
532
  batch.append(img)
533
- batch = torch.tensor(np.stack(batch), dtype=torch.float32)
534
 
535
  return batch
536
 
@@ -610,16 +610,24 @@ def inference_batch(frames, model, kp_threshold, device, batch_size=8):
610
  results = []
611
  num_frames = len(frames)
612
 
 
 
 
613
  # Process all frames in optimally-sized batches
614
  for i in range(0, num_frames, batch_size):
615
  current_batch_size = min(batch_size, num_frames - i)
616
  batch_frames = frames[i:i + current_batch_size]
617
 
618
- # Fast preprocessing
619
- batch = preprocess_batch_fast(batch_frames, device)
620
-
621
- heatmaps = run_inference(model, batch, device)
622
 
 
 
 
 
 
 
623
  # Ultra-fast keypoint extraction
624
  kp_coords = extract_keypoints_from_heatmap_fast(heatmaps[:,:-1,:,:], scale=2, max_keypoints=1)
625
 
@@ -652,28 +660,10 @@ def get_mapped_keypoints(kp_points):
652
  # mapped_points[key] = value
653
  return mapped_points
654
 
655
- def process_batch_input(frames, model, kp_threshold, device, batch_size=8):
656
  """Process multiple input images in batch"""
657
  # Batch inference
658
  kp_results = inference_batch(frames, model, kp_threshold, device, batch_size)
659
  kp_results = [get_mapped_keypoints(kp) for kp in kp_results]
660
- # Draw results and save
661
- # for i, (frame, kp_points, input_path) in enumerate(zip(frames, kp_results, valid_paths)):
662
- # height, width = frame.shape[:2]
663
-
664
- # # Apply mapping to get standard keypoint IDs
665
- # mapped_kp_points = get_mapped_keypoints(kp_points)
666
-
667
- # for key, value in mapped_kp_points.items():
668
- # x = int(value['x'] * width)
669
- # y = int(value['y'] * height)
670
- # cv2.circle(frame, (x, y), 5, (0, 255, 0), -1) # Green circles
671
- # cv2.putText(frame, str(key), (x+10, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)
672
-
673
- # # Save result
674
- # output_path = input_path.replace('.png', '_result.png').replace('.jpg', '_result.jpg')
675
- # cv2.imwrite(output_path, frame)
676
-
677
- # print(f"Batch processing complete. Processed {len(frames)} images.")
678
 
679
  return kp_results
 
520
  output = model.module().forward(input_tensor)
521
  return output
522
 
523
+ def preprocess_batch_fast(frames):
524
  """Ultra-fast batch preprocessing using optimized tensor operations"""
525
  target_size = (540, 960) # H, W format for model input
526
  batch = []
 
530
  img = img.astype(np.float32) / 255.0
531
  img = np.transpose(img, (2, 0, 1)) # HWC -> CHW
532
  batch.append(img)
533
+ batch = torch.from_numpy(np.stack(batch)).float()
534
 
535
  return batch
536
 
 
610
  results = []
611
  num_frames = len(frames)
612
 
613
+ # Get the device from the model itself
614
+ model_device = next(model.parameters()).device
615
+
616
  # Process all frames in optimally-sized batches
617
  for i in range(0, num_frames, batch_size):
618
  current_batch_size = min(batch_size, num_frames - i)
619
  batch_frames = frames[i:i + current_batch_size]
620
 
621
+ # Fast preprocessing - create on CPU first
622
+ batch = preprocess_batch_fast(batch_frames)
623
+ b, c, h, w = batch.size()
 
624
 
625
+ # Move batch to model device
626
+ batch = batch.to(model_device)
627
+
628
+ with torch.no_grad():
629
+ heatmaps = model(batch)
630
+
631
  # Ultra-fast keypoint extraction
632
  kp_coords = extract_keypoints_from_heatmap_fast(heatmaps[:,:-1,:,:], scale=2, max_keypoints=1)
633
 
 
660
  # mapped_points[key] = value
661
  return mapped_points
662
 
663
+ def process_batch_input(frames, model, kp_threshold, device, batch_size=16):
664
  """Process multiple input images in batch"""
665
  # Batch inference
666
  kp_results = inference_batch(frames, model, kp_threshold, device, batch_size)
667
  kp_results = [get_mapped_keypoints(kp) for kp in kp_results]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
668
 
669
  return kp_results
player.py CHANGED
@@ -240,7 +240,8 @@ def process_team_identification_batch(frames, results, kits_clf, left_team_label
240
  final_label = 3 # Referee
241
 
242
  else:
243
- final_label = int(label) # Keep original label, ensure it's int
 
244
 
245
  frame_results.append({
246
  "id": int(id),
 
240
  final_label = 3 # Referee
241
 
242
  else:
243
+ continue
244
+ # final_label = int(label) # Keep original label, ensure it's int
245
 
246
  frame_results.append({
247
  "id": int(id),