Gege24 committed on
Commit
2e8cd2d
·
verified ·
1 Parent(s): a6e2f77

scorevision: push artifact

Browse files
Files changed (1) hide show
  1. miner.py +29 -42
miner.py CHANGED
@@ -27,35 +27,22 @@ class Miner:
27
  - Running batched predictions on images.
28
  - Parsing ML model outputs into structured results (TVFrameResult).
29
 
30
- This class can be modified, but it must have the following to be compatible with the chute:
31
- - be named `Miner`
32
- - have a `predict_batch` function with the inputs and outputs specified
33
- - be stored in a file called `miner.py` which lives in the root of the HFHub repo
34
  """
35
 
36
  def __init__(self, path_hf_repo: Path) -> None:
37
  """
38
- Loads all ML models from the repository.
39
- -----(Adjust as needed)----
40
-
41
- Args:
42
- path_hf_repo (Path):
43
- Path to the downloaded HuggingFace Hub repository
44
-
45
- Returns:
46
- None
47
  """
48
- self.bbox_model = YOLO(path_hf_repo / "football-player-detection.pt")
49
- print(f"✅ BBox Model Loaded")
50
- self.keypoints_model = YOLO(path_hf_repo / "football-pitch-detection.pt")
51
- print(f"✅ Keypoints Model Loaded")
 
 
 
52
 
53
  def __repr__(self) -> str:
54
- """
55
- Information about miner returned in the health endpoint
56
- to inspect the loaded ML models (and their types)
57
- -----(Adjust as needed)----
58
- """
59
  return f"BBox Model: {type(self.bbox_model).__name__}\nKeypoints Model: {type(self.keypoints_model).__name__}"
60
 
61
  def predict_batch(
@@ -66,31 +53,19 @@ class Miner:
66
  ) -> list[TVFrameResult]:
67
  """
68
  Miner prediction for a batch of images.
69
- Handles the orchestration of ML models and any preprocessing and postprocessing
70
- -----(Adjust as needed)----
71
-
72
- Args:
73
- batch_images (list[np.ndarray]):
74
- A list of images (as NumPy arrays) to process in this batch.
75
- offset (int):
76
- The frame number corresponding to the first image in the batch.
77
- Used to correctly index frames in the output results.
78
- n_keypoints (int):
79
- The number of keypoints expected for each frame in this challenge type.
80
-
81
- Returns:
82
- list[TVFrameResult]:
83
- A list of predictions for each image in the batch
84
  """
85
 
86
  bboxes: dict[int, list[BoundingBox]] = {}
87
- bbox_model_results = self.bbox_model.predict(batch_images)
 
 
88
  if bbox_model_results is not None:
89
  for frame_number_in_batch, detection in enumerate(bbox_model_results):
90
  if not hasattr(detection, "boxes") or detection.boxes is None:
91
  continue
92
  boxes = []
93
  for box in detection.boxes.data:
 
94
  x1, y1, x2, y2, conf, cls_id = box.tolist()
95
  boxes.append(
96
  BoundingBox(
@@ -106,21 +81,33 @@ class Miner:
106
  print("✅ BBoxes predicted")
107
 
108
  keypoints: dict[int, tuple[int, int]] = {}
109
- keypoints_model_results = self.keypoints_model.predict(batch_images)
 
 
110
  if keypoints_model_results is not None:
111
  for frame_number_in_batch, detection in enumerate(keypoints_model_results):
112
  if not hasattr(detection, "keypoints") or detection.keypoints is None:
113
  continue
 
114
  frame_keypoints: list[tuple[int, int]] = []
115
- for part_points in detection.keypoints.data:
116
- for x, y, _ in part_points:
117
- frame_keypoints.append((int(x), int(y)))
 
 
 
 
 
 
 
 
118
  if len(frame_keypoints) < n_keypoints:
119
  frame_keypoints.extend(
120
  [(0, 0)] * (n_keypoints - len(frame_keypoints))
121
  )
122
  else:
123
  frame_keypoints = frame_keypoints[:n_keypoints]
 
124
  keypoints[offset + frame_number_in_batch] = frame_keypoints
125
  print("✅ Keypoints predicted")
126
 
 
27
  - Running batched predictions on images.
28
  - Parsing ML model outputs into structured results (TVFrameResult).
29
 
30
+ MODIFIED FOR TESTING: Uses standard yolov8n.pt and yolov8n-pose.pt
 
 
 
31
  """
32
 
33
  def __init__(self, path_hf_repo: Path) -> None:
34
  """
35
+ Loads all ML models.
 
 
 
 
 
 
 
 
36
  """
37
+ # Using standard YOLOv8 nano models that will be automatically downloaded
38
+ # if not present. This avoids the need for custom .pt files for testing.
39
+ self.bbox_model = YOLO("yolov8n.pt")
40
+ print(f"✅ BBox Model Loaded (yolov8n)")
41
+
42
+ self.keypoints_model = YOLO("yolov8n-pose.pt")
43
+ print(f"✅ Keypoints Model Loaded (yolov8n-pose)")
44
 
45
  def __repr__(self) -> str:
 
 
 
 
 
46
  return f"BBox Model: {type(self.bbox_model).__name__}\nKeypoints Model: {type(self.keypoints_model).__name__}"
47
 
48
  def predict_batch(
 
53
  ) -> list[TVFrameResult]:
54
  """
55
  Miner prediction for a batch of images.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  """
57
 
58
  bboxes: dict[int, list[BoundingBox]] = {}
59
+ # Run BBox prediction
60
+ bbox_model_results = self.bbox_model.predict(batch_images, verbose=False)
61
+
62
  if bbox_model_results is not None:
63
  for frame_number_in_batch, detection in enumerate(bbox_model_results):
64
  if not hasattr(detection, "boxes") or detection.boxes is None:
65
  continue
66
  boxes = []
67
  for box in detection.boxes.data:
68
+ # YOLOv8 standard output: x1, y1, x2, y2, conf, cls
69
  x1, y1, x2, y2, conf, cls_id = box.tolist()
70
  boxes.append(
71
  BoundingBox(
 
81
  print("✅ BBoxes predicted")
82
 
83
  keypoints: dict[int, tuple[int, int]] = {}
84
+ # Run Pose/Keypoints prediction
85
+ keypoints_model_results = self.keypoints_model.predict(batch_images, verbose=False)
86
+
87
  if keypoints_model_results is not None:
88
  for frame_number_in_batch, detection in enumerate(keypoints_model_results):
89
  if not hasattr(detection, "keypoints") or detection.keypoints is None:
90
  continue
91
+
92
  frame_keypoints: list[tuple[int, int]] = []
93
+
94
+ # Check if keypoints data exists and has the expected shape/content
95
+ if detection.keypoints.data is not None and len(detection.keypoints.data) > 0:
96
+ # Taking the first person detected for keypoints (simplification for testing)
97
+ # YOLO pose output is typically [num_people, num_kpts, 3] (x, y, conf)
98
+ first_person_kpts = detection.keypoints.data[0]
99
+ for kpt in first_person_kpts:
100
+ x, y = kpt[0], kpt[1] # extracting x, y
101
+ frame_keypoints.append((int(x), int(y)))
102
+
103
+ # Padding or truncating to match expected n_keypoints
104
  if len(frame_keypoints) < n_keypoints:
105
  frame_keypoints.extend(
106
  [(0, 0)] * (n_keypoints - len(frame_keypoints))
107
  )
108
  else:
109
  frame_keypoints = frame_keypoints[:n_keypoints]
110
+
111
  keypoints[offset + frame_number_in_batch] = frame_keypoints
112
  print("✅ Keypoints predicted")
113