aivertex95827 committed on
Commit
bf0a356
·
verified ·
1 Parent(s): 60f4903

Update miner.py

Browse files
Files changed (1) hide show
  1. miner.py +28 -107
miner.py CHANGED
@@ -18,7 +18,6 @@ from collections import OrderedDict, defaultdict
18
  from PIL import Image
19
  import torchvision.transforms as T
20
  import time
21
- import onnxruntime as ort
22
 
23
  try:
24
  from scipy.optimize import linear_sum_assignment as _linear_sum_assignment
@@ -1016,8 +1015,8 @@ TEMPLATE_F1: List[Tuple[float, float]] = [
1016
  HOMOGRAPHY_FILL_ONLY_VALID = True
1017
  KP_THRESHOLD = 0.2 # new-5 style (was 0.3)
1018
  # HRNet: smaller input = faster; 432x768 balances speed/accuracy (new-2 style)
1019
- # KP_H, KP_W = 540, 960
1020
- KP_H, KP_W = 432, 768
1021
 
1022
 
1023
  def _preprocess_batch(frames):
@@ -1028,7 +1027,6 @@ def _preprocess_batch(frames):
1028
  img = cv2.resize(img, (target_w, target_h)).astype(np.float32) / 255.0
1029
  batch.append(np.transpose(img, (2, 0, 1)))
1030
  return torch.from_numpy(np.stack(batch)).float()
1031
- # return np.stack(batch)
1032
 
1033
 
1034
  def _extract_keypoints(heatmap: torch.Tensor, scale: int = 2):
@@ -1062,77 +1060,19 @@ def _process_keypoints(kp_coords, threshold, w, h, batch_size):
1062
  def _run_hrnet_batch(frames, model, threshold, batch_size=16):
1063
  if not frames or model is None:
1064
  return []
1065
- # device = next(model.parameters()).device
1066
- # use_amp = device.type == "cuda"
1067
- output_shape = (batch_size, 58, 270, 480)
1068
- output_tensor = torch.empty(output_shape, dtype=torch.float32, device='cuda')
1069
- io_binding = model.io_binding()
1070
-
1071
- input_name = model.get_inputs()[0].name
1072
- output_name = model.get_outputs()[0].name
1073
  results = []
1074
-
1075
  for i in range(0, len(frames), batch_size):
1076
  chunk = frames[i:i + batch_size]
1077
- curr_bs = len(chunk)
1078
-
1079
- # Preprocess stays on GPU
1080
- # Ensure _preprocess_batch returns a GPU tensor
1081
- batch_cuda = _preprocess_batch(chunk).to('cuda:0')
1082
-
1083
- # --- I/O BINDING (The "Keep on GPU" secret) ---
1084
- io_binding.bind_input(
1085
- name=input_name,
1086
- device_type='cuda',
1087
- device_id=0,
1088
- element_type=np.float32,
1089
- shape=batch_cuda.shape,
1090
- buffer_ptr=batch_cuda.data_ptr()
1091
- )
1092
-
1093
- io_binding.bind_output(
1094
- name=output_name,
1095
- device_type='cuda',
1096
- device_id=0,
1097
- element_type=np.float32,
1098
- shape=(curr_bs, 58, 270, 480), # Current dynamic shape
1099
- buffer_ptr=output_tensor.data_ptr()
1100
- )
1101
-
1102
- # Sync and Run (Zero CPU usage here)
1103
- model.run_with_iobinding(io_binding)
1104
-
1105
- # Access the output directly from GPU memory
1106
- # We slice it to match current batch size
1107
- heatmaps = output_tensor[:curr_bs]
1108
-
1109
- # 3. GPU Post-processing
1110
- # Ensure _extract_keypoints uses torch functions (not cv2/numpy)
1111
  kp_coords = _extract_keypoints(heatmaps[:, :-1, :, :], scale=2)
1112
- batch_kps = _process_keypoints(kp_coords, threshold, KP_W, KP_H, curr_bs)
1113
-
1114
  results.extend(batch_kps)
1115
- # device = 'cuda'
1116
- # results = []
1117
- # torch.cuda.empty_cache()
1118
- # for i in range(0, len(frames), batch_size):
1119
- # chunk = frames[i:i + batch_size]
1120
- # batch = _preprocess_batch(chunk)#.to(device, non_blocking=True)
1121
- # print(batch.shape, flush=True)
1122
-
1123
- # input_name = model.get_inputs()[0].name
1124
- # outputs = model.run(None, {input_name: batch})
1125
- # print(f"Inference output shape: {outputs[0].shape}")
1126
- # heatmaps = torch.from_numpy(outputs[0]).to(device)
1127
- # del outputs
1128
- # gc.collect()
1129
- # # with torch.inference_mode():
1130
- # # with torch.amp.autocast("cuda", enabled=use_amp):
1131
- # # heatmaps = model(batch)
1132
- # kp_coords = _extract_keypoints(heatmaps[:, :-1, :, :], scale=2)
1133
- # batch_kps = _process_keypoints(kp_coords, threshold, KP_W, KP_H, len(chunk))
1134
- # results.extend(batch_kps)
1135
- # del heatmaps, kp_coords, batch
1136
  if results:
1137
  gc.collect()
1138
  return results
@@ -1316,7 +1256,6 @@ def _smooth_boxes(
1316
 
1317
  # ── Miner ─────────────────────────────────────────────────────────────────────
1318
 
1319
- HRNET_BATCH_SIZE = 8 # larger batch = faster (if GPU mem allows)
1320
  class Miner:
1321
  def __init__(self, path_hf_repo: Path) -> None:
1322
  self.path_hf_repo = Path(path_hf_repo)
@@ -1348,30 +1287,16 @@ class Miner:
1348
  print(f"⚠️ OSNet weights not found at {osnet_weight_path}. Using HSV fallback.")
1349
 
1350
  # Keypoints model: HRNet
1351
- # kp_config_file = "hrnetv2_w48.yaml"
1352
- # kp_weights_file = "keypoint_detect.pt"
1353
- # config_path = Path(kp_config_file) if Path(kp_config_file).exists() else self.path_hf_repo / kp_config_file
1354
- # weights_path = Path(kp_weights_file) if Path(kp_weights_file).exists() else self.path_hf_repo / kp_weights_file
1355
- # cfg = yaml.safe_load(open(config_path, 'r'))
1356
- # hrnet = get_cls_net(cfg)
1357
- # state = torch.load(weights_path, map_location=device, weights_only=False)
1358
- # hrnet.load_state_dict(state)
1359
- # hrnet.to(device).eval()
1360
- # self.keypoints_model = hrnet
1361
-
1362
- available = ort.get_available_providers()
1363
- print(f"Available Providers: {available}")
1364
- if 'CUDAExecutionProvider' not in ort.get_available_providers():
1365
- raise RuntimeError("ONNX Runtime cannot find CUDA! Check your onnxruntime-gpu installation.")
1366
- providers = [
1367
- ('CUDAExecutionProvider', {
1368
- 'device_id': 0,
1369
- 'arena_extend_strategy': 'kSameAsRequested',
1370
- })
1371
- ]
1372
-
1373
- session = ort.InferenceSession(self.path_hf_repo / "hrnet_final.onnx", providers=providers)
1374
- self.keypoints_model = session
1375
  print("βœ… HRNet Keypoints Model Loaded")
1376
 
1377
  # Person detection state (new-2 style)
@@ -1630,11 +1555,11 @@ class Miner:
1630
  return []
1631
  if self.keypoints_model is None:
1632
  return [[(0, 0)] * n_keypoints for _ in images]
1633
- # try:
1634
- raw_kps = _run_hrnet_batch(images, self.keypoints_model, KP_THRESHOLD, batch_size=HRNET_BATCH_SIZE)
1635
- # except Exception as e:
1636
- # print(f"Error in _keypoint_task: {e}")
1637
- # return [[(0, 0)] * n_keypoints for _ in images]
1638
  raw_kps = [_apply_keypoint_mapping(kp) for kp in raw_kps] if raw_kps else []
1639
  keypoints = _normalize_keypoints(raw_kps, images, n_keypoints) if raw_kps else [[(0, 0)] * n_keypoints for _ in images]
1640
  keypoints = [_fix_keypoints(kps, n_keypoints) for kps in keypoints]
@@ -1667,16 +1592,12 @@ class Miner:
1667
  torch.cuda.empty_cache()
1668
 
1669
  # Run bbox (batched YOLO) and keypoints in parallel
1670
- # future_bbox = self._executor.submit(self._bbox_task, images, offset)
1671
  future_kp = self._executor.submit(self._keypoint_task, images, n_keypoints)
1672
- # bbox_per_frame = future_bbox.result()
1673
  keypoints = future_kp.result()
1674
 
1675
  return [
1676
- TVFrameResult(
1677
- frame_id=offset + i,
1678
- # boxes=bbox_per_frame[i],
1679
- boxes=[],
1680
- keypoints=keypoints[i])
1681
  for i in range(len(images))
1682
  ]
 
18
  from PIL import Image
19
  import torchvision.transforms as T
20
  import time
 
21
 
22
  try:
23
  from scipy.optimize import linear_sum_assignment as _linear_sum_assignment
 
1015
  HOMOGRAPHY_FILL_ONLY_VALID = True
1016
  KP_THRESHOLD = 0.2 # new-5 style (was 0.3)
1017
  # HRNet: smaller input = faster; 432x768 balances speed/accuracy (new-2 style)
1018
+ KP_H, KP_W = 360, 640
1019
+ HRNET_BATCH_SIZE = 8 # larger batch = faster (if GPU mem allows)
1020
 
1021
 
1022
  def _preprocess_batch(frames):
 
1027
  img = cv2.resize(img, (target_w, target_h)).astype(np.float32) / 255.0
1028
  batch.append(np.transpose(img, (2, 0, 1)))
1029
  return torch.from_numpy(np.stack(batch)).float()
 
1030
 
1031
 
1032
  def _extract_keypoints(heatmap: torch.Tensor, scale: int = 2):
 
1060
  def _run_hrnet_batch(frames, model, threshold, batch_size=16):
1061
  if not frames or model is None:
1062
  return []
1063
+ device = next(model.parameters()).device
1064
+ use_amp = device.type == "cuda"
 
 
 
 
 
 
1065
  results = []
 
1066
  for i in range(0, len(frames), batch_size):
1067
  chunk = frames[i:i + batch_size]
1068
+ batch = _preprocess_batch(chunk).to(device, non_blocking=True)
1069
+ with torch.inference_mode():
1070
+ with torch.amp.autocast("cuda", enabled=use_amp):
1071
+ heatmaps = model(batch)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1072
  kp_coords = _extract_keypoints(heatmaps[:, :-1, :, :], scale=2)
1073
+ batch_kps = _process_keypoints(kp_coords, threshold, KP_W, KP_H, len(chunk))
 
1074
  results.extend(batch_kps)
1075
+ del heatmaps, kp_coords, batch
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1076
  if results:
1077
  gc.collect()
1078
  return results
 
1256
 
1257
  # ── Miner ─────────────────────────────────────────────────────────────────────
1258
 
 
1259
  class Miner:
1260
  def __init__(self, path_hf_repo: Path) -> None:
1261
  self.path_hf_repo = Path(path_hf_repo)
 
1287
  print(f"⚠️ OSNet weights not found at {osnet_weight_path}. Using HSV fallback.")
1288
 
1289
  # Keypoints model: HRNet
1290
+ kp_config_file = "hrnetv2_w48.yaml"
1291
+ kp_weights_file = "keypoint_detect.pt"
1292
+ config_path = Path(kp_config_file) if Path(kp_config_file).exists() else self.path_hf_repo / kp_config_file
1293
+ weights_path = Path(kp_weights_file) if Path(kp_weights_file).exists() else self.path_hf_repo / kp_weights_file
1294
+ cfg = yaml.safe_load(open(config_path, 'r'))
1295
+ hrnet = get_cls_net(cfg)
1296
+ state = torch.load(weights_path, map_location=device, weights_only=False)
1297
+ hrnet.load_state_dict(state)
1298
+ hrnet.to(device).eval()
1299
+ self.keypoints_model = hrnet
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1300
  print("βœ… HRNet Keypoints Model Loaded")
1301
 
1302
  # Person detection state (new-2 style)
 
1555
  return []
1556
  if self.keypoints_model is None:
1557
  return [[(0, 0)] * n_keypoints for _ in images]
1558
+ try:
1559
+ raw_kps = _run_hrnet_batch(images, self.keypoints_model, KP_THRESHOLD, batch_size=HRNET_BATCH_SIZE)
1560
+ except Exception as e:
1561
+ print(f"Error in _keypoint_task: {e}")
1562
+ return [[(0, 0)] * n_keypoints for _ in images]
1563
  raw_kps = [_apply_keypoint_mapping(kp) for kp in raw_kps] if raw_kps else []
1564
  keypoints = _normalize_keypoints(raw_kps, images, n_keypoints) if raw_kps else [[(0, 0)] * n_keypoints for _ in images]
1565
  keypoints = [_fix_keypoints(kps, n_keypoints) for kps in keypoints]
 
1592
  torch.cuda.empty_cache()
1593
 
1594
  # Run bbox (batched YOLO) and keypoints in parallel
1595
+ future_bbox = self._executor.submit(self._bbox_task, images, offset)
1596
  future_kp = self._executor.submit(self._keypoint_task, images, n_keypoints)
1597
+ bbox_per_frame = future_bbox.result()
1598
  keypoints = future_kp.result()
1599
 
1600
  return [
1601
+ TVFrameResult(frame_id=offset + i, boxes=bbox_per_frame[i], keypoints=keypoints[i])
 
 
 
 
1602
  for i in range(len(images))
1603
  ]