XiaoBai1221 committed on
Commit
feafc47
·
1 Parent(s): e198588

🔧 修復光流計算和張量轉換問題

Browse files

- 改進 calculate_optical_flow_features 函數的錯誤處理
- 使用 goodFeaturesToTrack + calcOpticalFlowPyrLK 的標準光流算法
- 修復 'npoints checkVector' 錯誤 - 確保有足夠的角點
- 優化張量轉換 - 使用 torch.from_numpy 避免警告
- 改進光流特徵計算 - 對所有幀計算再平均
- 增加 NaN 值處理確保數值穩定性

Files changed (1) hide show
  1. app.py +65 -40
app.py CHANGED
@@ -313,40 +313,57 @@ def extract_keypoints_from_frame(frame):
313
  print(f"關鍵點提取錯誤: {e}")
314
  return np.zeros(225, dtype=np.float32)
315
 
316
- def calculate_optical_flow_features(frames):
317
  """計算光流特徵"""
318
  try:
319
- if len(frames) < 2:
320
- return np.zeros(10, dtype=np.float32)
 
321
 
322
- flow_features = []
323
- for i in range(min(len(frames) - 1, 10)): # 最多計算10個光流
324
- gray1 = cv2.cvtColor(frames[i], cv2.COLOR_BGR2GRAY)
325
- gray2 = cv2.cvtColor(frames[i + 1], cv2.COLOR_BGR2GRAY)
326
-
327
- # 計算光流
328
- flow = cv2.calcOpticalFlowPyrLK(
329
- gray1, gray2, None, None,
330
- winSize=(15, 15),
331
- maxLevel=2,
332
- criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03)
333
- )
334
-
335
- if flow[0] is not None and len(flow[0]) > 0:
336
- # 計算光流的平均大小
337
- flow_magnitude = np.mean(np.sqrt(flow[0].flatten()**2))
338
- flow_features.append(flow_magnitude)
339
- else:
340
- flow_features.append(0.0)
 
341
 
342
- # 確保有10個光流特徵
343
- while len(flow_features) < 10:
344
- flow_features.append(0.0)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
345
 
346
- return np.array(flow_features[:10], dtype=np.float32)
347
  except Exception as e:
348
  print(f"光流計算錯誤: {e}")
349
- return np.zeros(10, dtype=np.float32)
350
 
351
  def predict_sign_language(video_path):
352
  """預測手語影片"""
@@ -371,31 +388,39 @@ def predict_sign_language(video_path):
371
  keypoints = extract_keypoints_from_frame(frame)
372
  keypoints_sequence.append(keypoints)
373
 
374
- optical_flow = calculate_optical_flow_features(frames)
 
 
 
 
 
 
 
 
 
 
375
 
376
  # 確保序列長度為50 (與訓練時一致)
377
  target_length = 50
378
  if len(keypoints_sequence) > target_length:
379
  # 均勻採樣
380
- indices = np.linspace(0, len(keypoints_sequence)-1, target_length, dtype=int)
381
  keypoints_sequence = [keypoints_sequence[i] for i in indices]
382
  elif len(keypoints_sequence) < target_length:
383
  # 重複最後一幀
384
- last_frame = keypoints_sequence[-1] if keypoints_sequence else np.zeros(225)
385
  while len(keypoints_sequence) < target_length:
386
- keypoints_sequence.append(last_frame)
387
-
388
- # 為每個時間步創建光流特徵
389
- flow_sequence = []
390
- for i in range(target_length):
391
- flow_sequence.append(optical_flow)
392
 
393
- # 轉換為tensor並預測
394
- keypoints_tensor = torch.tensor([keypoints_sequence], dtype=torch.float32).to(device)
395
- flow_tensor = torch.tensor([flow_sequence], dtype=torch.float32).to(device)
 
396
 
397
  with torch.no_grad():
398
- outputs = model(keypoints_tensor, flow_tensor)
399
  probabilities = torch.softmax(outputs, dim=1)
400
  predicted_class = torch.argmax(probabilities, dim=1).item()
401
  confidence = probabilities[0][predicted_class].item()
 
313
  print(f"關鍵點提取錯誤: {e}")
314
  return np.zeros(225, dtype=np.float32)
315
 
316
+ def calculate_optical_flow_features(frame1, frame2):
317
  """計算光流特徵"""
318
  try:
319
+ # 轉為灰階
320
+ gray1 = cv2.cvtColor(frame1, cv2.COLOR_BGR2GRAY)
321
+ gray2 = cv2.cvtColor(frame2, cv2.COLOR_BGR2GRAY)
322
 
323
+ # 檢測角點特徵
324
+ corners = cv2.goodFeaturesToTrack(gray1, maxCorners=100, qualityLevel=0.3, minDistance=7, blockSize=7)
325
+
326
+ # 如果沒有檢測到足夠的角點,返回零向量
327
+ if corners is None or len(corners) < 5:
328
+ return np.zeros(10)
329
+
330
+ # 確保角點格式正確
331
+ corners = np.float32(corners).reshape(-1, 1, 2)
332
+
333
+ # 計算光流
334
+ new_corners, status, error = cv2.calcOpticalFlowPyrLK(gray1, gray2, corners, None)
335
+
336
+ # 選擇好的角點
337
+ good_new = new_corners[status == 1]
338
+ good_old = corners[status == 1]
339
+
340
+ # 如果沒有足夠的好角點,返回零向量
341
+ if len(good_new) < 2 or len(good_old) < 2:
342
+ return np.zeros(10)
343
 
344
+ # 計算光流向量
345
+ flow_vectors = good_new - good_old
346
+
347
+ # 計算統計特徵
348
+ magnitude = np.sqrt(flow_vectors[:, 0]**2 + flow_vectors[:, 1]**2)
349
+ direction = np.arctan2(flow_vectors[:, 1], flow_vectors[:, 0])
350
+
351
+ # 提取10維特徵
352
+ features = [
353
+ np.mean(magnitude), np.std(magnitude), np.max(magnitude), np.min(magnitude),
354
+ np.mean(direction), np.std(direction),
355
+ np.mean(flow_vectors[:, 0]), np.std(flow_vectors[:, 0]),
356
+ np.mean(flow_vectors[:, 1]), np.std(flow_vectors[:, 1])
357
+ ]
358
+
359
+ # 處理 NaN 值
360
+ features = [f if not np.isnan(f) else 0.0 for f in features]
361
+
362
+ return np.array(features)
363
 
 
364
  except Exception as e:
365
  print(f"光流計算錯誤: {e}")
366
+ return np.zeros(10)
367
 
368
  def predict_sign_language(video_path):
369
  """預測手語影片"""
 
388
  keypoints = extract_keypoints_from_frame(frame)
389
  keypoints_sequence.append(keypoints)
390
 
391
+ # 計算光流特徵
392
+ flow_features = []
393
+ for i in range(len(frames) - 1):
394
+ flow = calculate_optical_flow_features(frames[i], frames[i + 1])
395
+ flow_features.append(flow)
396
+
397
+ # 計算平均光流特徵
398
+ if flow_features:
399
+ optical_flow = np.mean(flow_features, axis=0)
400
+ else:
401
+ optical_flow = np.zeros(10)
402
 
403
  # 確保序列長度為50 (與訓練時一致)
404
  target_length = 50
405
  if len(keypoints_sequence) > target_length:
406
  # 均勻採樣
407
+ indices = np.linspace(0, len(keypoints_sequence) - 1, target_length, dtype=int)
408
  keypoints_sequence = [keypoints_sequence[i] for i in indices]
409
  elif len(keypoints_sequence) < target_length:
410
  # 重複最後一幀
 
411
  while len(keypoints_sequence) < target_length:
412
+ if keypoints_sequence:
413
+ keypoints_sequence.append(keypoints_sequence[-1])
414
+ else:
415
+ keypoints_sequence.append(np.zeros(225))
 
 
416
 
417
+ # 轉換為numpy數組再轉為tensor (避免警告)
418
+ keypoints_array = np.array(keypoints_sequence, dtype=np.float32)
419
+ keypoints_tensor = torch.from_numpy(keypoints_array).unsqueeze(0).to(device)
420
+ optical_flow_tensor = torch.from_numpy(optical_flow.astype(np.float32)).unsqueeze(0).to(device)
421
 
422
  with torch.no_grad():
423
+ outputs = model(keypoints_tensor, optical_flow_tensor)
424
  probabilities = torch.softmax(outputs, dim=1)
425
  predicted_class = torch.argmax(probabilities, dim=1).item()
426
  confidence = probabilities[0][predicted_class].item()