Spaces:
Sleeping
Sleeping
Commit
·
feafc47
1
Parent(s):
e198588
🔧 修復光流計算和張量轉換問題
Browse files- 改進 calculate_optical_flow_features 函數的錯誤處理
- 使用 goodFeaturesToTrack + calcOpticalFlowPyrLK 的標準光流算法
- 修復 'npoints checkVector' 錯誤 - 確保有足夠的角點
- 優化張量轉換 - 使用 torch.from_numpy 避免警告
- 改進光流特徵計算 - 對所有幀計算再平均
- 增加 NaN 值處理確保數值穩定性
app.py
CHANGED
|
@@ -313,40 +313,57 @@ def extract_keypoints_from_frame(frame):
|
|
| 313 |
print(f"關鍵點提取錯誤: {e}")
|
| 314 |
return np.zeros(225, dtype=np.float32)
|
| 315 |
|
| 316 |
-
def calculate_optical_flow_features(
|
| 317 |
"""計算光流特徵"""
|
| 318 |
try:
|
| 319 |
-
|
| 320 |
-
|
|
|
|
| 321 |
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
|
|
|
| 341 |
|
| 342 |
-
#
|
| 343 |
-
|
| 344 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 345 |
|
| 346 |
-
return np.array(flow_features[:10], dtype=np.float32)
|
| 347 |
except Exception as e:
|
| 348 |
print(f"光流計算錯誤: {e}")
|
| 349 |
-
return np.zeros(10
|
| 350 |
|
| 351 |
def predict_sign_language(video_path):
|
| 352 |
"""預測手語影片"""
|
|
@@ -371,31 +388,39 @@ def predict_sign_language(video_path):
|
|
| 371 |
keypoints = extract_keypoints_from_frame(frame)
|
| 372 |
keypoints_sequence.append(keypoints)
|
| 373 |
|
| 374 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 375 |
|
| 376 |
# 確保序列長度為50 (與訓練時一致)
|
| 377 |
target_length = 50
|
| 378 |
if len(keypoints_sequence) > target_length:
|
| 379 |
# 均勻採樣
|
| 380 |
-
indices = np.linspace(0, len(keypoints_sequence)-1, target_length, dtype=int)
|
| 381 |
keypoints_sequence = [keypoints_sequence[i] for i in indices]
|
| 382 |
elif len(keypoints_sequence) < target_length:
|
| 383 |
# 重複最後一幀
|
| 384 |
-
last_frame = keypoints_sequence[-1] if keypoints_sequence else np.zeros(225)
|
| 385 |
while len(keypoints_sequence) < target_length:
|
| 386 |
-
keypoints_sequence
|
| 387 |
-
|
| 388 |
-
|
| 389 |
-
|
| 390 |
-
for i in range(target_length):
|
| 391 |
-
flow_sequence.append(optical_flow)
|
| 392 |
|
| 393 |
-
# 轉換為tensor
|
| 394 |
-
|
| 395 |
-
|
|
|
|
| 396 |
|
| 397 |
with torch.no_grad():
|
| 398 |
-
outputs = model(keypoints_tensor,
|
| 399 |
probabilities = torch.softmax(outputs, dim=1)
|
| 400 |
predicted_class = torch.argmax(probabilities, dim=1).item()
|
| 401 |
confidence = probabilities[0][predicted_class].item()
|
|
|
|
| 313 |
print(f"關鍵點提取錯誤: {e}")
|
| 314 |
return np.zeros(225, dtype=np.float32)
|
| 315 |
|
| 316 |
def calculate_optical_flow_features(frame1, frame2):
    """Compute a 10-dim optical-flow feature vector between two BGR frames.

    Tracks Shi-Tomasi corners from *frame1* into *frame2* with pyramidal
    Lucas-Kanade, then summarizes the per-corner displacement vectors.

    Args:
        frame1: first frame, BGR image as expected by cv2.cvtColor.
        frame2: next frame, same size/format as frame1.

    Returns:
        np.ndarray of shape (10,), dtype float32:
        [mean|std|max|min of magnitude, mean|std of direction,
         mean|std of dx, mean|std of dy].
        A zero vector is returned when too few corners are found/tracked
        or on any error (best-effort: errors are printed, never raised).
    """
    # Consistent fallback; float32 matches the rest of the feature pipeline
    # (extract_keypoints_from_frame also returns float32).
    zeros = np.zeros(10, dtype=np.float32)
    try:
        # Convert to grayscale — LK optical flow operates on single-channel images.
        gray1 = cv2.cvtColor(frame1, cv2.COLOR_BGR2GRAY)
        gray2 = cv2.cvtColor(frame2, cv2.COLOR_BGR2GRAY)

        # Detect corner features to track.
        corners = cv2.goodFeaturesToTrack(gray1, maxCorners=100, qualityLevel=0.3, minDistance=7, blockSize=7)

        # Not enough corners detected: return the zero vector
        # (guards against the 'npoints checkVector' error in calcOpticalFlowPyrLK).
        if corners is None or len(corners) < 5:
            return zeros

        # Ensure the (N, 1, 2) float32 layout calcOpticalFlowPyrLK requires.
        corners = np.float32(corners).reshape(-1, 1, 2)

        # Track corners from frame1 into frame2 (per-point error is ignored;
        # the status mask is sufficient here).
        new_corners, status, _err = cv2.calcOpticalFlowPyrLK(gray1, gray2, corners, None)

        # Keep only successfully tracked points (status == 1 selects rows,
        # collapsing (N, 1, 2) to (M, 2)).
        good_new = new_corners[status == 1]
        good_old = corners[status == 1]

        # Not enough successfully tracked points: return the zero vector.
        if len(good_new) < 2 or len(good_old) < 2:
            return zeros

        # Per-corner displacement vectors.
        flow_vectors = good_new - good_old

        # Summary statistics of the flow field.
        magnitude = np.sqrt(flow_vectors[:, 0] ** 2 + flow_vectors[:, 1] ** 2)
        direction = np.arctan2(flow_vectors[:, 1], flow_vectors[:, 0])

        # Assemble the 10-dim feature vector.
        features = np.array(
            [
                np.mean(magnitude), np.std(magnitude), np.max(magnitude), np.min(magnitude),
                np.mean(direction), np.std(direction),
                np.mean(flow_vectors[:, 0]), np.std(flow_vectors[:, 0]),
                np.mean(flow_vectors[:, 1]), np.std(flow_vectors[:, 1]),
            ],
            dtype=np.float32,
        )

        # Scrub NaNs in one vectorized pass for numerical stability.
        return np.nan_to_num(features, nan=0.0)
    except Exception as e:
        # Deliberate best-effort: flow is an auxiliary feature, so failures
        # degrade to zeros instead of aborting the prediction.
        print(f"光流計算錯誤: {e}")
        return zeros
|
| 367 |
|
| 368 |
def predict_sign_language(video_path):
|
| 369 |
"""預測手語影片"""
|
|
|
|
| 388 |
keypoints = extract_keypoints_from_frame(frame)
|
| 389 |
keypoints_sequence.append(keypoints)
|
| 390 |
|
| 391 |
+
# 計算光流特徵
|
| 392 |
+
flow_features = []
|
| 393 |
+
for i in range(len(frames) - 1):
|
| 394 |
+
flow = calculate_optical_flow_features(frames[i], frames[i + 1])
|
| 395 |
+
flow_features.append(flow)
|
| 396 |
+
|
| 397 |
+
# 計算平均光流特徵
|
| 398 |
+
if flow_features:
|
| 399 |
+
optical_flow = np.mean(flow_features, axis=0)
|
| 400 |
+
else:
|
| 401 |
+
optical_flow = np.zeros(10)
|
| 402 |
|
| 403 |
# 確保序列長度為50 (與訓練時一致)
|
| 404 |
target_length = 50
|
| 405 |
if len(keypoints_sequence) > target_length:
|
| 406 |
# 均勻採樣
|
| 407 |
+
indices = np.linspace(0, len(keypoints_sequence) - 1, target_length, dtype=int)
|
| 408 |
keypoints_sequence = [keypoints_sequence[i] for i in indices]
|
| 409 |
elif len(keypoints_sequence) < target_length:
|
| 410 |
# 重複最後一幀
|
|
|
|
| 411 |
while len(keypoints_sequence) < target_length:
|
| 412 |
+
if keypoints_sequence:
|
| 413 |
+
keypoints_sequence.append(keypoints_sequence[-1])
|
| 414 |
+
else:
|
| 415 |
+
keypoints_sequence.append(np.zeros(225))
|
|
|
|
|
|
|
| 416 |
|
| 417 |
+
# 轉換為numpy數組再轉為tensor (避免警告)
|
| 418 |
+
keypoints_array = np.array(keypoints_sequence, dtype=np.float32)
|
| 419 |
+
keypoints_tensor = torch.from_numpy(keypoints_array).unsqueeze(0).to(device)
|
| 420 |
+
optical_flow_tensor = torch.from_numpy(optical_flow.astype(np.float32)).unsqueeze(0).to(device)
|
| 421 |
|
| 422 |
with torch.no_grad():
|
| 423 |
+
outputs = model(keypoints_tensor, optical_flow_tensor)
|
| 424 |
probabilities = torch.softmax(outputs, dim=1)
|
| 425 |
predicted_class = torch.argmax(probabilities, dim=1).item()
|
| 426 |
confidence = probabilities[0][predicted_class].item()
|