Spaces:
Sleeping
Sleeping
Commit
·
2101309
1
Parent(s):
feafc47
🔧 修復光流特徵維度不匹配問題
Browse files
- 改為計算每一幀的光流特徵 (50 frames × 10 dims)
- 確保光流特徵序列長度與關鍵點序列一致
- 修正張量形狀從 [1, 10] 到 [1, 50, 10]
- 添加調試輸出確認張量形狀
- 與訓練時的特徵維度保持一致
app.py
CHANGED
|
@@ -388,39 +388,50 @@ def predict_sign_language(video_path):
|
|
| 388 |
keypoints = extract_keypoints_from_frame(frame)
|
| 389 |
keypoints_sequence.append(keypoints)
|
| 390 |
|
| 391 |
-
#
|
| 392 |
flow_features = []
|
| 393 |
for i in range(len(frames) - 1):
|
| 394 |
flow = calculate_optical_flow_features(frames[i], frames[i + 1])
|
| 395 |
flow_features.append(flow)
|
| 396 |
|
| 397 |
-
#
|
| 398 |
-
if flow_features:
|
| 399 |
-
|
| 400 |
-
|
| 401 |
-
|
|
|
|
|
|
|
|
|
|
| 402 |
|
| 403 |
# 確保序列長度為50 (與訓練時一致)
|
| 404 |
target_length = 50
|
| 405 |
if len(keypoints_sequence) > target_length:
|
| 406 |
-
#
|
| 407 |
indices = np.linspace(0, len(keypoints_sequence) - 1, target_length, dtype=int)
|
| 408 |
keypoints_sequence = [keypoints_sequence[i] for i in indices]
|
|
|
|
| 409 |
elif len(keypoints_sequence) < target_length:
|
| 410 |
# 重複最後一幀
|
| 411 |
while len(keypoints_sequence) < target_length:
|
| 412 |
if keypoints_sequence:
|
| 413 |
keypoints_sequence.append(keypoints_sequence[-1])
|
|
|
|
| 414 |
else:
|
| 415 |
keypoints_sequence.append(np.zeros(225))
|
|
|
|
| 416 |
|
| 417 |
# 轉換為numpy數組再轉為tensor (避免警告)
|
| 418 |
keypoints_array = np.array(keypoints_sequence, dtype=np.float32)
|
|
|
|
|
|
|
| 419 |
keypoints_tensor = torch.from_numpy(keypoints_array).unsqueeze(0).to(device)
|
| 420 |
-
|
|
|
|
|
|
|
|
|
|
| 421 |
|
| 422 |
with torch.no_grad():
|
| 423 |
-
outputs = model(keypoints_tensor,
|
| 424 |
probabilities = torch.softmax(outputs, dim=1)
|
| 425 |
predicted_class = torch.argmax(probabilities, dim=1).item()
|
| 426 |
confidence = probabilities[0][predicted_class].item()
|
|
|
|
| 388 |
keypoints = extract_keypoints_from_frame(frame)
|
| 389 |
keypoints_sequence.append(keypoints)
|
| 390 |
|
| 391 |
+
# 計算每一幀的光流特徵
|
| 392 |
flow_features = []
|
| 393 |
for i in range(len(frames) - 1):
|
| 394 |
flow = calculate_optical_flow_features(frames[i], frames[i + 1])
|
| 395 |
flow_features.append(flow)
|
| 396 |
|
| 397 |
+
# 確保光流特徵的幀數與關鍵點一致
|
| 398 |
+
if len(flow_features) < len(keypoints_sequence):
|
| 399 |
+
# 如果光流特徵少於關鍵點幀數,複製最後一個光流特徵
|
| 400 |
+
while len(flow_features) < len(keypoints_sequence):
|
| 401 |
+
if flow_features:
|
| 402 |
+
flow_features.append(flow_features[-1])
|
| 403 |
+
else:
|
| 404 |
+
flow_features.append(np.zeros(10))
|
| 405 |
|
| 406 |
# 確保序列長度為50 (與訓練時一致)
|
| 407 |
target_length = 50
|
| 408 |
if len(keypoints_sequence) > target_length:
|
| 409 |
+
# 均勻採樣關鍵點和光流特徵
|
| 410 |
indices = np.linspace(0, len(keypoints_sequence) - 1, target_length, dtype=int)
|
| 411 |
keypoints_sequence = [keypoints_sequence[i] for i in indices]
|
| 412 |
+
flow_features = [flow_features[min(i, len(flow_features)-1)] for i in indices]
|
| 413 |
elif len(keypoints_sequence) < target_length:
|
| 414 |
# 重複最後一幀
|
| 415 |
while len(keypoints_sequence) < target_length:
|
| 416 |
if keypoints_sequence:
|
| 417 |
keypoints_sequence.append(keypoints_sequence[-1])
|
| 418 |
+
flow_features.append(flow_features[-1] if flow_features else np.zeros(10))
|
| 419 |
else:
|
| 420 |
keypoints_sequence.append(np.zeros(225))
|
| 421 |
+
flow_features.append(np.zeros(10))
|
| 422 |
|
| 423 |
# 轉換為numpy數組再轉為tensor (避免警告)
|
| 424 |
keypoints_array = np.array(keypoints_sequence, dtype=np.float32)
|
| 425 |
+
flow_array = np.array(flow_features, dtype=np.float32)
|
| 426 |
+
|
| 427 |
keypoints_tensor = torch.from_numpy(keypoints_array).unsqueeze(0).to(device)
|
| 428 |
+
flow_tensor = torch.from_numpy(flow_array).unsqueeze(0).to(device)
|
| 429 |
+
|
| 430 |
+
print(f"關鍵點張量形狀: {keypoints_tensor.shape}")
|
| 431 |
+
print(f"光流張量形狀: {flow_tensor.shape}")
|
| 432 |
|
| 433 |
with torch.no_grad():
|
| 434 |
+
outputs = model(keypoints_tensor, flow_tensor)
|
| 435 |
probabilities = torch.softmax(outputs, dim=1)
|
| 436 |
predicted_class = torch.argmax(probabilities, dim=1).item()
|
| 437 |
confidence = probabilities[0][predicted_class].item()
|