Spaces:

XiaoBai1221
/

SignView2.0

Sleeping

XiaoBai1221 committed on Jun 23, 2025

Commit

2101309

1 Parent(s): feafc47

🔧 修復光流特徵維度不匹配問題

- 改為計算每一幀的光流特徵 (50 frames × 10 dims)
- 確保光流特徵序列長度與關鍵點序列一致
- 修正張量形狀從 [1, 10] 到 [1, 50, 10]
- 添加調試輸出確認張量形狀
- 與訓練時的特徵維度保持一致

Files changed (1) hide show

app.py +20 -9

app.py CHANGED Viewed

@@ -388,39 +388,50 @@ def predict_sign_language(video_path):
             keypoints = extract_keypoints_from_frame(frame)
             keypoints_sequence.append(keypoints)
-        # 計算光流特徵
         flow_features = []
         for i in range(len(frames) - 1):
             flow = calculate_optical_flow_features(frames[i], frames[i + 1])
             flow_features.append(flow)
-        # 計算平均光流特徵
-        if flow_features:
-            optical_flow = np.mean(flow_features, axis=0)
-        else:
-            optical_flow = np.zeros(10)
         # 確保序列長度為50 (與訓練時一致)
         target_length = 50
         if len(keypoints_sequence) > target_length:
-            # 均勻採樣
             indices = np.linspace(0, len(keypoints_sequence) - 1, target_length, dtype=int)
             keypoints_sequence = [keypoints_sequence[i] for i in indices]
         elif len(keypoints_sequence) < target_length:
             # 重複最後一幀
             while len(keypoints_sequence) < target_length:
                 if keypoints_sequence:
                     keypoints_sequence.append(keypoints_sequence[-1])
                 else:
                     keypoints_sequence.append(np.zeros(225))
         # 轉換為numpy數組再轉為tensor (避免警告)
         keypoints_array = np.array(keypoints_sequence, dtype=np.float32)
         keypoints_tensor = torch.from_numpy(keypoints_array).unsqueeze(0).to(device)
-        optical_flow_tensor = torch.from_numpy(optical_flow.astype(np.float32)).unsqueeze(0).to(device)
         with torch.no_grad():
-            outputs = model(keypoints_tensor, optical_flow_tensor)
             probabilities = torch.softmax(outputs, dim=1)
             predicted_class = torch.argmax(probabilities, dim=1).item()
             confidence = probabilities[0][predicted_class].item()

             keypoints = extract_keypoints_from_frame(frame)
             keypoints_sequence.append(keypoints)
+        # 計算每一幀的光流特徵
         flow_features = []
         for i in range(len(frames) - 1):
             flow = calculate_optical_flow_features(frames[i], frames[i + 1])
             flow_features.append(flow)
+        # 確保光流特徵的幀數與關鍵點一致
+        if len(flow_features) < len(keypoints_sequence):
+            # 如果光流特徵少於關鍵點幀數，複製最後一個光流特徵
+            while len(flow_features) < len(keypoints_sequence):
+                if flow_features:
+                    flow_features.append(flow_features[-1])
+                else:
+                    flow_features.append(np.zeros(10))
         # 確保序列長度為50 (與訓練時一致)
         target_length = 50
         if len(keypoints_sequence) > target_length:
+            # 均勻採樣關鍵點和光流特徵
             indices = np.linspace(0, len(keypoints_sequence) - 1, target_length, dtype=int)
             keypoints_sequence = [keypoints_sequence[i] for i in indices]
+            flow_features = [flow_features[min(i, len(flow_features)-1)] for i in indices]
         elif len(keypoints_sequence) < target_length:
             # 重複最後一幀
             while len(keypoints_sequence) < target_length:
                 if keypoints_sequence:
                     keypoints_sequence.append(keypoints_sequence[-1])
+                    flow_features.append(flow_features[-1] if flow_features else np.zeros(10))
                 else:
                     keypoints_sequence.append(np.zeros(225))
+                    flow_features.append(np.zeros(10))
         # 轉換為numpy數組再轉為tensor (避免警告)
         keypoints_array = np.array(keypoints_sequence, dtype=np.float32)
+        flow_array = np.array(flow_features, dtype=np.float32)
         keypoints_tensor = torch.from_numpy(keypoints_array).unsqueeze(0).to(device)
+        flow_tensor = torch.from_numpy(flow_array).unsqueeze(0).to(device)
+        print(f"關鍵點張量形狀: {keypoints_tensor.shape}")
+        print(f"光流張量形狀: {flow_tensor.shape}")
         with torch.no_grad():
+            outputs = model(keypoints_tensor, flow_tensor)
             probabilities = torch.softmax(outputs, dim=1)
             predicted_class = torch.argmax(probabilities, dim=1).item()
             confidence = probabilities[0][predicted_class].item()