Spaces:

XiaoBai1221
/

SignView2.0

Sleeping

XiaoBai1221 committed on Jun 23, 2025

Commit

feafc47

1 Parent(s): e198588

🔧 修復光流計算和張量轉換問題

- 改進 calculate_optical_flow_features 函數的錯誤處理
- 使用 goodFeaturesToTrack + calcOpticalFlowPyrLK 的標準光流算法
- 修復 'npoints checkVector' 錯誤 - 確保有足夠的角點
- 優化張量轉換 - 使用 torch.from_numpy 避免警告
- 改進光流特徵計算 - 對所有幀計算再平均
- 增加 NaN 值處理確保數值穩定性

Files changed (1) hide show

app.py +65 -40

app.py CHANGED Viewed

@@ -313,40 +313,57 @@ def extract_keypoints_from_frame(frame):
         print(f"關鍵點提取錯誤: {e}")
         return np.zeros(225, dtype=np.float32)
-def calculate_optical_flow_features(frames):
     """計算光流特徵"""
     try:
-        if len(frames) < 2:
-            return np.zeros(10, dtype=np.float32)
-        flow_features = []
-        for i in range(min(len(frames) - 1, 10)):  # 最多計算10個光流
-            gray1 = cv2.cvtColor(frames[i], cv2.COLOR_BGR2GRAY)
-            gray2 = cv2.cvtColor(frames[i + 1], cv2.COLOR_BGR2GRAY)
-            # 計算光流
-            flow = cv2.calcOpticalFlowPyrLK(
-                gray1, gray2, None, None,
-                winSize=(15, 15),
-                maxLevel=2,
-                criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03)
-            )
-            if flow[0] is not None and len(flow[0]) > 0:
-                # 計算光流的平均大小
-                flow_magnitude = np.mean(np.sqrt(flow[0].flatten()**2))
-                flow_features.append(flow_magnitude)
-            else:
-                flow_features.append(0.0)
-        # 確保有10個光流特徵
-        while len(flow_features) < 10:
-            flow_features.append(0.0)
-        return np.array(flow_features[:10], dtype=np.float32)
     except Exception as e:
         print(f"光流計算錯誤: {e}")
-        return np.zeros(10, dtype=np.float32)
 def predict_sign_language(video_path):
     """預測手語影片"""
@@ -371,31 +388,39 @@ def predict_sign_language(video_path):
             keypoints = extract_keypoints_from_frame(frame)
             keypoints_sequence.append(keypoints)
-        optical_flow = calculate_optical_flow_features(frames)
         # 確保序列長度為50 (與訓練時一致)
         target_length = 50
         if len(keypoints_sequence) > target_length:
             # 均勻採樣
-            indices = np.linspace(0, len(keypoints_sequence)-1, target_length, dtype=int)
             keypoints_sequence = [keypoints_sequence[i] for i in indices]
         elif len(keypoints_sequence) < target_length:
             # 重複最後一幀
-            last_frame = keypoints_sequence[-1] if keypoints_sequence else np.zeros(225)
             while len(keypoints_sequence) < target_length:
-                keypoints_sequence.append(last_frame)
-        # 為每個時間步創建光流特徵
-        flow_sequence = []
-        for i in range(target_length):
-            flow_sequence.append(optical_flow)
-        # 轉換為tensor並預測
-        keypoints_tensor = torch.tensor([keypoints_sequence], dtype=torch.float32).to(device)
-        flow_tensor = torch.tensor([flow_sequence], dtype=torch.float32).to(device)
         with torch.no_grad():
-            outputs = model(keypoints_tensor, flow_tensor)
             probabilities = torch.softmax(outputs, dim=1)
             predicted_class = torch.argmax(probabilities, dim=1).item()
             confidence = probabilities[0][predicted_class].item()

         print(f"關鍵點提取錯誤: {e}")
         return np.zeros(225, dtype=np.float32)
+def calculate_optical_flow_features(frame1, frame2):
     """計算光流特徵"""
     try:
+        # 轉為灰階
+        gray1 = cv2.cvtColor(frame1, cv2.COLOR_BGR2GRAY)
+        gray2 = cv2.cvtColor(frame2, cv2.COLOR_BGR2GRAY)
+        # 檢測角點特徵
+        corners = cv2.goodFeaturesToTrack(gray1, maxCorners=100, qualityLevel=0.3, minDistance=7, blockSize=7)
+        # 如果沒有檢測到足夠的角點，返回零向量
+        if corners is None or len(corners) < 5:
+            return np.zeros(10)
+        # 確保角點格式正確
+        corners = np.float32(corners).reshape(-1, 1, 2)
+        # 計算光流
+        new_corners, status, error = cv2.calcOpticalFlowPyrLK(gray1, gray2, corners, None)
+        # 選擇好的角點
+        good_new = new_corners[status == 1]
+        good_old = corners[status == 1]
+        # 如果沒有足夠的好角點，返回零向量
+        if len(good_new) < 2 or len(good_old) < 2:
+            return np.zeros(10)
+        # 計算光流向量
+        flow_vectors = good_new - good_old
+        # 計算統計特徵
+        magnitude = np.sqrt(flow_vectors[:, 0]**2 + flow_vectors[:, 1]**2)
+        direction = np.arctan2(flow_vectors[:, 1], flow_vectors[:, 0])
+        # 提取10維特徵
+        features = [
+            np.mean(magnitude), np.std(magnitude), np.max(magnitude), np.min(magnitude),
+            np.mean(direction), np.std(direction),
+            np.mean(flow_vectors[:, 0]), np.std(flow_vectors[:, 0]),
+            np.mean(flow_vectors[:, 1]), np.std(flow_vectors[:, 1])
+        ]
+        # 處理 NaN 值
+        features = [f if not np.isnan(f) else 0.0 for f in features]
+        return np.array(features)
     except Exception as e:
         print(f"光流計算錯誤: {e}")
+        return np.zeros(10)
 def predict_sign_language(video_path):
     """預測手語影片"""
             keypoints = extract_keypoints_from_frame(frame)
             keypoints_sequence.append(keypoints)
+        # 計算光流特徵
+        flow_features = []
+        for i in range(len(frames) - 1):
+            flow = calculate_optical_flow_features(frames[i], frames[i + 1])
+            flow_features.append(flow)
+        # 計算平均光流特徵
+        if flow_features:
+            optical_flow = np.mean(flow_features, axis=0)
+        else:
+            optical_flow = np.zeros(10)
         # 確保序列長度為50 (與訓練時一致)
         target_length = 50
         if len(keypoints_sequence) > target_length:
             # 均勻採樣
+            indices = np.linspace(0, len(keypoints_sequence) - 1, target_length, dtype=int)
             keypoints_sequence = [keypoints_sequence[i] for i in indices]
         elif len(keypoints_sequence) < target_length:
             # 重複最後一幀
             while len(keypoints_sequence) < target_length:
+                if keypoints_sequence:
+                    keypoints_sequence.append(keypoints_sequence[-1])
+                else:
+                    keypoints_sequence.append(np.zeros(225))
+        # 轉換為numpy數組再轉為tensor (避免警告)
+        keypoints_array = np.array(keypoints_sequence, dtype=np.float32)
+        keypoints_tensor = torch.from_numpy(keypoints_array).unsqueeze(0).to(device)
+        optical_flow_tensor = torch.from_numpy(optical_flow.astype(np.float32)).unsqueeze(0).to(device)
         with torch.no_grad():
+            outputs = model(keypoints_tensor, optical_flow_tensor)
             probabilities = torch.softmax(outputs, dim=1)
             predicted_class = torch.argmax(probabilities, dim=1).item()
             confidence = probabilities[0][predicted_class].item()