XiaoBai1221 committed on
Commit
4e94e65
·
1 Parent(s): cc931c8

最終修復

Browse files
Files changed (4) hide show
  1. Dockerfile +1 -0
  2. app.py +211 -118
  3. app_config.py +1 -0
  4. requirements.txt +1 -0
Dockerfile CHANGED
@@ -14,6 +14,7 @@ RUN apt-get update && apt-get install -y \
14
  libgomp1 \
15
  libgl1-mesa-glx \
16
  libglib2.0-dev \
 
17
  && rm -rf /var/lib/apt/lists/*
18
 
19
  # 複製依賴檔案
 
14
  libgomp1 \
15
  libgl1-mesa-glx \
16
  libglib2.0-dev \
17
+ ffmpeg \
18
  && rm -rf /var/lib/apt/lists/*
19
 
20
  # 複製依賴檔案
app.py CHANGED
@@ -2,6 +2,8 @@
2
  # -*- coding: utf-8 -*-
3
 
4
  import os
 
 
5
  import json
6
  import requests
7
  import cv2
@@ -20,6 +22,16 @@ from werkzeug.utils import secure_filename
20
  from datetime import datetime
21
  from flask_socketio import SocketIO, emit
22
  from openai import OpenAI
 
 
 
 
 
 
 
 
 
 
23
 
24
  # 環境變數設定
25
  # OpenAI API KEY 應該從環境變數獲取,不要硬編碼
@@ -34,15 +46,19 @@ os.environ['MEDIAPIPE_DISABLE_GPU'] = '1' # 禁用GPU避免警告
34
  IS_HUGGINGFACE = os.environ.get('SPACE_ID') is not None
35
  IS_LOCAL_DEV = not IS_HUGGINGFACE
36
 
 
 
 
37
  # Flask 應用初始化
38
  app = Flask(__name__)
39
  app.config['SECRET_KEY'] = 'sign_language_secret_key'
40
- app.config['MAX_CONTENT_LENGTH'] = 100 * 1024 * 1024 # 100MB max file size
41
- socketio = SocketIO(app, cors_allowed_origins="*", async_mode='threading')
42
 
43
  # Messenger Bot 設定
44
- VERIFY_TOKEN = os.environ.get('VERIFY_TOKEN', 'your_verify_token')
45
- PAGE_ACCESS_TOKEN = os.environ.get('PAGE_ACCESS_TOKEN', 'your_page_access_token')
 
46
  FACEBOOK_API_URL = 'https://graph.facebook.com/v18.0/me/messages'
47
 
48
  # 路徑設定 - 適應不同環境
@@ -77,6 +93,22 @@ class FeatureExtractor:
77
  self.mp_holistic = mp.solutions.holistic
78
  self.mp_drawing = mp.solutions.drawing_utils
79
  self.mp_drawing_styles = mp.solutions.drawing_styles
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
 
81
  def extract_pose_keypoints(self, frame, holistic_results):
82
  """提取骨架關鍵點"""
@@ -226,6 +258,7 @@ class VideoSignLanguageRecognizer:
226
  def __init__(self, model_path, threshold=0.7):
227
  self.model_path = model_path
228
  self.threshold = threshold
 
229
 
230
  # 初始化特徵提取器
231
  self.feature_extractor = FeatureExtractor()
@@ -239,7 +272,7 @@ class VideoSignLanguageRecognizer:
239
 
240
  # GPT整合
241
  try:
242
- self.openai_client = OpenAI()
243
  except Exception as e:
244
  print(f"初始化OpenAI客户端出錯: {e}")
245
  self.openai_client = None
@@ -247,37 +280,8 @@ class VideoSignLanguageRecognizer:
247
  print(f"影片辨識器初始化完成!使用設備: {self.device}")
248
 
249
  def _load_label_mapping(self):
250
- """加載標籤映射"""
251
- label_map = {}
252
-
253
- # 嘗試從 labels.csv 讀取
254
- labels_file = LABELS_PATH
255
- print(f"🔍 嘗試載入標籤檔案: {labels_file}")
256
- print(f"📂 當前工作目錄: {os.getcwd()}")
257
-
258
- if os.path.exists(labels_file):
259
- try:
260
- df = pd.read_csv(labels_file)
261
- print(f"📄 標籤檔案內容:")
262
- print(df)
263
-
264
- for _, row in df.iterrows():
265
- label_map[int(row['index'])] = row['label']
266
- print(f"✅ 從 {labels_file} 載入了 {len(label_map)} 個類別標籤")
267
- print(f"📊 標籤映射: {label_map}")
268
- except Exception as e:
269
- print(f"❌ 讀取 labels.csv 出錯: {e}")
270
- # 使用默認映射
271
- label_map = {0: "eat", 1: "fish", 2: "like", 3: "want"}
272
- else:
273
- print(f"❌ 標籤檔案不存在: {labels_file}")
274
-
275
- if not label_map:
276
- # 使用默認映射
277
- label_map = {0: "eat", 1: "fish", 2: "like", 3: "want"}
278
- print(f"⚠️ 使用預設標籤映射: {label_map}")
279
-
280
- return label_map
281
 
282
  def _load_model(self):
283
  """加載訓練好的模型"""
@@ -327,6 +331,9 @@ class VideoSignLanguageRecognizer:
327
  # 提取特徵序列
328
  keypoints_sequence = []
329
  frame_count = 0
 
 
 
330
 
331
  while True:
332
  ret, frame = cap.read()
@@ -335,9 +342,22 @@ class VideoSignLanguageRecognizer:
335
 
336
  # 跳幀處理
337
  if frame_count % 5 == 0: # 每5幀處理一次
338
- keypoints, _ = self._extract_features(frame)
339
  if keypoints is not None:
340
  keypoints_sequence.append(keypoints)
 
 
 
 
 
 
 
 
 
 
 
 
 
341
 
342
  frame_count += 1
343
 
@@ -351,7 +371,18 @@ class VideoSignLanguageRecognizer:
351
  print(f"❌ 有效幀數不足,無法進行辨識")
352
  return None, 0
353
 
354
- # 進行預測
 
 
 
 
 
 
 
 
 
 
 
355
  prediction, confidence, word_sequence, probabilities = self._predict_from_sequence(keypoints_sequence)
356
 
357
  # 使用GPT生成完整句子
@@ -365,34 +396,29 @@ class VideoSignLanguageRecognizer:
365
  'word_sequence': word_sequence,
366
  'confidence': confidence,
367
  'probabilities': probabilities,
368
- 'generated_sentence': generated_sentence
 
 
 
369
  }
370
 
371
  def _extract_features(self, frame):
372
  """從單一幀提取手部和姿勢特徵"""
373
- with self.feature_extractor.mp_holistic.Holistic(
374
- static_image_mode=False,
375
- model_complexity=1,
376
- smooth_landmarks=True,
377
- enable_segmentation=False,
378
- min_detection_confidence=0.1,
379
- min_tracking_confidence=0.1
380
- ) as holistic:
381
- # 轉為RGB
382
- frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
383
-
384
- # 處理圖像
385
- results = holistic.process(frame_rgb)
386
-
387
- # 檢查是否有手部被檢測到
388
- hands_detected = (results.left_hand_landmarks is not None or
389
- results.right_hand_landmarks is not None)
390
-
391
- try:
392
- keypoints = self.feature_extractor.extract_pose_keypoints(frame, results)
393
- return keypoints, hands_detected
394
- except Exception as e:
395
- return None, hands_detected
396
 
397
  def _predict_from_sequence(self, keypoints_sequence):
398
  """從關鍵點序列進行預測"""
@@ -411,7 +437,8 @@ class VideoSignLanguageRecognizer:
411
  # 提取所有類別的機率
412
  probs = probabilities[0].cpu().numpy()
413
 
414
- if confidence >= self.threshold:
 
415
  predicted_word = self.label_map.get(predicted_class, f"類別{predicted_class}")
416
  word_sequence = [predicted_word]
417
  else:
@@ -460,6 +487,7 @@ class SignLanguageRecognizer:
460
  def __init__(self, model_path, frame_buffer_size=30, prediction_interval=15, threshold=0.7):
461
  self.model_path = model_path
462
  self.threshold = threshold
 
463
  self.max_buffer_size = frame_buffer_size
464
  self.prediction_interval = prediction_interval
465
 
@@ -488,6 +516,9 @@ class SignLanguageRecognizer:
488
  self.word_sequence = []
489
  self.last_added_word = None
490
  self.word_cooldown = 0
 
 
 
491
 
492
  # 生成的句子
493
  self.generated_sentence = ""
@@ -495,7 +526,7 @@ class SignLanguageRecognizer:
495
 
496
  # GPT整合
497
  try:
498
- self.openai_client = OpenAI()
499
  except Exception as e:
500
  print(f"初始化OpenAI客户端出錯: {e}")
501
  self.openai_client = None
@@ -503,29 +534,8 @@ class SignLanguageRecognizer:
503
  print(f"即時辨識器初始化完成!使用設備: {self.device}")
504
 
505
  def _load_label_mapping(self):
506
- """加載標籤映射"""
507
- label_map = {}
508
- # 嘗試從特徵目錄推斷類別標籤
509
- features_dir = os.path.join(DATA_DIR, 'features', 'keypoints')
510
- if os.path.exists(features_dir):
511
- unique_labels = set()
512
- for file_name in os.listdir(features_dir):
513
- if file_name.endswith('_keypoints.npy'):
514
- parts = file_name.split('_')
515
- if len(parts) >= 2:
516
- label = parts[0]
517
- if label not in unique_labels and not (label.startswith("aug") or "aug_" in label):
518
- unique_labels.add(label)
519
-
520
- if unique_labels:
521
- label_map = {i: label for i, label in enumerate(sorted(unique_labels))}
522
- print(f"從特徵目錄推斷了 {len(label_map)} 個類別標籤")
523
- else:
524
- label_map = {0: "eat", 1: "fish", 2: "like", 3: "want"}
525
- else:
526
- label_map = {0: "eat", 1: "fish", 2: "like", 3: "want"}
527
-
528
- return label_map
529
 
530
  def _load_model(self):
531
  """加載訓練好的模型"""
@@ -575,7 +585,7 @@ class SignLanguageRecognizer:
575
  # 定期進行預測
576
  if self.hand_present and self.frame_count % self.prediction_interval == 0 and len(self.keypoints_buffer) > 5:
577
  self._make_prediction()
578
- self._update_word_sequence()
579
 
580
  # 手部離開時生成句子
581
  if self.hand_present == False and self.hand_absent_frames == self.hand_absent_threshold and self.word_sequence:
@@ -683,52 +693,103 @@ class SignLanguageRecognizer:
683
 
684
  def _extract_features(self, frame):
685
  """從單一幀提取手部和姿勢特徵"""
686
- with self.feature_extractor.mp_holistic.Holistic(
687
- static_image_mode=False,
688
- model_complexity=1,
689
- smooth_landmarks=True,
690
- enable_segmentation=False,
691
- min_detection_confidence=0.1,
692
- min_tracking_confidence=0.1
693
- ) as holistic:
694
- frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
695
- results = holistic.process(frame_rgb)
696
-
697
- hands_detected = (results.left_hand_landmarks is not None or
698
- results.right_hand_landmarks is not None)
699
-
700
- try:
701
- keypoints = self.feature_extractor.extract_pose_keypoints(frame, results)
702
- return keypoints, hands_detected
703
- except Exception as e:
704
- return None, hands_detected
705
 
706
  def _make_prediction(self):
707
- """使用緩衝區中的特徵進行預測"""
708
  if len(self.keypoints_buffer) < 2:
709
  return
710
-
711
- # 優化tensor創建避免效能警告
712
  keypoints_array = np.array(list(self.keypoints_buffer), dtype=np.float32)
713
  keypoints_tensor = torch.from_numpy(keypoints_array).unsqueeze(0).to(self.device)
714
 
715
  with torch.no_grad():
716
  outputs = self.model(keypoints_tensor)
717
  probabilities = torch.nn.functional.softmax(outputs, dim=1)
718
-
719
  max_prob, predicted_class = torch.max(probabilities, 1)
720
  predicted_class = predicted_class.item()
721
  max_prob = max_prob.item()
722
-
723
  probs = probabilities[0].cpu().numpy()
724
 
725
- if max_prob >= self.threshold:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
726
  self.current_prediction = predicted_class
727
  self.prediction_probabilities = probs
728
  else:
729
  self.current_prediction = -1
730
  self.prediction_probabilities = probs
731
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
732
  def initialize_recognizer():
733
  global recognizer
734
 
@@ -762,7 +823,7 @@ def gen_frames():
762
 
763
  socketio.emit('update_frame', {'image': frame_data, 'status': status})
764
 
765
- time.sleep(0.03) # 約30 FPS
766
 
767
  #--------------------
768
  # 路由定義
@@ -805,6 +866,13 @@ def verify_webhook():
805
  def handle_webhook():
806
  """處理從 Messenger 來的訊息"""
807
  try:
 
 
 
 
 
 
 
808
  data = request.get_json()
809
 
810
  if data.get('object') == 'page':
@@ -821,6 +889,18 @@ def handle_webhook():
821
  print(f"處理 webhook 時發生錯誤: {e}")
822
  return "錯誤", 500
823
 
 
 
 
 
 
 
 
 
 
 
 
 
824
  @app.route('/receive_recognition_result', methods=['POST'])
825
  def receive_recognition_result():
826
  """接收手語辨識結果(內部呼叫)"""
@@ -866,6 +946,13 @@ def process_video():
866
 
867
  if video_file.filename == '':
868
  return jsonify({"status": "error", "message": "沒有選擇檔案"}), 400
 
 
 
 
 
 
 
869
 
870
  # 使用臨時檔案避免權限問題
871
  import tempfile
@@ -1067,6 +1154,12 @@ def process_messenger_video(video_url, sender_id):
1067
  send_message(sender_id, generated_sentence)
1068
  else:
1069
  send_message(sender_id, "抱歉,無法辨識您的手語內容,請再試一次。")
 
 
 
 
 
 
1070
 
1071
  except Exception as e:
1072
  print(f"處理 Messenger 影片時發生錯誤:{e}")
@@ -1134,11 +1227,11 @@ def handle_stop_stream(data):
1134
  #--------------------
1135
  if __name__ == '__main__':
1136
  # HuggingFace Spaces 環境檢測
1137
- port = int(os.environ.get('PORT', 7860)) # HuggingFace 預設端口
1138
 
1139
  print("🚀 手語辨識整合系統啟動中...")
1140
  print(f"📱 Messenger Bot: {'已配置' if PAGE_ACCESS_TOKEN != 'your_page_access_token' else '未配置'}")
1141
- print(f"🤖 OpenAI API: {'已配置' if os.environ.get('OPENAI_API_KEY') else '未配置'}")
1142
- print(f"🔧 運行模式: {'HuggingFace Spaces' if port == 7860 else '本地開發'}")
1143
 
1144
- socketio.run(app, host='0.0.0.0', port=port, debug=False, allow_unsafe_werkzeug=True)
 
2
  # -*- coding: utf-8 -*-
3
 
4
  import os
5
+ import hmac
6
+ import hashlib
7
  import json
8
  import requests
9
  import cv2
 
22
  from datetime import datetime
23
  from flask_socketio import SocketIO, emit
24
  from openai import OpenAI
25
+ from app_config import get_config
26
+
27
+ # 選擇 SocketIO 執行模式(優先使用 eventlet)
28
+ ASYNC_MODE = 'threading'
29
+ try:
30
+ import eventlet
31
+ eventlet.monkey_patch()
32
+ ASYNC_MODE = 'eventlet'
33
+ except Exception:
34
+ ASYNC_MODE = 'threading'
35
 
36
  # 環境變數設定
37
  # OpenAI API KEY 應該從環境變數獲取,不要硬編碼
 
46
  IS_HUGGINGFACE = os.environ.get('SPACE_ID') is not None
47
  IS_LOCAL_DEV = not IS_HUGGINGFACE
48
 
49
+ # 載入集中設定
50
+ CONFIG = get_config()
51
+
52
  # Flask 應用初始化
53
  app = Flask(__name__)
54
  app.config['SECRET_KEY'] = 'sign_language_secret_key'
55
+ app.config['MAX_CONTENT_LENGTH'] = CONFIG.get('MAX_FILE_SIZE', 100 * 1024 * 1024) # 100MB max file size
56
+ socketio = SocketIO(app, cors_allowed_origins="*", async_mode=ASYNC_MODE)
57
 
58
  # Messenger Bot 設定
59
+ VERIFY_TOKEN = CONFIG.get('VERIFY_TOKEN', 'your_verify_token')
60
+ PAGE_ACCESS_TOKEN = CONFIG.get('PAGE_ACCESS_TOKEN', 'your_page_access_token')
61
+ APP_SECRET = CONFIG.get('APP_SECRET')
62
  FACEBOOK_API_URL = 'https://graph.facebook.com/v18.0/me/messages'
63
 
64
  # 路徑設定 - 適應不同環境
 
93
  self.mp_holistic = mp.solutions.holistic
94
  self.mp_drawing = mp.solutions.drawing_utils
95
  self.mp_drawing_styles = mp.solutions.drawing_styles
96
+ # 建立長駐的 Holistic 實例(避免每幀重建導致效能低落)
97
+ self.holistic = self.mp_holistic.Holistic(
98
+ static_image_mode=False,
99
+ model_complexity=1,
100
+ smooth_landmarks=True,
101
+ enable_segmentation=False,
102
+ min_detection_confidence=0.5,
103
+ min_tracking_confidence=0.5
104
+ )
105
+
106
def close(self):
    """Release the long-lived MediaPipe Holistic instance held by this object.

    Safe to call more than once: the reference is cleared before the
    underlying ``close()`` runs, so a second call is a no-op. Errors raised
    while closing are reported with ``print`` and not propagated, keeping
    cleanup best-effort.
    """
    holistic = getattr(self, 'holistic', None)
    if holistic is None:
        return
    # Drop the reference first so a failing close() cannot be retried on
    # the same half-released object.
    self.holistic = None
    try:
        holistic.close()
    except Exception as e:
        print(f"釋放 Mediapipe 資源時出錯: {e}")
112
 
113
  def extract_pose_keypoints(self, frame, holistic_results):
114
  """提取骨架關鍵點"""
 
258
  def __init__(self, model_path, threshold=0.7):
259
  self.model_path = model_path
260
  self.threshold = threshold
261
+ self.effective_threshold = threshold
262
 
263
  # 初始化特徵提取器
264
  self.feature_extractor = FeatureExtractor()
 
272
 
273
  # GPT整合
274
  try:
275
+ self.openai_client = OpenAI(timeout=10.0, max_retries=2)
276
  except Exception as e:
277
  print(f"初始化OpenAI客户端出錯: {e}")
278
  self.openai_client = None
 
280
  print(f"影片辨識器初始化完成!使用設備: {self.device}")
281
 
282
  def _load_label_mapping(self):
283
+ """加載標籤映射(統一由 labels.csv 提供)"""
284
+ return load_label_mapping_from_csv()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
285
 
286
  def _load_model(self):
287
  """加載訓練好的模型"""
 
331
  # 提取特徵序列
332
  keypoints_sequence = []
333
  frame_count = 0
334
+ hands_present_count = 0
335
+ motion_history = []
336
+ prev_gray = None
337
 
338
  while True:
339
  ret, frame = cap.read()
 
342
 
343
  # 跳幀處理
344
  if frame_count % 5 == 0: # 每5幀處理一次
345
+ keypoints, hands_detected = self._extract_features(frame)
346
  if keypoints is not None:
347
  keypoints_sequence.append(keypoints)
348
+ if hands_detected:
349
+ hands_present_count += 1
350
+
351
+ # 計算光流運動量
352
+ try:
353
+ gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
354
+ if prev_gray is not None:
355
+ flow = cv2.calcOpticalFlowFarneback(prev_gray, gray, None, 0.5, 3, 15, 3, 5, 1.2, 0)
356
+ mag, ang = cv2.cartToPolar(flow[...,0], flow[...,1])
357
+ motion_history.append(float(np.mean(mag)))
358
+ prev_gray = gray
359
+ except Exception:
360
+ pass
361
 
362
  frame_count += 1
363
 
 
371
  print(f"❌ 有效幀數不足,無法進行辨識")
372
  return None, 0
373
 
374
+ # 動態調整 threshold(手部存在比例 + 運動量)
375
+ frames_used = max(1, len(keypoints_sequence))
376
+ hand_ratio = hands_present_count / frames_used
377
+ avg_motion = float(np.mean(motion_history)) if motion_history else 0.0
378
+ dynamic_threshold = self.threshold
379
+ if hand_ratio < 0.3:
380
+ dynamic_threshold = min(0.9, dynamic_threshold + 0.1)
381
+ if avg_motion < 0.05:
382
+ dynamic_threshold = min(0.9, dynamic_threshold + 0.05)
383
+ self.effective_threshold = dynamic_threshold
384
+
385
+ # 進行預測(使用動態 threshold)
386
  prediction, confidence, word_sequence, probabilities = self._predict_from_sequence(keypoints_sequence)
387
 
388
  # 使用GPT生成完整句子
 
396
  'word_sequence': word_sequence,
397
  'confidence': confidence,
398
  'probabilities': probabilities,
399
+ 'generated_sentence': generated_sentence,
400
+ 'hand_presence_ratio': hand_ratio,
401
+ 'avg_motion': avg_motion,
402
+ 'effective_threshold': dynamic_threshold
403
  }
404
 
405
  def _extract_features(self, frame):
406
  """從單一幀提取手部和姿勢特徵"""
407
+ # 轉為RGB
408
+ frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
409
+
410
+ # 使用長駐的 holistic 實例處理圖像
411
+ results = self.feature_extractor.holistic.process(frame_rgb)
412
+
413
+ # 檢查是否有手部被檢測到
414
+ hands_detected = (results.left_hand_landmarks is not None or
415
+ results.right_hand_landmarks is not None)
416
+
417
+ try:
418
+ keypoints = self.feature_extractor.extract_pose_keypoints(frame, results)
419
+ return keypoints, hands_detected
420
+ except Exception as e:
421
+ return None, hands_detected
 
 
 
 
 
 
 
 
422
 
423
  def _predict_from_sequence(self, keypoints_sequence):
424
  """從關鍵點序列進行預測"""
 
437
  # 提取所有類別的機率
438
  probs = probabilities[0].cpu().numpy()
439
 
440
+ effective_thr = getattr(self, 'effective_threshold', self.threshold)
441
+ if confidence >= effective_thr:
442
  predicted_word = self.label_map.get(predicted_class, f"類別{predicted_class}")
443
  word_sequence = [predicted_word]
444
  else:
 
487
  def __init__(self, model_path, frame_buffer_size=30, prediction_interval=15, threshold=0.7):
488
  self.model_path = model_path
489
  self.threshold = threshold
490
+ self.dynamic_threshold = threshold
491
  self.max_buffer_size = frame_buffer_size
492
  self.prediction_interval = prediction_interval
493
 
 
516
  self.word_sequence = []
517
  self.last_added_word = None
518
  self.word_cooldown = 0
519
+ self.recent_top1_queue = collections.deque(maxlen=15)
520
+ self.ema_confidence = 0.0
521
+ self.ema_alpha = 0.3
522
 
523
  # 生成的句子
524
  self.generated_sentence = ""
 
526
 
527
  # GPT整合
528
  try:
529
+ self.openai_client = OpenAI(timeout=10.0, max_retries=2)
530
  except Exception as e:
531
  print(f"初始化OpenAI客户端出錯: {e}")
532
  self.openai_client = None
 
534
  print(f"即時辨識器初始化完成!使用設備: {self.device}")
535
 
536
  def _load_label_mapping(self):
537
+ """加載標籤映射(統一由 labels.csv 提供)"""
538
+ return load_label_mapping_from_csv()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
539
 
540
  def _load_model(self):
541
  """加載訓練好的模型"""
 
585
  # 定期進行預測
586
  if self.hand_present and self.frame_count % self.prediction_interval == 0 and len(self.keypoints_buffer) > 5:
587
  self._make_prediction()
588
+ self._apply_smoothing_and_decide()
589
 
590
  # 手部離開時生成句子
591
  if self.hand_present == False and self.hand_absent_frames == self.hand_absent_threshold and self.word_sequence:
 
693
 
694
  def _extract_features(self, frame):
695
  """從單一幀提取手部和姿勢特徵"""
696
+ frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
697
+ results = self.feature_extractor.holistic.process(frame_rgb)
698
+
699
+ hands_detected = (results.left_hand_landmarks is not None or
700
+ results.right_hand_landmarks is not None)
701
+
702
+ try:
703
+ keypoints = self.feature_extractor.extract_pose_keypoints(frame, results)
704
+ return keypoints, hands_detected
705
+ except Exception as e:
706
+ return None, hands_detected
 
 
 
 
 
 
 
 
707
 
708
  def _make_prediction(self):
709
+ """使用緩衝區中的特徵進行預測,並更新平滑緩衝"""
710
  if len(self.keypoints_buffer) < 2:
711
  return
712
+
 
713
  keypoints_array = np.array(list(self.keypoints_buffer), dtype=np.float32)
714
  keypoints_tensor = torch.from_numpy(keypoints_array).unsqueeze(0).to(self.device)
715
 
716
  with torch.no_grad():
717
  outputs = self.model(keypoints_tensor)
718
  probabilities = torch.nn.functional.softmax(outputs, dim=1)
 
719
  max_prob, predicted_class = torch.max(probabilities, 1)
720
  predicted_class = predicted_class.item()
721
  max_prob = max_prob.item()
 
722
  probs = probabilities[0].cpu().numpy()
723
 
724
+ # 更新 EMA 信心
725
+ self.ema_confidence = self.ema_alpha * max_prob + (1 - self.ema_alpha) * self.ema_confidence
726
+
727
+ # 記錄近N次 top1,供投票平滑
728
+ self.recent_top1_queue.append(predicted_class)
729
+
730
+ # 動態 threshold:手不存在或 EMA 偏低時提高門檻
731
+ dyn_thr = self.threshold
732
+ if not self.hand_present:
733
+ dyn_thr = min(0.95, dyn_thr + 0.15)
734
+ if self.ema_confidence < 0.5:
735
+ dyn_thr = min(0.9, dyn_thr + 0.1)
736
+ self.dynamic_threshold = dyn_thr
737
+
738
+ if max_prob >= dyn_thr:
739
  self.current_prediction = predicted_class
740
  self.prediction_probabilities = probs
741
  else:
742
  self.current_prediction = -1
743
  self.prediction_probabilities = probs
744
 
745
def _apply_smoothing_and_decide(self):
    """Majority-vote over recent top-1 classes and commit a word when confident.

    Reads ``self.recent_top1_queue`` (a bounded deque of class indices),
    ``self.ema_confidence`` and ``self.current_prediction``. When the vote
    passes, ``self.current_prediction`` is overwritten with the winning
    class and ``self._update_word_sequence()`` is called; otherwise nothing
    is changed.

    A word is committed only when all of the following hold:
      * the window holds at least ``max(5, maxlen // 2)`` entries,
      * the winning class accounts for at least 40% of the window,
      * the EMA confidence is at least 0.45.

    NOTE(review): the original code also computed "strong" (0.6/0.6) and
    "medium" (0.5/0.5) tiers, but both imply the 0.4/0.45 tier, so only the
    weakest tier ever affected the outcome. During warm-up the vote ratio
    was forced to 0.0, so no word can be committed until the window is
    filled to the minimum size -- confirm this is the intended behaviour.
    """
    if self.current_prediction is None:
        return

    history = self.recent_top1_queue
    min_votes = max(5, history.maxlen // 2)
    if len(history) < min_votes:
        # Warm-up: not enough samples to vote yet.
        return

    voted_class, voted_count = collections.Counter(history).most_common(1)[0]
    vote_ratio = voted_count / len(history)

    if vote_ratio < 0.4 or self.ema_confidence < 0.45:
        return
    if voted_class < 0:
        # Negative indices mean "no prediction"; never emit a word for them.
        return

    self.current_prediction = voted_class
    self._update_word_sequence()
771
+
772
def load_label_mapping_from_csv(labels_file: str = LABELS_PATH):
    """Load the class-index -> label mapping from a labels CSV file.

    The file is read with ``pandas.read_csv`` and must provide an ``index``
    column (convertible to ``int``) and a ``label`` column.

    Args:
        labels_file: Path of the CSV file to read.

    Returns:
        A dict mapping integer class indices to labels. If the file is
        missing, unreadable, malformed or empty, the built-in default
        mapping ``{0: "eat", 1: "fish", 2: "like", 3: "want"}`` is returned.
    """
    label_map = {}
    print(f"🔍 嘗試載入標籤檔案: {labels_file}")
    if os.path.exists(labels_file):
        try:
            df = pd.read_csv(labels_file)
            # Build the mapping in one expression so that a bad row cannot
            # leave a truncated mapping behind; any failure falls through
            # to the default mapping below.
            parsed = {
                int(idx): label
                for idx, label in zip(df['index'], df['label'])
            }
        except Exception as e:
            # Deliberately broad: loading labels is best-effort and must
            # never prevent the recognizers from starting.
            print(f"❌ 讀取 labels.csv 出錯: {e}")
        else:
            label_map = parsed
            print(f"✅ 從 {labels_file} 載入了 {len(label_map)} 個類別標籤")
            print(f"📊 標籤映射: {label_map}")
    else:
        print(f"❌ 標籤檔案不存在: {labels_file}")

    if not label_map:
        label_map = {0: "eat", 1: "fish", 2: "like", 3: "want"}
        print(f"⚠️ 使用預設標籤映射: {label_map}")
    return label_map
792
+
793
  def initialize_recognizer():
794
  global recognizer
795
 
 
823
 
824
  socketio.emit('update_frame', {'image': frame_data, 'status': status})
825
 
826
+ time.sleep(0.1) # 約10 FPS,降低頻寬與CPU
827
 
828
  #--------------------
829
  # 路由定義
 
866
  def handle_webhook():
867
  """處理從 Messenger 來的訊息"""
868
  try:
869
+ # 驗證 Facebook 簽章
870
+ if APP_SECRET:
871
+ signature = request.headers.get('X-Hub-Signature-256')
872
+ if not _verify_facebook_signature(signature, request.data, APP_SECRET):
873
+ print("簽章驗證失敗")
874
+ return "簽章驗證失敗", 403
875
+
876
  data = request.get_json()
877
 
878
  if data.get('object') == 'page':
 
889
  print(f"處理 webhook 時發生錯誤: {e}")
890
  return "錯誤", 500
891
 
892
def _verify_facebook_signature(signature_header: str, payload: bytes, app_secret: str) -> bool:
    """Validate an ``X-Hub-Signature-256`` webhook header against the raw body.

    Args:
        signature_header: Header value, expected in the form
            ``sha256=<hex digest>``. ``None`` or any other shape is rejected.
        payload: Raw request body. ``None`` is treated as an empty body and
            ``str`` is encoded as UTF-8 before hashing.
        app_secret: Secret used as the HMAC-SHA256 key. An empty or
            non-string secret is rejected instead of being used as a key.

    Returns:
        True only when the digest after the ``sha256=`` prefix equals the
        HMAC-SHA256 hex digest of ``payload``; False otherwise.
    """
    prefix = 'sha256='
    if not isinstance(app_secret, str) or not app_secret:
        return False
    if not isinstance(signature_header, str) or not signature_header.startswith(prefix):
        return False

    # Keep everything after the prefix. Splitting on every '=' would discard
    # trailing data and let "sha256=<valid>=junk" pass verification.
    received_sig = signature_header[len(prefix):]

    if payload is None:
        payload = b''
    elif isinstance(payload, str):
        payload = payload.encode('utf-8')

    try:
        expected_sig = hmac.new(
            app_secret.encode('utf-8'), msg=payload, digestmod=hashlib.sha256
        ).hexdigest()
        # Compare as bytes: compare_digest() raises TypeError for non-ASCII str.
        return hmac.compare_digest(
            received_sig.encode('utf-8'), expected_sig.encode('ascii')
        )
    except (TypeError, ValueError):
        # Unhashable payload type or unencodable text: treat as a mismatch.
        return False
903
+
904
  @app.route('/receive_recognition_result', methods=['POST'])
905
  def receive_recognition_result():
906
  """接收手語辨識結果(內部呼叫)"""
 
946
 
947
  if video_file.filename == '':
948
  return jsonify({"status": "error", "message": "沒有選擇檔案"}), 400
949
+
950
+ # 基本 MIME 與副檔名檢查
951
+ allowed_exts = {'.mp4', '.mov', '.avi', '.wmv', '.mkv'}
952
+ _, ext = os.path.splitext(video_file.filename.lower())
953
+ content_type = (video_file.content_type or '').lower()
954
+ if ext not in allowed_exts and not content_type.startswith('video/'):
955
+ return jsonify({"status": "error", "message": "不支援的影片格式"}), 400
956
 
957
  # 使用臨時檔案避免權限問題
958
  import tempfile
 
1154
  send_message(sender_id, generated_sentence)
1155
  else:
1156
  send_message(sender_id, "抱歉,無法辨識您的手語內容,請再試一次。")
1157
+
1158
+ # 釋放 Mediapipe 資源
1159
+ try:
1160
+ video_recognizer.feature_extractor.close()
1161
+ except Exception:
1162
+ pass
1163
 
1164
  except Exception as e:
1165
  print(f"處理 Messenger 影片時發生錯誤:{e}")
 
1227
  #--------------------
1228
  if __name__ == '__main__':
1229
  # HuggingFace Spaces 環境檢測
1230
+ port = int(CONFIG.get('PORT', 7860)) # HuggingFace 預設端口
1231
 
1232
  print("🚀 手語辨識整合系統啟動中...")
1233
  print(f"📱 Messenger Bot: {'已配置' if PAGE_ACCESS_TOKEN != 'your_page_access_token' else '未配置'}")
1234
+ print(f"🤖 OpenAI API: {'已配置' if CONFIG.get('OPENAI_API_KEY') else '未配置'}")
1235
+ print(f"🔧 運行模式: {'HuggingFace Spaces' if port == 7860 else '本地開發'} | SocketIO: {ASYNC_MODE}")
1236
 
1237
+ socketio.run(app, host='0.0.0.0', port=port, debug=CONFIG.get('DEBUG', False))
app_config.py CHANGED
@@ -23,6 +23,7 @@ DEFAULT_CONFIG = {
23
  def get_config():
24
  return {
25
  "OPENAI_API_KEY": os.environ.get("OPENAI_API_KEY"),
 
26
  "VERIFY_TOKEN": os.environ.get("VERIFY_TOKEN", "your_verify_token"),
27
  "PAGE_ACCESS_TOKEN": os.environ.get("PAGE_ACCESS_TOKEN", "your_page_access_token"),
28
  "PORT": int(os.environ.get("PORT", 7860)),
 
23
  def get_config():
24
  return {
25
  "OPENAI_API_KEY": os.environ.get("OPENAI_API_KEY"),
26
+ "APP_SECRET": os.environ.get("APP_SECRET"),
27
  "VERIFY_TOKEN": os.environ.get("VERIFY_TOKEN", "your_verify_token"),
28
  "PAGE_ACCESS_TOKEN": os.environ.get("PAGE_ACCESS_TOKEN", "your_page_access_token"),
29
  "PORT": int(os.environ.get("PORT", 7860)),
requirements.txt CHANGED
@@ -1,5 +1,6 @@
1
  flask>=2.3.0,<3.1.0
2
  flask-socketio>=5.3.0,<6.0.0
 
3
  opencv-python-headless>=4.8.0,<5.0.0
4
  numpy>=1.21.0,<2.0.0
5
  pandas>=1.5.0,<3.0.0
 
1
  flask>=2.3.0,<3.1.0
2
  flask-socketio>=5.3.0,<6.0.0
3
+ eventlet>=0.33.0,<0.34.0
4
  opencv-python-headless>=4.8.0,<5.0.0
5
  numpy>=1.21.0,<2.0.0
6
  pandas>=1.5.0,<3.0.0