Lior-0618 committed on
Commit
ffde148
·
1 Parent(s): f7f447d

fix: ImageNet normalization in FER preprocessing

Browse files
Files changed (1) hide show
  1. model/voxtral-server/main.py +4 -0
model/voxtral-server/main.py CHANGED
@@ -99,6 +99,10 @@ def _fer_frame(img_bgr: np.ndarray) -> Optional[str]:
99
 
100
  resized = cv2.resize(face_crop, (224, 224))
101
  rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
 
 
 
 
102
  tensor = np.transpose(rgb, (2, 0, 1))[np.newaxis] # [1, 3, 224, 224]
103
 
104
  out = _fer_session.run(None, {_fer_input_name: tensor})[0] # [1, 8]
 
99
 
100
  resized = cv2.resize(face_crop, (224, 224))
101
  rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
102
+ # ImageNet normalization (matches original emotion-recognition.ts)
103
+ mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
104
+ std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
105
+ rgb = (rgb - mean) / std
106
  tensor = np.transpose(rgb, (2, 0, 1))[np.newaxis] # [1, 3, 224, 224]
107
 
108
  out = _fer_session.run(None, {_fer_input_name: tensor})[0] # [1, 8]