Spaces:
Sleeping
Sleeping
File size: 8,080 Bytes
aa8e154 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 |
# MODULE 4: Head Pose Estimation
import cv2
import numpy as np
from collections import deque
PITCH_THRESHOLD = 20 # raised — compensates for low camera angle
YAW_THRESHOLD = 28
PITCH_OFFSET = 8.0 # raised — reduces false Left/Right (was 20)
ROLL_THRESHOLD = 15
STABILITY_WINDOW = 90 # wider window = smoother stability score (was 60)
FACE_3D_POINTS = np.array([
[0.0, 0.0, 0.0 ],
[0.0, -330.0, -65.0 ],
[-225.0, 170.0, -135.0 ],
[225.0, 170.0, -135.0 ],
[-150.0,-150.0, -125.0 ],
[150.0, -150.0, -125.0 ],
], dtype=np.float64)
FACE_2D_INDICES = [1, 152, 33, 263, 78, 308]
class HeadPoseEstimator:
def __init__(self):
self.pitch_history = deque(maxlen=STABILITY_WINDOW)
self.yaw_history = deque(maxlen=STABILITY_WINDOW)
self.roll_history = deque(maxlen=STABILITY_WINDOW)
def detect(self, landmarks, frame_shape):
if landmarks is None:
return self._empty_result()
h, w = frame_shape[:2]
face_2d = self._get_2d_points(landmarks, w, h)
if face_2d is None:
return self._empty_result()
pitch, yaw, roll = self._solve_pnp(face_2d, w, h)
self.pitch_history.append(abs(pitch))
self.yaw_history.append(abs(yaw))
self.roll_history.append(abs(roll))
direction = self._get_direction(pitch, yaw, roll)
stability_score = self._compute_stability()
return {
"pitch": round(pitch, 2),
"yaw": round(yaw, 2),
"roll": round(roll, 2),
"direction": direction,
"stability_score": stability_score,
"is_stable": stability_score >= 60,
}
def _get_2d_points(self, landmarks, w, h):
try:
pts = []
lm_list = landmarks.landmark if hasattr(landmarks, "landmark") else landmarks
for idx in FACE_2D_INDICES:
lm = lm_list[idx]
pts.append([lm.x * w, lm.y * h])
return np.array(pts, dtype=np.float64)
except Exception:
return None
def _solve_pnp(self, face_2d, w, h):
focal_length = w
cam_matrix = np.array([
[focal_length, 0, w / 2],
[0, focal_length, h / 2],
[0, 0, 1 ]
], dtype=np.float64)
dist_coeffs = np.zeros((4, 1), dtype=np.float64)
success, rot_vec, _ = cv2.solvePnP(
FACE_3D_POINTS, face_2d, cam_matrix, dist_coeffs,
flags=cv2.SOLVEPNP_ITERATIVE
)
if not success:
return 0.0, 0.0, 0.0
rot_mat, _ = cv2.Rodrigues(rot_vec)
angles, _,_,_,_,_ = cv2.RQDecomp3x3(rot_mat)
pitch = angles[0] * 360
yaw = angles[1] * 360
roll = angles[2] * 360
# Mirror correction — webcam flips left/right
# Without this, turning your head right shows as "Left"
yaw = -yaw
return pitch, yaw, roll
def _get_direction(self, pitch, yaw, roll):
# Use the largest deviation as the primary direction
deviations = {
"yaw": abs(yaw),
"pitch": abs(pitch),
"roll": abs(roll),
}
dominant = max(deviations, key=lambda x: deviations[x])
if dominant == "yaw" and abs(yaw) > YAW_THRESHOLD:
return "Right" if yaw > 0 else "Left"
if dominant == "pitch" and abs(pitch) > PITCH_THRESHOLD:
return "Down" if pitch > 0 else "Up"
if dominant == "roll" and abs(roll) > ROLL_THRESHOLD:
return "Tilted"
return "Forward"
def _compute_stability(self):
if not self.pitch_history:
return 100
avg_pitch = np.mean(self.pitch_history)
avg_yaw = np.mean(self.yaw_history)
avg_roll = np.mean(self.roll_history)
score = 100 - (
min(avg_pitch / PITCH_THRESHOLD, 1.0) * 30 +
min(avg_yaw / YAW_THRESHOLD, 1.0) * 45 +
min(avg_roll / ROLL_THRESHOLD, 1.0) * 25
)
return max(0, int(score))
def _empty_result(self):
return {
"pitch": 0.0, "yaw": 0.0, "roll": 0.0,
"direction": "Unknown", "stability_score": 0, "is_stable": False,
}
def draw_head_pose_overlay(frame, result):
direction = result["direction"]
stability = result["stability_score"]
dir_color = (0, 255, 0) if direction == "Forward" else (0, 100, 255)
cv2.putText(frame, f"Head: {direction}", (10, 130),
cv2.FONT_HERSHEY_SIMPLEX, 0.75, dir_color, 2)
cv2.putText(frame,
f"Pitch:{result['pitch']:.1f} Yaw:{result['yaw']:.1f} Roll:{result['roll']:.1f}",
(10, 162), cv2.FONT_HERSHEY_SIMPLEX, 0.55, (180, 180, 180), 1)
cv2.putText(frame, f"Stability: {stability}/100", (10, 192),
cv2.FONT_HERSHEY_SIMPLEX, 0.65,
(0, 255, 0) if stability >= 60 else (0, 165, 255), 1)
return frame
def test_on_image(image_path):
import sys, os
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from face_landmarks import FaceLandmarkExtractor
frame = cv2.imread(image_path)
if frame is None:
print(f"[ERROR] Cannot load: {image_path}"); return
extractor = FaceLandmarkExtractor()
lm_result = extractor.extract_image(frame)
if not lm_result["face_detected"]:
print("[ERROR] No face detected."); return
estimator = HeadPoseEstimator()
result = estimator.detect(lm_result["landmarks"], frame.shape)
print("\n" + "="*45)
print(" MODULE 4 - IMAGE TEST RESULT")
print("="*45)
print(f" Direction : {result['direction']}")
print(f" Pitch : {result['pitch']} deg")
print(f" Yaw : {result['yaw']} deg")
print(f" Roll : {result['roll']} deg")
print(f" Stability : {result['stability_score']}/100")
out = lm_result["annotated_frame"].copy()
out = draw_head_pose_overlay(out, result)
cv2.imshow("Module 4 - Head Pose (any key to close)", out)
cv2.waitKey(0)
cv2.destroyAllWindows()
extractor.release()
def test_webcam():
import sys, os
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from face_landmarks import FaceLandmarkExtractor
extractor = FaceLandmarkExtractor()
estimator = HeadPoseEstimator()
cap = cv2.VideoCapture(0)
if not cap.isOpened():
print("[ERROR] Cannot open webcam."); return
print("[INFO] Webcam started. Press Q to quit.")
while True:
ret, frame = cap.read()
if not ret: break
lm_result = extractor.extract(frame)
disp = lm_result["annotated_frame"].copy()
if lm_result["face_detected"]:
result = estimator.detect(lm_result["landmarks"], frame.shape)
disp = draw_head_pose_overlay(disp, result)
cv2.imshow("Module 4 - Head Pose (Q to quit)", disp)
if cv2.waitKey(1) & 0xFF == ord("q"):
break
cap.release()
extractor.release()
cv2.destroyAllWindows()
if __name__ == "__main__":
import sys
if len(sys.argv) >= 3 and sys.argv[1] == "--image":
test_on_image(sys.argv[2]); sys.exit(0)
elif len(sys.argv) >= 2 and sys.argv[1] == "--webcam":
test_webcam(); sys.exit(0)
print("\n" + "="*45)
print(" MODULE 4 - Head Pose Estimation")
print("="*45)
print(" [1] Test on IMAGE\n [2] Live WEBCAM")
print("="*45)
choice = input(" Choice (1 or 2): ").strip()
if choice == "1":
test_on_image(input(" Image path: ").strip().strip('"'))
elif choice == "2":
test_webcam()
else:
print(" Invalid choice.") |