# AI-Interview-system / modules / head_pose.py
# (source upload by Sunaina792, commit aa8e154)
# MODULE 4: Head Pose Estimation
import cv2
import numpy as np
from collections import deque
# --- Tuning thresholds, in the module's scaled-degree units (see _solve_pnp) ---
PITCH_THRESHOLD = 20    # raised — compensates for low camera angle
YAW_THRESHOLD = 28
PITCH_OFFSET = 8.0      # NOTE(review): unused in this module — kept for import compatibility
ROLL_THRESHOLD = 15
STABILITY_WINDOW = 90   # frames; wider window = smoother stability score (was 60)

# Canonical 3D reference points (model units) for solvePnP. Order is presumably
# nose tip, chin, left eye corner, right eye corner, left/right mouth corner —
# TODO confirm against the landmark model used by FaceLandmarkExtractor.
FACE_3D_POINTS = np.array([
    [0.0, 0.0, 0.0],
    [0.0, -330.0, -65.0],
    [-225.0, 170.0, -135.0],
    [225.0, 170.0, -135.0],
    [-150.0, -150.0, -125.0],
    [150.0, -150.0, -125.0],
], dtype=np.float64)

# Landmark indices corresponding 1:1 to FACE_3D_POINTS (MediaPipe FaceMesh-style
# indices, presumably — verify against the extractor).
FACE_2D_INDICES = [1, 152, 33, 263, 78, 308]


class HeadPoseEstimator:
    """Estimate head orientation (pitch/yaw/roll) from facial landmarks.

    Uses cv2.solvePnP on six reference landmarks, keeps rolling histories of
    the absolute angles, and derives a 0-100 stability score from them.
    """

    def __init__(self):
        # Rolling windows of |angle| values consumed by _compute_stability().
        self.pitch_history = deque(maxlen=STABILITY_WINDOW)
        self.yaw_history = deque(maxlen=STABILITY_WINDOW)
        self.roll_history = deque(maxlen=STABILITY_WINDOW)

    def detect(self, landmarks, frame_shape):
        """Run one head-pose estimation step.

        Args:
            landmarks: landmark container (object with ``.landmark`` or a plain
                sequence); assumed normalized [0, 1] coords — TODO confirm.
            frame_shape: numpy-style ``(h, w, ...)`` shape of the frame.

        Returns:
            dict with keys pitch/yaw/roll (rounded degrees), direction,
            stability_score (0-100) and is_stable (score >= 60).
        """
        if landmarks is None:
            return self._empty_result()
        h, w = frame_shape[:2]
        face_2d = self._get_2d_points(landmarks, w, h)
        if face_2d is None:
            return self._empty_result()
        pitch, yaw, roll = self._solve_pnp(face_2d, w, h)
        self.pitch_history.append(abs(pitch))
        self.yaw_history.append(abs(yaw))
        self.roll_history.append(abs(roll))
        direction = self._get_direction(pitch, yaw, roll)
        stability_score = self._compute_stability()
        return {
            "pitch": round(pitch, 2),
            "yaw": round(yaw, 2),
            "roll": round(roll, 2),
            "direction": direction,
            "stability_score": stability_score,
            "is_stable": stability_score >= 60,
        }

    def _get_2d_points(self, landmarks, w, h):
        """Project the six reference landmarks to pixel coords; None on failure."""
        try:
            lm_list = landmarks.landmark if hasattr(landmarks, "landmark") else landmarks
            pts = [[lm_list[idx].x * w, lm_list[idx].y * h] for idx in FACE_2D_INDICES]
            return np.array(pts, dtype=np.float64)
        except Exception:
            # Malformed/short landmark list — caller treats None as "no face".
            return None

    def _solve_pnp(self, face_2d, w, h):
        """Solve PnP for the reference points; return (pitch, yaw, roll)."""
        # Pinhole approximation: focal length ~ frame width, principal point at center.
        focal_length = w
        cam_matrix = np.array([
            [focal_length, 0, w / 2],
            [0, focal_length, h / 2],
            [0, 0, 1],
        ], dtype=np.float64)
        dist_coeffs = np.zeros((4, 1), dtype=np.float64)  # assume no lens distortion
        success, rot_vec, _ = cv2.solvePnP(
            FACE_3D_POINTS, face_2d, cam_matrix, dist_coeffs,
            flags=cv2.SOLVEPNP_ITERATIVE,
        )
        if not success:
            return 0.0, 0.0, 0.0
        rot_mat, _ = cv2.Rodrigues(rot_vec)
        angles, _, _, _, _, _ = cv2.RQDecomp3x3(rot_mat)
        # NOTE(review): RQDecomp3x3 already returns degrees; the *360 scaling is
        # inherited from the original tuning — the thresholds above assume it,
        # so it is deliberately kept.
        pitch = angles[0] * 360
        yaw = angles[1] * 360
        roll = angles[2] * 360
        # Mirror correction — webcam flips left/right.
        # Without this, turning your head right shows as "Left".
        yaw = -yaw
        return pitch, yaw, roll

    def _get_direction(self, pitch, yaw, roll):
        """Label the dominant head direction, or "Forward" when within thresholds.

        BUGFIX: the dominant axis is chosen by *threshold-normalized* deviation.
        Previously raw degree values were compared, so a sub-threshold axis with
        a large threshold (e.g. yaw at 17 deg, threshold 28) could win dominance
        and mask another axis that actually exceeded its threshold (e.g. roll at
        16 deg, threshold 15), wrongly reporting "Forward".
        """
        ratios = {
            "yaw": abs(yaw) / YAW_THRESHOLD,
            "pitch": abs(pitch) / PITCH_THRESHOLD,
            "roll": abs(roll) / ROLL_THRESHOLD,
        }
        dominant = max(ratios, key=ratios.get)
        if ratios[dominant] <= 1.0:   # every axis within its own threshold
            return "Forward"
        if dominant == "yaw":
            return "Right" if yaw > 0 else "Left"
        if dominant == "pitch":
            return "Down" if pitch > 0 else "Up"
        return "Tilted"

    def _compute_stability(self):
        """Score 0-100; 100 = perfectly forward. Weights: yaw 45, pitch 30, roll 25."""
        if not self.pitch_history:
            return 100  # no data yet — optimistic default
        avg_pitch = np.mean(self.pitch_history)
        avg_yaw = np.mean(self.yaw_history)
        avg_roll = np.mean(self.roll_history)
        score = 100 - (
            min(avg_pitch / PITCH_THRESHOLD, 1.0) * 30 +
            min(avg_yaw / YAW_THRESHOLD, 1.0) * 45 +
            min(avg_roll / ROLL_THRESHOLD, 1.0) * 25
        )
        return max(0, int(score))

    def _empty_result(self):
        """Result returned when no face/landmarks are available."""
        return {
            "pitch": 0.0, "yaw": 0.0, "roll": 0.0,
            "direction": "Unknown", "stability_score": 0, "is_stable": False,
        }
def draw_head_pose_overlay(frame, result):
    """Render direction label, raw angles and stability score onto *frame*.

    Mutates and returns *frame*. Colors: green when forward/stable, orange
    otherwise.
    """
    heading = result["direction"]
    score = result["stability_score"]
    heading_color = (0, 255, 0) if heading == "Forward" else (0, 100, 255)
    score_color = (0, 255, 0) if score >= 60 else (0, 165, 255)
    angles_text = (
        f"Pitch:{result['pitch']:.1f} Yaw:{result['yaw']:.1f} Roll:{result['roll']:.1f}"
    )
    cv2.putText(frame, f"Head: {heading}", (10, 130),
                cv2.FONT_HERSHEY_SIMPLEX, 0.75, heading_color, 2)
    cv2.putText(frame, angles_text, (10, 162),
                cv2.FONT_HERSHEY_SIMPLEX, 0.55, (180, 180, 180), 1)
    cv2.putText(frame, f"Stability: {score}/100", (10, 192),
                cv2.FONT_HERSHEY_SIMPLEX, 0.65, score_color, 1)
    return frame
def test_on_image(image_path):
    """Run the head-pose pipeline on a single image file and display the result."""
    import sys, os
    # Allow running this module directly from anywhere in the repo.
    sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
    from face_landmarks import FaceLandmarkExtractor

    frame = cv2.imread(image_path)
    if frame is None:
        print(f"[ERROR] Cannot load: {image_path}")
        return
    extractor = FaceLandmarkExtractor()
    lm_result = extractor.extract_image(frame)
    if not lm_result["face_detected"]:
        print("[ERROR] No face detected.")
        return

    pose = HeadPoseEstimator().detect(lm_result["landmarks"], frame.shape)

    banner = "=" * 45
    print("\n" + banner)
    print(" MODULE 4 - IMAGE TEST RESULT")
    print(banner)
    print(f" Direction : {pose['direction']}")
    print(f" Pitch : {pose['pitch']} deg")
    print(f" Yaw : {pose['yaw']} deg")
    print(f" Roll : {pose['roll']} deg")
    print(f" Stability : {pose['stability_score']}/100")

    canvas = draw_head_pose_overlay(lm_result["annotated_frame"].copy(), pose)
    cv2.imshow("Module 4 - Head Pose (any key to close)", canvas)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    extractor.release()
def test_webcam():
    """Live webcam loop: detect head pose per frame until Q is pressed."""
    import sys, os
    # Allow running this module directly from anywhere in the repo.
    sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
    from face_landmarks import FaceLandmarkExtractor

    extractor = FaceLandmarkExtractor()
    estimator = HeadPoseEstimator()
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("[ERROR] Cannot open webcam.")
        return
    print("[INFO] Webcam started. Press Q to quit.")

    while True:
        ok, frame = cap.read()
        if not ok:
            break
        lm_result = extractor.extract(frame)
        display = lm_result["annotated_frame"].copy()
        if lm_result["face_detected"]:
            pose = estimator.detect(lm_result["landmarks"], frame.shape)
            display = draw_head_pose_overlay(display, pose)
        cv2.imshow("Module 4 - Head Pose (Q to quit)", display)
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break

    cap.release()
    extractor.release()
    cv2.destroyAllWindows()
if __name__ == "__main__":
    import sys

    # CLI shortcuts: `--image <path>` or `--webcam`; otherwise interactive menu.
    args = sys.argv
    if len(args) >= 3 and args[1] == "--image":
        test_on_image(args[2])
        sys.exit(0)
    if len(args) >= 2 and args[1] == "--webcam":
        test_webcam()
        sys.exit(0)

    banner = "=" * 45
    print("\n" + banner)
    print(" MODULE 4 - Head Pose Estimation")
    print(banner)
    print(" [1] Test on IMAGE\n [2] Live WEBCAM")
    print(banner)
    choice = input(" Choice (1 or 2): ").strip()
    if choice == "1":
        test_on_image(input(" Image path: ").strip().strip('"'))
    elif choice == "2":
        test_webcam()
    else:
        print(" Invalid choice.")