AI-Interview-system / modules /face_landmarks.py
Sunaina792's picture
Upload 29 files
aa8e154 verified
# MODULE 1: Face Detection & Facial Landmark Extraction
# AI Interview Confidence & Behavior Analysis System
import cv2
import numpy as np
import os
import sys
# ── Safe MediaPipe import (handles all versions + missing DLLs) ──
MP_AVAILABLE = False
try:
import mediapipe as mp
from mediapipe.tasks import python as mp_python
from mediapipe.tasks.python import vision as mp_vision
NEW_API = True
MP_AVAILABLE = True
print("[INFO] Using NEW MediaPipe API (>= 0.10.x)")
except Exception as exc:
mp_python = None
mp_vision = None
try:
# fallback older API if task module not present
import mediapipe as mp
MP_AVAILABLE = True
NEW_API = False
print("[INFO] Using LEGACY MediaPipe API (0.9.x)")
except Exception as inner_exc:
print("[WARNING] MediaPipe is not available. Face landmark functionality will be disabled.")
print("[WARNING] Import error:", repr(exc))
print("[WARNING] If you are on Windows, install Microsoft C++ Redistributable for Visual Studio 2015-2019 (msvcp140.dll, msvcp140_1.dll).")
MP_AVAILABLE = False
mp = None
NEW_API = False
# CONFIGURATION
FRAME_WIDTH = 640
FRAME_HEIGHT = 480
PROCESS_EVERY_N_FRAMES = 3
EAR_BLINK_THRESHOLD = 0.20
LANDMARK_INDICES = {
"left_eye": [33, 160, 158, 133, 153, 144],
"right_eye": [362, 385, 387, 263, 373, 380],
"left_iris": [468],
"right_iris": [473],
"nose_tip": [1],
"mouth": [13, 14, 78, 308],
"left_eyebrow": [70, 63, 105, 66, 107],
"right_eyebrow": [336, 296, 334, 293, 300],
"chin": [152],
"forehead": [10],
}
REGION_COLORS = {
"left_eye": (0, 255, 0),
"right_eye": (0, 255, 0),
"left_iris": (255, 100, 0),
"right_iris": (255, 100, 0),
"nose_tip": (0, 165, 255),
"mouth": (0, 0, 255),
"left_eyebrow": (255, 255, 0),
"right_eyebrow": (255, 255, 0),
"chin": (255, 0, 255),
"forehead": (255, 255, 255),
}
# EXTRACTOR CLASS
class FaceLandmarkExtractor:
def __init__(self):
self.frame_count = 0
self.last_result = None
self.enabled = MP_AVAILABLE
if not self.enabled:
print("[ERROR] FaceLandmarkExtractor disabled: MediaPipe not available.")
return
if NEW_API:
self._init_new_api()
else:
self._init_legacy_api()
def _init_new_api(self):
base_options = mp_python.BaseOptions(model_asset_path=self._get_model_path())
options = mp_vision.FaceLandmarkerOptions(
base_options=base_options,
output_face_blendshapes=False,
output_facial_transformation_matrixes=False,
num_faces=1,
min_face_detection_confidence=0.5,
min_face_presence_confidence=0.5,
min_tracking_confidence=0.5,
)
self.detector = mp_vision.FaceLandmarker.create_from_options(options)
def _get_model_path(self):
import urllib.request
model_path = "face_landmarker.task"
if not os.path.exists(model_path):
print("[INFO] Downloading face landmarker model (~6 MB)...")
url = "https://storage.googleapis.com/mediapipe-models/face_landmarker/face_landmarker/float16/1/face_landmarker.task"
urllib.request.urlretrieve(url, model_path)
print("[INFO] Model downloaded!")
return model_path
def _init_legacy_api(self):
self.face_mesh = mp.solutions.face_mesh.FaceMesh(
static_image_mode=False,
max_num_faces=1,
refine_landmarks=True,
min_detection_confidence=0.5,
min_tracking_confidence=0.5,
)
self.mp_drawing = mp.solutions.drawing_utils
self.mp_drawing_styles = mp.solutions.drawing_styles
self.mp_face_mesh = mp.solutions.face_mesh
def extract(self, frame):
"""For video/webcam — includes frame skipping for performance."""
if not self.enabled:
return self._empty_result(frame)
self.frame_count += 1
if self.frame_count % PROCESS_EVERY_N_FRAMES != 0:
return self.last_result if self.last_result else self._empty_result(frame)
frame = cv2.resize(frame, (FRAME_WIDTH, FRAME_HEIGHT))
result = self._run_detection(frame)
self.last_result = result
return result
def extract_image(self, frame):
"""For static images — no frame skipping, processes everything."""
if not self.enabled:
return self._empty_result(frame)
return self._run_detection(frame)
def _run_detection(self, frame):
if not self.enabled:
return self._empty_result(frame)
return self._extract_new_api(frame) if NEW_API else self._extract_legacy_api(frame)
def _extract_legacy_api(self, frame):
rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
results = self.face_mesh.process(rgb)
annotated = frame.copy()
if not results.multi_face_landmarks:
return self._empty_result(frame)
face_lms = results.multi_face_landmarks[0]
self.mp_drawing.draw_landmarks(
image=annotated, landmark_list=face_lms,
connections=self.mp_face_mesh.FACEMESH_TESSELATION,
landmark_drawing_spec=None,
connection_drawing_spec=self.mp_drawing_styles.get_default_face_mesh_tesselation_style(),
)
self.mp_drawing.draw_landmarks(
image=annotated, landmark_list=face_lms,
connections=self.mp_face_mesh.FACEMESH_EYES,
landmark_drawing_spec=None,
connection_drawing_spec=self.mp_drawing_styles.get_default_face_mesh_contours_style(),
)
key_points = self._to_pixels(face_lms.landmark, frame)
return {"face_detected": True, "landmarks": face_lms, "key_points": key_points,
"annotated_frame": annotated, "ear": self._compute_ear(key_points)}
def _extract_new_api(self, frame):
rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb)
detection = self.detector.detect(mp_image)
annotated = frame.copy()
if not detection.face_landmarks:
return self._empty_result(frame)
landmarks = detection.face_landmarks[0]
key_points = self._to_pixels(landmarks, frame)
return {"face_detected": True, "landmarks": landmarks, "key_points": key_points,
"annotated_frame": annotated, "ear": self._compute_ear(key_points)}
def _to_pixels(self, landmark_list, frame):
h, w = frame.shape[:2]
key_points = {}
for region, indices in LANDMARK_INDICES.items():
pts = []
for idx in indices:
if idx < len(landmark_list):
lm = landmark_list[idx]
pts.append((int(lm.x * w), int(lm.y * h)))
key_points[region] = pts
return key_points
def _compute_ear(self, key_points):
def ear(pts):
if len(pts) < 6: return 0.0
A = np.linalg.norm(np.array(pts[1]) - np.array(pts[5]))
B = np.linalg.norm(np.array(pts[2]) - np.array(pts[4]))
C = np.linalg.norm(np.array(pts[0]) - np.array(pts[3]))
return round((A + B) / (2.0 * C), 4) if C != 0 else 0.0
l = ear(key_points.get("left_eye", []))
r = ear(key_points.get("right_eye", []))
return {"left": l, "right": r, "avg": round((l + r) / 2, 4) if (l and r) else 0.0}
def _empty_result(self, frame):
return {"face_detected": False, "landmarks": None, "key_points": {},
"annotated_frame": frame, "ear": {"left": 0.0, "right": 0.0, "avg": 0.0}}
def release(self):
if not NEW_API and hasattr(self, "face_mesh"):
self.face_mesh.close()
# SHARED DRAWING HELPERS
def draw_key_points(frame, key_points):
for region, pts in key_points.items():
color = REGION_COLORS.get(region, (200, 200, 200))
for pt in pts:
cv2.circle(frame, pt, 5, color, -1)
cv2.circle(frame, pt, 6, (0, 0, 0), 1)
return frame
def draw_legend(frame):
items = [("Eyes",(0,255,0)),("Iris",(255,100,0)),("Nose",(0,165,255)),
("Mouth",(0,0,255)),("Eyebrows",(255,255,0)),("Chin/Head",(255,0,255))]
lx = 10
ly = frame.shape[0] - (len(items) * 20 + 25)
cv2.putText(frame, "Legend:", (lx, ly), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,255,255), 1)
for i, (label, color) in enumerate(items):
y = ly + 18 + i * 20
cv2.circle(frame, (lx+6, y-5), 5, color, -1)
cv2.putText(frame, label, (lx+18, y), cv2.FONT_HERSHEY_SIMPLEX, 0.45, (220,220,220), 1)
return frame
def draw_overlay(frame, ear, face_detected):
color = (0,255,0) if face_detected else (0,0,255)
text = "FACE DETECTED" if face_detected else "NO FACE FOUND"
cv2.putText(frame, text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.9, color, 2)
if face_detected:
blink = "BLINK" if ear["avg"] < EAR_BLINK_THRESHOLD else "Eyes Open"
cv2.putText(frame, f"EAR: {ear['avg']} [{blink}]", (10, 65),
cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255,255,0), 2)
cv2.putText(frame, "478 landmarks detected", (10, 100),
cv2.FONT_HERSHEY_SIMPLEX, 0.55, (180,180,180), 1)
return frame
# MODE 1: IMAGE TEST
def run_image_test(image_path: str, save_output: bool = True):
"""
Test on a static image file. Saves annotated result next to original.
Usage:
python face_landmarks.py --image "D:/photos/myface.jpg"
Analogy: Like doing a fire drill with a fake alarm before
the real thing — safe, repeatable, zero risk.
"""
print("\n" + "="*55)
print(" IMAGE TEST MODE")
print("="*55)
print(f" File : {image_path}")
frame = cv2.imread(image_path)
if frame is None:
print(f"\n [ERROR] Cannot load image. Check the path.")
print(f" Example: D:/photos/face.jpg")
return
print(f" Size : {frame.shape[1]} x {frame.shape[0]} px\n")
extractor = FaceLandmarkExtractor()
result = extractor.extract_image(frame)
print("-"*55)
if not result["face_detected"]:
print(" [RESULT] NO FACE DETECTED")
print("\n Possible fixes:")
print(" Use a clear, front-facing, well-lit portrait photo")
print(" Avoid heavy shadows, masks, or extreme head tilt")
cv2.imshow("Result — No Face Detected (any key to close)", frame)
cv2.waitKey(0)
cv2.destroyAllWindows()
extractor.release()
return
print(" [RESULT] FACE DETECTED\n")
kp = result["key_points"]
ear = result["ear"]
# Key points table
print(f" {'Region':<18} {'Points':>6} Sample Coord")
print(" " + "-"*42)
for region, pts in kp.items():
sample = str(pts[0]) if pts else "N/A"
print(f" {region:<18} {len(pts):>6} {sample}")
# EAR report
print(f"\n EYE ASPECT RATIO (EAR):")
print(f" Left : {ear['left']:<8} {'BLINK' if ear['left'] < EAR_BLINK_THRESHOLD else 'Open'}")
print(f" Right : {ear['right']:<8} {'BLINK' if ear['right'] < EAR_BLINK_THRESHOLD else 'Open'}")
print(f" Avg : {ear['avg']:<8} {'BLINK' if ear['avg'] < EAR_BLINK_THRESHOLD else 'Open'}")
# Build annotated image
out = result["annotated_frame"].copy()
out = draw_key_points(out, kp)
out = draw_overlay(out, ear, True)
out = draw_legend(out)
# Save
if save_output:
base, ext = os.path.splitext(image_path)
out_path = f"{base}_landmarks{ext}"
cv2.imwrite(out_path, out)
print(f"\n Saved: {out_path}")
print("\n Press any key on the window to close.")
cv2.imshow("Module 1 - Image Test (any key to close)", out)
cv2.waitKey(0)
cv2.destroyAllWindows()
extractor.release()
print(" Done!\n")
# MODE 2: LIVE WEBCAM
def run_webcam_demo():
"""
Live webcam landmark detection. Press Q to quit.
Like a magic mirror that draws a precise dot-map
on your face in real time as you move.
"""
extractor = FaceLandmarkExtractor()
cap = cv2.VideoCapture(0)
if not cap.isOpened():
print("[ERROR] Cannot open webcam. Ensure it is connected and not in use.")
return
print("[INFO] Webcam started. Press Q to quit.\n")
while True:
ret, frame = cap.read()
if not ret:
print("[ERROR] Failed to read from webcam.")
break
result = extractor.extract(frame)
disp = result["annotated_frame"].copy()
if result["face_detected"]:
disp = draw_key_points(disp, result["key_points"])
disp = draw_legend(disp)
disp = draw_overlay(disp, result["ear"], result["face_detected"])
cv2.imshow("Module 1 - Webcam (Q to quit)", disp)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
extractor.release()
cv2.destroyAllWindows()
print("[INFO] Webcam closed.")
# ENTRY POINT
if __name__ == "__main__":
# Command-line: python face_landmarks.py --image "D:\interview_analyzer\test.jpg"
# Command-line: python face_landmarks.py --webcam
if len(sys.argv) >= 3 and sys.argv[1] == "--image":
run_image_test(sys.argv[2])
sys.exit(0)
elif len(sys.argv) >= 2 and sys.argv[1] == "--webcam":
run_webcam_demo()
sys.exit(0)
# Interactive menu
print("\n" + "="*50)
print(" MODULE 1 - Face Detection & Landmarks")
print("="*50)
print(" [1] Test on an IMAGE file")
print(" [2] Live WEBCAM demo")
print("="*50)
choice = input(" Enter choice (1 or 2): ").strip()
if choice == "1":
path = input(" Enter image path (e.g. D:/photos/face.jpg): ").strip().strip('"')
run_image_test(path)
elif choice == "2":
run_webcam_demo()
else:
print(" Invalid. Run again and enter 1 or 2.")