# MODULE 5: Fusion Scoring
# AI Interview Confidence & Behavior Analysis System
import math
from collections import deque

import cv2
import numpy as np
# CONFIGURATION
SCORE_HISTORY_WINDOW = 30  # frames for rolling average

# Relative contribution of each signal to the fused score (the four sum to 1.0).
WEIGHTS = {
    "eye_contact": 0.30,
    "expression": 0.25,
    "head_pose": 0.25,
    "audio": 0.20,
}

# One coaching message per signal and per quality bucket
# ("low" / "mid" / "high", as chosen by FusionScorer._bucket).
COACHING_TIPS = {
    "eye_contact": {
        "low": "Maintain eye contact — look into the camera, not at the screen edges",
        "mid": "Good gaze direction — try to stay consistent",
        "high": "Excellent eye contact",
    },
    "expression": {
        "low": "Relax your face — tension around the mouth and brows signals nervousness",
        "mid": "Expression is mostly calm — slight improvements possible",
        "high": "Facial expression looks confident and composed",
    },
    "head_pose": {
        "low": "Keep your head steady and centered — avoid excessive nodding or tilting",
        "mid": "Head pose is acceptable — try to reduce side tilts",
        "high": "Head position looks stable and professional",
    },
    "audio": {
        "low": "Speak clearly and steadily — avoid long pauses and filler words",
        "mid": "Voice is decent — work on projection and pace",
        "high": "Voice sounds confident and well-paced",
    },
}


# FUSION SCORER
class FusionScorer:
    """Fuse per-signal scores into one smoothed confidence score.

    Keeps a rolling window of the last SCORE_HISTORY_WINDOW raw scores for
    smoothing, plus the full list of raw scores for the end-of-session summary.
    """

    def __init__(self):
        self.history = deque(maxlen=SCORE_HISTORY_WINDOW)
        self.session_scores = []
        self.frame_count = 0

    @staticmethod
    def _sanitize(value) -> float:
        """Coerce *value* to a float inside the documented 0-100 range.

        NaN is mapped to 0.0: a single NaN would otherwise propagate into the
        rolling mean and into every session statistic.
        """
        value = float(value)
        if math.isnan(value):
            return 0.0
        return min(100.0, max(0.0, value))

    def compute(
        self,
        eye_contact_score: float = 0,
        expression_score: float = 0,
        head_pose_score: float = 0,
        audio_score: float = 0,
    ) -> dict:
        """Fuse individual module scores into a single confidence score.

        Each input is expected in 0-100; out-of-range values are clamped and
        NaN is treated as 0.

        Returns:
            dict with keys:
              "score"          rolling mean of the raw score, rounded to 1 decimal
              "raw_score"      weighted sum for this call, rounded to 1 decimal
              "label"          label derived from "score"
              "breakdown"      per-signal (sanitized) scores, rounded to 1 decimal
              "weights"        copy of WEIGHTS
              "tips"           one tip per signal, in the same order as "breakdown"
              "weakest_signal" name of the lowest-scoring signal
              "frame"          number of compute() calls since creation/reset
        """
        self.frame_count += 1
        scores = {
            "eye_contact": self._sanitize(eye_contact_score),
            "expression": self._sanitize(expression_score),
            "head_pose": self._sanitize(head_pose_score),
            "audio": self._sanitize(audio_score),
        }
        raw = sum(scores[k] * WEIGHTS[k] for k in scores)
        self.history.append(raw)
        self.session_scores.append(raw)
        smoothed = round(float(np.mean(self.history)), 1)
        return {
            "score": smoothed,
            "raw_score": round(raw, 1),
            "label": self._label(smoothed),
            "breakdown": {k: round(scores[k], 1) for k in scores},
            # Copy so callers cannot mutate the shared module-level weights.
            "weights": dict(WEIGHTS),
            "tips": self._tips(scores),
            "weakest_signal": self._weakest_signal(scores),
            "frame": self.frame_count,
        }

    def _label(self, score: float) -> str:
        """Map a fused score to its textual label."""
        if score >= 75:
            return "Confident"
        elif score >= 50:
            return "Moderate"
        else:
            return "Needs Improvement"

    def _tips(self, scores: dict) -> list:
        """Return one coaching tip per signal, in the iteration order of *scores*."""
        return [
            COACHING_TIPS[signal][self._bucket(val)]
            for signal, val in scores.items()
        ]

    def _bucket(self, val: float) -> str:
        """Classify a single signal score as "high", "mid" or "low"."""
        if val >= 70:
            return "high"
        elif val >= 40:
            return "mid"
        else:
            return "low"

    def _weakest_signal(self, scores: dict) -> str:
        """Return the key with the lowest score (first key wins on ties)."""
        return min(scores, key=lambda k: scores[k])

    def session_summary(self) -> dict:
        """Call at end of session to get overall stats.

        Returns an empty dict when no score has been recorded.
        """
        if not self.session_scores:
            return {}
        arr = np.array(self.session_scores)
        avg = round(float(np.mean(arr)), 1)
        return {
            "avg_score": avg,
            "max_score": round(float(np.max(arr)), 1),
            "min_score": round(float(np.min(arr)), 1),
            "std_dev": round(float(np.std(arr)), 1),
            "total_frames": self.frame_count,
            # Label the rounded average so it always agrees with "avg_score",
            # the same way compute() labels its rounded score.
            "label": self._label(avg),
        }

    def reset(self):
        """Discard all recorded scores and restart the frame counter."""
        self.history.clear()
        self.session_scores.clear()
        self.frame_count = 0
# DRAW OVERLAY
def _tip_for_signal(tips: list, breakdown: dict, signal: str) -> str:
    """Return the tip belonging to *signal*, falling back to the first tip."""
    if not tips:
        return ""
    order = list(breakdown)
    if signal in order:
        idx = order.index(signal)
        if idx < len(tips):
            return tips[idx]
    return tips[0]


def _hershey_safe(text: str) -> str:
    """Return *text* reduced to ASCII so cv2.putText can render every glyph."""
    return text.replace("\u2014", "-").encode("ascii", "replace").decode("ascii")


def draw_fusion_overlay(frame, result: dict) -> object:
    """Draw the fused score, the per-signal breakdown and one coaching tip.

    Args:
        frame: Image array to draw on; it is modified in place.
        result: Dict with the keys "score", "label", "breakdown", "tips" and
            "weakest_signal", as produced by FusionScorer.compute().

    Returns:
        The same *frame* object, for call chaining.
    """
    score = result["score"]
    label = result["label"]
    breakdown = result["breakdown"]
    weakest = result["weakest_signal"]
    # "tips" is ordered like "breakdown", so look the tip up by the weakest
    # signal's position instead of always showing the first (eye-contact) tip
    # under the weakest signal's name.
    tip = _tip_for_signal(result["tips"], breakdown, weakest)

    label_color = {
        "Confident": (0, 220, 0),
        "Moderate": (0, 200, 255),
        "Needs Improvement": (0, 80, 255),
    }.get(label, (200, 200, 200))

    # Main score bar background
    cv2.rectangle(frame, (8, 8), (350, 105), (30, 30, 30), -1)
    cv2.rectangle(frame, (8, 8), (350, 105), (80, 80, 80), 1)
    cv2.putText(frame, f"Confidence: {score}/100", (15, 35),
                cv2.FONT_HERSHEY_SIMPLEX, 0.85, label_color, 2)
    cv2.putText(frame, label, (15, 60),
                cv2.FONT_HERSHEY_SIMPLEX, 0.65, label_color, 1)

    # Score bar fill (clamped so the fill never leaves the bar)
    bar_x, bar_y, bar_w, bar_h = 15, 68, 320, 10
    filled = int(bar_w * min(max(score, 0), 100) / 100)
    cv2.rectangle(frame, (bar_x, bar_y), (bar_x + bar_w, bar_y + bar_h), (60, 60, 60), -1)
    cv2.rectangle(frame, (bar_x, bar_y), (bar_x + filled, bar_y + bar_h), label_color, -1)

    # Breakdown panel (bottom-left)
    signals = [
        ("Eye", breakdown.get("eye_contact", 0)),
        ("Expr", breakdown.get("expression", 0)),
        ("Head", breakdown.get("head_pose", 0)),
        ("Audio", breakdown.get("audio", 0)),
    ]
    frame_h = frame.shape[0]
    panel_y = frame_h - 90
    cv2.rectangle(frame, (8, panel_y - 15), (260, frame_h - 8), (30, 30, 30), -1)
    cv2.rectangle(frame, (8, panel_y - 15), (260, frame_h - 8), (70, 70, 70), 1)
    for i, (name, val) in enumerate(signals):
        x = 15 + i * 62
        color = (0, 200, 0) if val >= 70 else (0, 200, 255) if val >= 40 else (0, 80, 255)
        cv2.putText(frame, name, (x, panel_y + 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.42, (180, 180, 180), 1)
        cv2.putText(frame, str(int(val)), (x + 5, panel_y + 25),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.55, color, 1)
        mini_filled = int(40 * min(max(val, 0), 100) / 100)
        cv2.rectangle(frame, (x, panel_y + 32), (x + 40, panel_y + 38), (60, 60, 60), -1)
        cv2.rectangle(frame, (x, panel_y + 32), (x + mini_filled, panel_y + 38), color, -1)

    # Coaching tip (bottom-left, at the top edge of the breakdown panel).
    # The tips contain em dashes, which the Hershey fonts would draw as '?'.
    tip = _hershey_safe(tip)
    tip_short = tip[:60] + ("..." if len(tip) > 60 else "")
    cv2.putText(frame, f"Tip [{weakest}]: {tip_short}", (10, frame_h - 100),
                cv2.FONT_HERSHEY_SIMPLEX, 0.42, (200, 200, 100), 1)
    return frame
# TEST: IMAGE
def test_on_image(image_path: str):
    """Run the visual modules on one image, print the fused score and show it.

    Audio is not available for a still image, so audio_score is passed as 0.

    Args:
        image_path: Path of the image file to load with cv2.imread.
    """
    import os
    import sys

    # Make the sibling module files importable when this file is run directly;
    # avoid stacking a duplicate sys.path entry on every call.
    module_dir = os.path.dirname(os.path.abspath(__file__))
    if module_dir not in sys.path:
        sys.path.insert(0, module_dir)
    from face_landmarks import FaceLandmarkExtractor
    from eye_contact import EyeContactDetector
    from expression_detection import ExpressionDetector
    from head_pose import HeadPoseEstimator

    frame = cv2.imread(image_path)
    if frame is None:
        print(f"[ERROR] Cannot load: {image_path}")
        return

    extractor = FaceLandmarkExtractor()
    try:
        lm_result = extractor.extract_image(frame)
        if not lm_result["face_detected"]:
            print("[ERROR] No face detected.")
            return
        kp = lm_result["key_points"]
        ear = lm_result["ear"]

        eye_detector = EyeContactDetector()
        expr_detector = ExpressionDetector()
        head_estimator = HeadPoseEstimator()
        scorer = FusionScorer()

        eye_result = eye_detector.detect(kp, frame.shape)
        expr_result = expr_detector.detect(kp, ear)
        head_result = head_estimator.estimate(kp, frame.shape)
        result = scorer.compute(
            eye_contact_score=eye_result.get("score", 0),
            expression_score=expr_result.get("score", 0),
            head_pose_score=head_result.get("score", 0),
            audio_score=0,
        )

        print("\n" + "="*45)
        print(" MODULE 5 — FUSION SCORE (IMAGE)")
        print("="*45)
        print(f" Fused Score : {result['score']}/100")
        print(f" Label : {result['label']}")
        print(f" Eye Contact : {result['breakdown']['eye_contact']}/100")
        print(f" Expression : {result['breakdown']['expression']}/100")
        print(f" Head Pose : {result['breakdown']['head_pose']}/100")
        print(f" Audio : {result['breakdown']['audio']}/100 (N/A for image)")
        print(f" Weakest : {result['weakest_signal']}")
        print("\n Tips:")
        for tip in result["tips"]:
            print(f" • {tip}")

        out = lm_result["annotated_frame"].copy()
        out = draw_fusion_overlay(out, result)
        cv2.imshow("Module 5 - Fusion Score (any key to close)", out)
        try:
            cv2.waitKey(0)
        finally:
            cv2.destroyAllWindows()
    finally:
        # Release the extractor on every exit path, including the
        # "No face detected" early return and any exception.
        extractor.release()
# TEST: WEBCAM
def test_webcam():
    """Score live webcam frames until Q is pressed, then print a session summary.

    Audio is not captured here, so audio_score is passed as 0 for every frame.
    Frames without a detected face are shown but not scored.
    """
    import os
    import sys

    # Make the sibling module files importable when this file is run directly;
    # avoid stacking a duplicate sys.path entry on every call.
    module_dir = os.path.dirname(os.path.abspath(__file__))
    if module_dir not in sys.path:
        sys.path.insert(0, module_dir)
    from face_landmarks import FaceLandmarkExtractor
    from eye_contact import EyeContactDetector
    from expression_detection import ExpressionDetector
    from head_pose import HeadPoseEstimator

    extractor = FaceLandmarkExtractor()
    eye_detector = EyeContactDetector()
    expr_detector = ExpressionDetector()
    head_estimator = HeadPoseEstimator()
    scorer = FusionScorer()

    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("[ERROR] Cannot open webcam.")
        # Release what was already acquired before bailing out.
        cap.release()
        extractor.release()
        return

    print("[INFO] Webcam started. Press Q to quit.\n")
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            lm_result = extractor.extract(frame)
            disp = lm_result["annotated_frame"].copy()
            if lm_result["face_detected"]:
                kp = lm_result["key_points"]
                ear = lm_result["ear"]
                eye_result = eye_detector.detect(kp, frame.shape)
                expr_result = expr_detector.detect(kp, ear)
                head_result = head_estimator.estimate(kp, frame.shape)
                result = scorer.compute(
                    eye_contact_score=eye_result.get("score", 0),
                    expression_score=expr_result.get("score", 0),
                    head_pose_score=head_result.get("score", 0),
                    audio_score=0,
                )
                disp = draw_fusion_overlay(disp, result)
            else:
                cv2.putText(disp, "No face detected", (10, 40),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)
            cv2.imshow("Module 5 - Fusion Score (Q to quit)", disp)
            # Accept both cases: the prompt says "Q", and caps lock / shift
            # would otherwise make the key appear dead.
            key = cv2.waitKey(1) & 0xFF
            if key in (ord('q'), ord('Q')):
                break
    finally:
        # Always free the camera, the extractor and the window, even when
        # the loop is interrupted by an exception.
        cap.release()
        extractor.release()
        cv2.destroyAllWindows()

    summary = scorer.session_summary()
    print("\n" + "="*45)
    print(" SESSION SUMMARY")
    print("="*45)
    print(f" Avg Score : {summary.get('avg_score', 0)}/100")
    print(f" Max Score : {summary.get('max_score', 0)}/100")
    print(f" Min Score : {summary.get('min_score', 0)}/100")
    print(f" Std Dev : {summary.get('std_dev', 0)}")
    print(f" Frames : {summary.get('total_frames', 0)}")
    print(f" Overall : {summary.get('label', 'N/A')}")
# ENTRY POINT
if __name__ == "__main__":
    import sys

    # Non-interactive use: "--image <path>" or "--webcam" runs one test and exits.
    cli_args = sys.argv[1:]
    if len(cli_args) >= 2 and cli_args[0] == "--image":
        test_on_image(cli_args[1])
        sys.exit(0)
    if cli_args and cli_args[0] == "--webcam":
        test_webcam()
        sys.exit(0)

    # Otherwise fall back to an interactive menu.
    rule = "="*45
    print("\n" + rule)
    print(" MODULE 5 - Fusion Scoring")
    print(rule)
    print(" [1] Test on IMAGE")
    print(" [2] Live WEBCAM")
    print(rule)
    choice = input(" Choice (1 or 2): ").strip()
    if choice == "2":
        test_webcam()
    elif choice == "1":
        # Strip surrounding double quotes from the entered path.
        path = input(" Image path: ").strip().strip('"')
        test_on_image(path)
    else:
        print(" Invalid choice.")