# AI-Interview-system / modules /fusion_scoring.py
# Sunaina792's picture
# Upload 29 files
# aa8e154 verified
# MODULE 5: Fusion Scoring
# AI Interview Confidence & Behavior Analysis System
import cv2
import numpy as np
from collections import deque
# CONFIGURATION
# Number of most recent fused scores kept for the rolling average.
SCORE_HISTORY_WINDOW = 30

# Weight of each signal in the fused score (the four weights sum to 1.0).
WEIGHTS = dict(
    eye_contact=0.30,
    expression=0.25,
    head_pose=0.25,
    audio=0.20,
)

# Coaching message for every signal, keyed by score bucket
# ("low" / "mid" / "high").
COACHING_TIPS = {
    "eye_contact": dict(
        low="Maintain eye contact — look into the camera, not at the screen edges",
        mid="Good gaze direction — try to stay consistent",
        high="Excellent eye contact",
    ),
    "expression": dict(
        low="Relax your face — tension around the mouth and brows signals nervousness",
        mid="Expression is mostly calm — slight improvements possible",
        high="Facial expression looks confident and composed",
    ),
    "head_pose": dict(
        low="Keep your head steady and centered — avoid excessive nodding or tilting",
        mid="Head pose is acceptable — try to reduce side tilts",
        high="Head position looks stable and professional",
    ),
    "audio": dict(
        low="Speak clearly and steadily — avoid long pauses and filler words",
        mid="Voice is decent — work on projection and pace",
        high="Voice sounds confident and well-paced",
    ),
}
# FUSION SCORER
class FusionScorer:
    """Fuse per-signal scores into a single smoothed confidence score.

    Keeps a rolling window of the last ``SCORE_HISTORY_WINDOW`` raw fused
    scores (used for smoothing) and the full list of raw fused scores (used
    by :meth:`session_summary`).
    """

    def __init__(self):
        # Rolling window behind the smoothed score returned by compute().
        self.history = deque(maxlen=SCORE_HISTORY_WINDOW)
        # Every raw fused score since construction or the last reset().
        self.session_scores = []
        # Number of compute() calls since construction or the last reset().
        self.frame_count = 0

    def compute(
        self,
        eye_contact_score: float = 0,
        expression_score: float = 0,
        head_pose_score: float = 0,
        audio_score: float = 0,
    ) -> dict:
        """Fuse the individual module scores into one confidence score.

        Each input is expected in the range 0-100. ``None`` and NaN are
        treated as 0 and out-of-range values are clamped, so a single bad
        reading cannot corrupt the rolling average or the session statistics.

        Returns:
            dict with keys ``score`` (rolling mean, 1 decimal), ``raw_score``
            (this frame only), ``label``, ``breakdown`` (per-signal scores),
            ``weights`` (a copy of ``WEIGHTS``), ``tips`` (one tip per signal,
            in breakdown order), ``weakest_signal`` and ``frame``.
        """
        self.frame_count += 1
        scores = {
            "eye_contact": self._sanitize(eye_contact_score),
            "expression": self._sanitize(expression_score),
            "head_pose": self._sanitize(head_pose_score),
            "audio": self._sanitize(audio_score),
        }
        raw = sum(scores[k] * WEIGHTS[k] for k in scores)
        self.history.append(raw)
        self.session_scores.append(raw)
        smoothed = round(float(np.mean(self.history)), 1)
        return {
            "score": smoothed,
            "raw_score": round(raw, 1),
            "label": self._label(smoothed),
            "breakdown": {k: round(scores[k], 1) for k in scores},
            # Copy so callers cannot mutate the module-level configuration.
            "weights": dict(WEIGHTS),
            "tips": self._tips(scores),
            "weakest_signal": self._weakest_signal(scores),
            "frame": self.frame_count,
        }

    @staticmethod
    def _sanitize(value) -> float:
        """Coerce one module score to a float in [0, 100]; None/NaN become 0."""
        if value is None:
            return 0.0
        value = float(value)
        if np.isnan(value):
            return 0.0
        return min(100.0, max(0.0, value))

    def _label(self, score: float) -> str:
        """Map a fused score to its textual confidence label."""
        if score >= 75:
            return "Confident"
        elif score >= 50:
            return "Moderate"
        else:
            return "Needs Improvement"

    def _tips(self, scores: dict) -> list:
        """Return one coaching tip per signal, in the order of ``scores``."""
        return [
            COACHING_TIPS[signal][self._bucket(val)]
            for signal, val in scores.items()
        ]

    def _bucket(self, val: float) -> str:
        """Classify a single signal score as "high", "mid" or "low"."""
        if val >= 70:
            return "high"
        elif val >= 40:
            return "mid"
        else:
            return "low"

    def _weakest_signal(self, scores: dict) -> str:
        """Return the name of the lowest-scoring signal (first one on ties)."""
        return min(scores, key=lambda k: scores[k])

    def session_summary(self) -> dict:
        """Call at end of session to get overall stats.

        Returns an empty dict when no frame has been scored. The label is
        derived from the rounded average so it always agrees with the
        reported ``avg_score``.
        """
        if not self.session_scores:
            return {}
        arr = np.array(self.session_scores)
        avg = round(float(np.mean(arr)), 1)
        return {
            "avg_score": avg,
            "max_score": round(float(np.max(arr)), 1),
            "min_score": round(float(np.min(arr)), 1),
            "std_dev": round(float(np.std(arr)), 1),
            "total_frames": self.frame_count,
            "label": self._label(avg),
        }

    def reset(self):
        """Discard all accumulated scores and restart the frame counter."""
        self.history.clear()
        self.session_scores.clear()
        self.frame_count = 0
# DRAW OVERLAY
def draw_fusion_overlay(frame, result: dict) -> object:
    """Draw the fusion-score overlay onto ``frame`` in place and return it.

    Args:
        frame: image to draw on; only ``frame.shape[0]`` (height) is read
            here, everything else is passed to OpenCV drawing calls.
        result: dict with the keys ``score``, ``label``, ``breakdown``,
            ``tips`` and ``weakest_signal`` (the shape produced by
            ``FusionScorer.compute``).

    Returns:
        The same ``frame`` object, after drawing.
    """
    score = result["score"]
    label = result["label"]
    breakdown = result["breakdown"]
    tips = result["tips"]
    weakest = result["weakest_signal"]

    # Tips are listed in the same order as the breakdown keys, so the position
    # of the weakest signal selects the tip that matches the "[weakest]" tag.
    # Fall back to the first tip when that mapping is not available.
    signal_order = list(breakdown)
    tip = tips[0] if tips else ""
    if weakest in signal_order:
        weakest_idx = signal_order.index(weakest)
        if weakest_idx < len(tips):
            tip = tips[weakest_idx]

    def fill_width(width, value):
        # Clamp to 0-100 so a bar never extends outside its background track.
        return int(width * min(100.0, max(0.0, float(value))) / 100)

    label_color = {
        "Confident": (0, 220, 0),
        "Moderate": (0, 200, 255),
        "Needs Improvement": (0, 80, 255),
    }.get(label, (200, 200, 200))

    # Main score panel (top-left): background, border, score text and label.
    cv2.rectangle(frame, (8, 8), (350, 105), (30, 30, 30), -1)
    cv2.rectangle(frame, (8, 8), (350, 105), (80, 80, 80), 1)
    cv2.putText(frame, f"Confidence: {score}/100", (15, 35),
                cv2.FONT_HERSHEY_SIMPLEX, 0.85, label_color, 2)
    cv2.putText(frame, label, (15, 60),
                cv2.FONT_HERSHEY_SIMPLEX, 0.65, label_color, 1)

    # Score bar: grey track first, then the coloured fill on top.
    bar_x, bar_y, bar_w, bar_h = 15, 68, 320, 10
    filled = fill_width(bar_w, score)
    cv2.rectangle(frame, (bar_x, bar_y), (bar_x + bar_w, bar_y + bar_h), (60, 60, 60), -1)
    cv2.rectangle(frame, (bar_x, bar_y), (bar_x + filled, bar_y + bar_h), label_color, -1)

    # Breakdown panel (bottom-left): one column per signal.
    signals = [
        ("Eye", breakdown.get("eye_contact", 0)),
        ("Expr", breakdown.get("expression", 0)),
        ("Head", breakdown.get("head_pose", 0)),
        ("Audio", breakdown.get("audio", 0)),
    ]
    panel_y = frame.shape[0] - 90
    cv2.rectangle(frame, (8, panel_y - 15), (260, frame.shape[0] - 8), (30, 30, 30), -1)
    cv2.rectangle(frame, (8, panel_y - 15), (260, frame.shape[0] - 8), (70, 70, 70), 1)
    for i, (name, val) in enumerate(signals):
        x = 15 + i * 62
        # Same 70 / 40 thresholds as the high / mid / low tip buckets.
        color = (0, 200, 0) if val >= 70 else (0, 200, 255) if val >= 40 else (0, 80, 255)
        cv2.putText(frame, name, (x, panel_y + 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.42, (180, 180, 180), 1)
        cv2.putText(frame, str(int(val)), (x + 5, panel_y + 25),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.55, color, 1)
        mini_filled = fill_width(40, val)
        cv2.rectangle(frame, (x, panel_y + 32), (x + 40, panel_y + 38), (60, 60, 60), -1)
        cv2.rectangle(frame, (x, panel_y + 32), (x + mini_filled, panel_y + 38), color, -1)

    # Coaching tip (bottom-left, at the top edge of the breakdown panel),
    # truncated to 60 characters.
    tip_short = tip[:60] + ("..." if len(tip) > 60 else "")
    cv2.putText(frame, f"Tip [{weakest}]: {tip_short}", (10, frame.shape[0] - 100),
                cv2.FONT_HERSHEY_SIMPLEX, 0.42, (200, 200, 100), 1)
    return frame
# TEST: IMAGE
def test_on_image(image_path: str):
    """Run the visual pipeline on one image, print the fused score and show it.

    Loads ``image_path``, runs landmark extraction, eye-contact, expression
    and head-pose detection, fuses the scores (audio is passed as 0 because
    a still image has no audio), prints the result and displays the annotated
    frame until a key is pressed.

    The extractor is released on every exit path, including the
    "no face detected" early return and any exception raised while
    processing or displaying the image.
    """
    import os
    import sys

    # Make the sibling module files importable when this file is run directly.
    sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
    from face_landmarks import FaceLandmarkExtractor
    from eye_contact import EyeContactDetector
    from expression_detection import ExpressionDetector
    from head_pose import HeadPoseEstimator

    frame = cv2.imread(image_path)
    if frame is None:
        print(f"[ERROR] Cannot load: {image_path}")
        return

    extractor = FaceLandmarkExtractor()
    try:
        lm_result = extractor.extract_image(frame)
        if not lm_result["face_detected"]:
            print("[ERROR] No face detected.")
            return

        kp = lm_result["key_points"]
        ear = lm_result["ear"]
        eye_detector = EyeContactDetector()
        expr_detector = ExpressionDetector()
        head_estimator = HeadPoseEstimator()
        scorer = FusionScorer()

        eye_result = eye_detector.detect(kp, frame.shape)
        expr_result = expr_detector.detect(kp, ear)
        head_result = head_estimator.estimate(kp, frame.shape)
        result = scorer.compute(
            eye_contact_score=eye_result.get("score", 0),
            expression_score=expr_result.get("score", 0),
            head_pose_score=head_result.get("score", 0),
            audio_score=0,
        )

        print("\n" + "="*45)
        print(" MODULE 5 — FUSION SCORE (IMAGE)")
        print("="*45)
        print(f" Fused Score : {result['score']}/100")
        print(f" Label : {result['label']}")
        print(f" Eye Contact : {result['breakdown']['eye_contact']}/100")
        print(f" Expression : {result['breakdown']['expression']}/100")
        print(f" Head Pose : {result['breakdown']['head_pose']}/100")
        print(f" Audio : {result['breakdown']['audio']}/100 (N/A for image)")
        print(f" Weakest : {result['weakest_signal']}")
        print("\n Tips:")
        for tip in result["tips"]:
            print(f" • {tip}")

        out = lm_result["annotated_frame"].copy()
        out = draw_fusion_overlay(out, result)
        cv2.imshow("Module 5 - Fusion Score (any key to close)", out)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
    finally:
        extractor.release()
# TEST: WEBCAM
def test_webcam():
    """Run the live webcam demo and print a session summary on exit.

    Each captured frame goes through landmark extraction; when a face is
    detected the eye-contact, expression and head-pose scores are fused
    (audio is passed as 0) and drawn as an overlay. Frames without a
    detected face are shown with a warning and are not scored.

    The loop ends when a frame cannot be read or when Q (either case) is
    pressed. The extractor is always released, and the capture device and
    windows are released once the camera was opened, even if the loop raises.
    """
    import os
    import sys

    # Make the sibling module files importable when this file is run directly.
    sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
    from face_landmarks import FaceLandmarkExtractor
    from eye_contact import EyeContactDetector
    from expression_detection import ExpressionDetector
    from head_pose import HeadPoseEstimator

    extractor = FaceLandmarkExtractor()
    try:
        eye_detector = EyeContactDetector()
        expr_detector = ExpressionDetector()
        head_estimator = HeadPoseEstimator()
        scorer = FusionScorer()

        cap = cv2.VideoCapture(0)
        if not cap.isOpened():
            print("[ERROR] Cannot open webcam.")
            cap.release()
            return
        print("[INFO] Webcam started. Press Q to quit.\n")

        quit_keys = (ord('q'), ord('Q'))
        try:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                lm_result = extractor.extract(frame)
                disp = lm_result["annotated_frame"].copy()
                if lm_result["face_detected"]:
                    kp = lm_result["key_points"]
                    ear = lm_result["ear"]
                    eye_result = eye_detector.detect(kp, frame.shape)
                    expr_result = expr_detector.detect(kp, ear)
                    head_result = head_estimator.estimate(kp, frame.shape)
                    result = scorer.compute(
                        eye_contact_score=eye_result.get("score", 0),
                        expression_score=expr_result.get("score", 0),
                        head_pose_score=head_result.get("score", 0),
                        audio_score=0,
                    )
                    disp = draw_fusion_overlay(disp, result)
                else:
                    cv2.putText(disp, "No face detected", (10, 40),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)
                cv2.imshow("Module 5 - Fusion Score (Q to quit)", disp)
                # Accept both cases so the on-screen "Q to quit" hint holds
                # regardless of Caps Lock / Shift.
                if (cv2.waitKey(1) & 0xFF) in quit_keys:
                    break
        finally:
            cap.release()
            cv2.destroyAllWindows()
    finally:
        extractor.release()

    summary = scorer.session_summary()
    print("\n" + "="*45)
    print(" SESSION SUMMARY")
    print("="*45)
    print(f" Avg Score : {summary.get('avg_score', 0)}/100")
    print(f" Max Score : {summary.get('max_score', 0)}/100")
    print(f" Min Score : {summary.get('min_score', 0)}/100")
    print(f" Std Dev : {summary.get('std_dev', 0)}")
    print(f" Frames : {summary.get('total_frames', 0)}")
    print(f" Overall : {summary.get('label', 'N/A')}")
# ENTRY POINT
if __name__ == "__main__":
    import sys

    # Non-interactive use: "--image <path>" or "--webcam" on the command line.
    cli_args = sys.argv[1:]
    if len(cli_args) >= 2 and cli_args[0] == "--image":
        test_on_image(cli_args[1])
        sys.exit(0)
    if cli_args and cli_args[0] == "--webcam":
        test_webcam()
        sys.exit(0)

    # Otherwise fall back to the interactive menu.
    rule = "=" * 45
    print("\n" + rule)
    print(" MODULE 5 - Fusion Scoring")
    print(rule)
    print(" [1] Test on IMAGE")
    print(" [2] Live WEBCAM")
    print(rule)
    choice = input(" Choice (1 or 2): ").strip()
    if choice == "2":
        test_webcam()
    elif choice == "1":
        # Strip surrounding double quotes, e.g. from a pasted quoted path.
        path = input(" Image path: ").strip().strip('"')
        test_on_image(path)
    else:
        print(" Invalid choice.")