Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import streamlit as st
import cv2
import mediapipe as mp
import numpy as np
import pickle
import torch
import time
import pandas as pd

# Load the attention classifier (pickled scikit-learn-style model with .predict).
# NOTE(review): pickle.load executes arbitrary code from the file — only ship a
# model.pkl from a trusted source.
with open('model.pkl', 'rb') as f:  # close the handle instead of leaking it
    model = pickle.load(f)

# Load the YOLOv7 detector on GPU when available, otherwise CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# NOTE(review): source='local' makes torch.hub treat 'WongKinYiu/yolov7' as a
# local directory path; confirm that checkout exists next to this script,
# otherwise this should be source='github'.
yolo_model = torch.hub.load('WongKinYiu/yolov7', 'custom', 'yolov7.pt', source='local').to(device)

# Mediapipe detectors, constructed once at import time and reused every frame.
mp_face = mp.solutions.face_detection.FaceDetection(model_selection=0, min_detection_confidence=0.5)
mp_hands = mp.solutions.hands.Hands()
# NOTE(review): despite the name, this is a FaceMesh instance — the loop below
# uses its nose landmark for a simplified head-pose estimate, not a Pose model.
mp_pose = mp.solutions.face_mesh.FaceMesh()
# Streamlit UI setup.
# NOTE: st.set_page_config must be the first Streamlit call on the page, and
# widgets render top-to-bottom, so the statement order below is significant.
st.set_page_config(page_title="Multi-Face Attention Detector", layout='wide')
st.title("🎥 Real-Time Multi-Face Attention Detector (YOLOv7 + Mediapipe + GPU)")
run = st.checkbox('Start Webcam')  # toggles the capture loop below

FRAME_WINDOW = st.image([])   # placeholder updated with each annotated frame
attention_log = []            # one dict per (face, frame): face_id, elapsed time, state
start_time = time.time()      # reference point for the log's 'time' column
if run:
    cap = cv2.VideoCapture(0)
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                st.warning("⚠️ Cannot access webcam.")
                break

            h, w, _ = frame.shape

            # Convert once per frame: Mediapipe expects RGB, OpenCV captures BGR.
            # The hand/pose passes below are also hoisted out of the per-face loop:
            # the original ran them once per detected face (redundant work) and,
            # for faces after the first, fed the detectors a frame that already had
            # rectangles drawn on it.
            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # --- YOLOv7 phone detection (COCO class 67 = cell phone) ---
            results = yolo_model([frame])
            phones = [
                (int(det[0]), int(det[1]), int(det[2] - det[0]), int(det[3] - det[1]))
                for det in results.xyxy[0]
                if int(det[-1]) == 67
            ]

            # --- Per-frame Mediapipe passes ---
            face_res = mp_face.process(rgb)
            hand_res = mp_hands.process(rgb)
            no_hands = len(hand_res.multi_hand_landmarks) if hand_res.multi_hand_landmarks else 0

            # Simplified head pose from the FaceMesh nose-tip landmark (index 1):
            # classify by where the nose sits relative to fixed frame fractions.
            pose_res = mp_pose.process(rgb)
            pose_label, pose_x, pose_y = 'forward', 0, 0
            if pose_res.multi_face_landmarks:
                nose = pose_res.multi_face_landmarks[0].landmark[1]
                pose_x, pose_y = int(nose.x * w), int(nose.y * h)
                if pose_y > h * 0.6:
                    pose_label = 'down'
                elif pose_x < w * 0.3:
                    pose_label = 'left'
                elif pose_x > w * 0.7:
                    pose_label = 'right'
            pose_code = {'forward': 0, 'down': 1, 'left': 2, 'right': 3}.get(pose_label, 0)

            if face_res.detections:
                for i, det in enumerate(face_res.detections):
                    bbox = det.location_data.relative_bounding_box
                    face_x, face_y = int(bbox.xmin * w), int(bbox.ymin * h)
                    face_w, face_h = int(bbox.width * w), int(bbox.height * h)
                    face_conf = det.score[0]

                    # A phone counts as "with this face" when its bottom-right
                    # corner falls inside the face box.
                    # NOTE(review): an overlap/IoU test would be more robust —
                    # confirm this corner heuristic matches training.
                    phone_detected = int(any(
                        face_x < p[0] + p[2] < face_x + face_w and face_y < p[1] + p[3] < face_y + face_h
                        for p in phones
                    ))

                    # Only the first detected phone's box feeds the classifier.
                    if phones:
                        phone_x, phone_y, phone_w, phone_h = phones[0]
                    else:
                        phone_x, phone_y, phone_w, phone_h = 0, 0, 0, 0

                    # Feature layout must mirror model.pkl's training columns;
                    # the leading 1 and trailing 0.8 are fixed features —
                    # presumably bias and a default confidence; verify against training code.
                    feature_vector = np.array([[1, face_x, face_y, face_w, face_h, face_conf,
                                                no_hands, pose_code, pose_x, pose_y, phone_detected,
                                                phone_x, phone_y, phone_w, phone_h, 0.8]])

                    pred = model.predict(feature_vector)[0]
                    attention_text = 'Attentive' if pred == 0 else 'Inattentive'

                    attention_log.append({'face_id': i + 1, 'time': time.time() - start_time, 'state': attention_text})

                    # Annotate the BGR frame: green = attentive, red = inattentive.
                    color = (0, 255, 0) if pred == 0 else (0, 0, 255)
                    cv2.rectangle(frame, (face_x, face_y), (face_x + face_w, face_y + face_h), color, 2)
                    cv2.putText(frame, f'Face {i + 1}: {attention_text}', (face_x, face_y - 10),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)

            # Fresh conversion so the annotations drawn above are displayed.
            FRAME_WINDOW.image(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

            # NOTE(review): waitKey needs an OpenCV window; under Streamlit this
            # is effectively a no-op, kept for parity with the original script.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the camera even if the loop body raises.
        cap.release()
# Post-run dashboard: summarize the attention log collected by the loop above.
if attention_log:
    df = pd.DataFrame(attention_log)

    # Count detections per state using boolean masks.
    attentive = int((df['state'] == 'Attentive').sum())
    inattentive = int((df['state'] == 'Inattentive').sum())

    st.markdown("### 📊 Attention Statistics")
    st.write(f"✅ Attentive detections: {attentive}")
    st.write(f"⚠️ Inattentive detections: {inattentive}")
    st.dataframe(df.tail(10))

    # Fraction of attentive detections at each logged timestamp.
    attentive_ratio = df.groupby('time')['state'].apply(lambda s: (s == 'Attentive').mean())
    st.line_chart(attentive_ratio)

    st.download_button("Download Log as CSV", df.to_csv(index=False), file_name="attention_log.csv")