Saba Al-Gafri committed on
Commit
999bbd8
·
1 Parent(s): 9a71a9c

Replace CNN_MODEL submodule with actual files

Browse files
models/cnn/CNN_MODEL DELETED
@@ -1 +0,0 @@
1
- Subproject commit 330d93a3bc5c419c55e7ee18c6e47e2320030b47
 
 
models/cnn/CNN_MODEL/.claude/settings.local.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "permissions": {
3
+ "allow": [
4
+ "Bash(# Check Dataset_subset counts echo \"\"=== Dataset_subset/train/open ===\"\" && ls /Users/mohammedalketbi22/Downloads/GAP_Large_project-feature-dataset-model-test-92_30-clean/Dataset_subset/train/open/ | wc -l && echo \"\"=== Dataset_subset/train/closed ===\"\" && ls /Users/mohammedalketbi22/Downloads/GAP_Large_project-feature-dataset-model-test-92_30-clean/Dataset_subset/train/closed/ | wc -l && echo \"\"=== Dataset_subset/val/open ===\"\" && ls /Users/mohammedalketbi22/Downloads/GAP_Large_project-feature-dataset-model-test-92_30-clean/Dataset_subset/val/open/ | wc -l && echo \"\"=== Dataset_subset/val/closed ===\"\" && ls /Users/mohammedalketbi22/Downloads/GAP_Large_project-feature-dataset-model-test-92_30-clean/Dataset_subset/val/closed/)"
5
+ ]
6
+ }
7
+ }
models/cnn/CNN_MODEL/.gitattributes ADDED
@@ -0,0 +1 @@
 
 
1
+ DATA/** filter=lfs diff=lfs merge=lfs -text
models/cnn/CNN_MODEL/.gitignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ Dataset/train/
2
+ Dataset/val/
3
+ Dataset/test/
4
+ .DS_Store
models/cnn/CNN_MODEL/README.md ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Eye Open / Closed Classifier (YOLOv11-CLS)
2
+
3
+
4
+ Binary classifier: **open** vs **closed** eyes.
5
+ Used as a baseline for eye-tracking, drowsiness, or focus detection.
6
+
7
+ ---
8
+
9
+ ## Model team task
10
+
11
+ - **Train** the YOLOv11s-cls eye classifier in a **separate notebook** (data split, epochs, GPU, export `best.pt`).
12
+ - Provide **trained weights** (`best.pt`) for this repo’s evaluation and inference scripts.
13
+
14
+
15
+
16
+ ---
17
+
18
+ ## Repo contents
19
+
20
+ - **notebooks/eye_classifier_colab.ipynb** — Data download (Kaggle), clean, split, undersample, **evaluate** (needs `best.pt` from model team), export.
21
+ - **scripts/predict_image.py** — Run classifier on single images (needs `best.pt`).
22
+ - **scripts/webcam_live.py** — Live webcam open/closed (needs `best.pt` + optional `weights/face_landmarker.task`).
23
+ - **scripts/video_infer.py** — Run on video files.
24
+ - **scripts/focus_infer.py** — Focus/attention inference.
25
+ - **weights/** — Put `best.pt` here; `face_landmarker.task` is downloaded on first webcam run if missing.
26
+ - **docs/** — Extra docs (e.g. UNNECESSARY_FILES.md if present).
27
+
28
+ ---
29
+
30
+ ## Dataset
31
+
32
+ - **Source:** [Kaggle — open/closed eyes](https://www.kaggle.com/datasets/sehriyarmemmedli/open-closed-eyes-dataset)
33
+ - The Colab notebook downloads it via `kagglehub`; no local copy in repo.
34
+
35
+ ---
36
+
37
+ ## Weights
38
+
39
+ - Put **best.pt** from the model team in **weights/best.pt** (or `runs/classify/runs_cls/eye_open_closed_cpu/weights/best.pt`).
40
+ - For webcam: **face_landmarker.task** is downloaded into **weights/** on first run if missing.
41
+
42
+ ---
43
+
44
+ ## Local setup
45
+
46
+ ```bash
47
+ pip install ultralytics opencv-python mediapipe "numpy<2"
48
+ ```
49
+
50
+ Optional: use a venv. From repo root:
51
+ - `python scripts/predict_image.py <image.png>`
52
+ - `python scripts/webcam_live.py`
53
+ - `python scripts/video_infer.py` (expects 1.mp4 / 2.mp4 in repo root or set `VIDEOS` env)
54
+ - `python scripts/focus_infer.py`
55
+
56
+ ---
57
+
58
+ ## Project structure
59
+
60
+ ```
61
+ ├── notebooks/
62
+ │ └── eye_classifier_colab.ipynb # Data + eval (no training)
63
+ ├── scripts/
64
+ │ ├── predict_image.py
65
+ │ ├── webcam_live.py
66
+ │ ├── video_infer.py
67
+ │ └── focus_infer.py
68
+ ├── weights/ # best.pt, face_landmarker.task
69
+ ├── docs/ # extra docs
70
+ ├── README.md
71
+ └── venv/ # optional
72
+ ```
73
+
74
+ Training and weight generation: **model team, separate notebook.**
models/cnn/CNN_MODEL/notebooks/eye_classifier_colab.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
models/cnn/CNN_MODEL/scripts/focus_infer.py ADDED
@@ -0,0 +1,199 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+ import os
5
+
6
+ import cv2
7
+ import numpy as np
8
+ from ultralytics import YOLO
9
+
10
+
11
def list_images(folder: Path):
    """Return the image files directly inside *folder*, sorted by path.

    Only regular files whose suffix (compared case-insensitively) is one of
    the known image extensions are returned. Sub-directories are skipped even
    when their name ends in an image extension, so every returned path can be
    opened as a file by the caller.
    """
    exts = {".png", ".jpg", ".jpeg", ".bmp", ".webp"}
    return sorted(
        p for p in folder.iterdir()
        if p.is_file() and p.suffix.lower() in exts
    )
14
+
15
+
16
def find_weights(project_root: Path) -> Path | None:
    """Locate classifier weights below *project_root*.

    The search order is ``weights/best.pt`` first, then ``best.pt`` followed
    by ``last.pt`` in each of the two known training-run directories. The
    first path that exists as a file is returned; ``None`` means nothing was
    found.
    """
    run_dirs = (
        project_root / "runs" / "classify" / "runs_cls" / "eye_open_closed_cpu",
        project_root / "runs_cls" / "eye_open_closed_cpu",
    )
    search_order = [project_root / "weights" / "best.pt"]
    for run_dir in run_dirs:
        for filename in ("best.pt", "last.pt"):
            search_order.append(run_dir / "weights" / filename)

    for candidate in search_order:
        if candidate.is_file():
            return candidate
    return None
25
+
26
+
27
def detect_eyelid_boundary(
    gray: np.ndarray,
) -> tuple[tuple[float, float], tuple[float, float], float] | None:
    """Fit an ellipse to the largest usable edge contour in *gray*.

    The image is blurred, run through Canny edge detection and dilated once;
    external contours are then extracted. Among contours that have at least
    five points and an area above 50, the one with the largest area is fitted
    with ``cv2.fitEllipse``.

    Returns:
        ``((center_x, center_y), (axis1, axis2), angle)`` as produced by
        ``cv2.fitEllipse``, or ``None`` when no contour qualifies.
    """
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    edge_map = cv2.Canny(blurred, 40, 120)
    edge_map = cv2.dilate(edge_map, np.ones((3, 3), np.uint8), iterations=1)
    contours, _ = cv2.findContours(edge_map, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # The five-point minimum mirrors the guard in the original code;
    # the area floor drops tiny specks of edge noise.
    usable = [c for c in contours if len(c) >= 5 and cv2.contourArea(c) > 50]
    if not usable:
        return None
    return cv2.fitEllipse(max(usable, key=cv2.contourArea))
43
+
44
+
45
def detect_pupil_center(gray: np.ndarray) -> tuple[int, int] | None:
    """Estimate the pupil centre in a single-channel eye image.

    Steps:
    - enhance contrast (CLAHE) and blur,
    - restrict the search to the central region (30% of width/height on each
      side of the centre) to avoid eyelashes and image edges,
    - threshold dark pixels (inverted Otsu) and clean up with morphology,
    - score each remaining blob by circularity minus its normalised distance
      to the image centre and keep the best one.

    Returns:
        ``(x, y)`` in coordinates of the full image, or ``None`` when the
        image is too small to have a central region or no blob qualifies.
    """
    h, w = gray.shape

    # Central search window.
    cx, cy = w // 2, h // 2
    rx, ry = int(w * 0.3), int(h * 0.3)
    x0, x1 = max(cx - rx, 0), min(cx + rx, w)
    y0, y1 = max(cy - ry, 0), min(cy + ry, h)
    if x1 <= x0 or y1 <= y0:
        # Images only a few pixels wide/tall produce an empty window, which
        # the OpenCV calls below would reject with an assertion error.
        return None

    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    blur = cv2.GaussianBlur(clahe.apply(gray), (7, 7), 0)
    roi = blur[y0:y1, x0:x1]

    # Inverted threshold so the dark pupil becomes foreground.
    _, mask = cv2.threshold(roi, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, np.ones((3, 3), np.uint8), iterations=2)
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, np.ones((5, 5), np.uint8), iterations=1)

    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    best = None
    best_score = -1.0
    for contour in contours:
        area = cv2.contourArea(contour)
        if area < 15:
            continue
        perimeter = cv2.arcLength(contour, True)
        if perimeter <= 0:
            continue
        circularity = 4 * np.pi * (area / (perimeter * perimeter))
        if circularity < 0.3:
            continue
        m = cv2.moments(contour)
        if m["m00"] == 0:
            continue
        # Centroid of the blob, shifted back into full-image coordinates.
        px = int(m["m10"] / m["m00"]) + x0
        py = int(m["m01"] / m["m00"]) + y0

        # Prefer round blobs close to the image centre.
        dist = np.hypot(px - cx, py - cy) / max(w, h)
        score = circularity - dist
        if score > best_score:
            best_score = score
            best = (px, py)

    return best
99
+
100
+
101
def is_focused(pupil_center: tuple[int, int], img_shape: tuple[int, ...]) -> bool:
    """Decide focus based on the pupil offset from the image centre.

    Args:
        pupil_center: ``(x, y)`` pixel position of the pupil.
        img_shape: ``(height, width)`` of the image; extra trailing entries
            (for example a channel count) are ignored.

    Returns:
        True when both the horizontal and the vertical offset are below 12%
        of the image width / height respectively.
    """
    h, w = img_shape[:2]
    px, py = pupil_center
    # max(..., 1) keeps the division defined for degenerate zero-sized shapes.
    dx = abs(px - w // 2) / max(w, 1)
    dy = abs(py - h // 2) / max(h, 1)
    return (dx < 0.12) and (dy < 0.12)
111
+
112
+
113
def annotate(img_bgr: np.ndarray, ellipse, pupil_center, focused: bool, cls_label: str, conf: float):
    """Return a copy of *img_bgr* with the detection results drawn on it.

    The eyelid ellipse (if any) is drawn in yellow, the pupil centre (if any)
    as a filled red dot, and a green caption with the class label, its
    confidence and the focus flag is written in the top-left corner. The
    input image itself is not modified.
    """
    canvas = img_bgr.copy()

    has_ellipse = ellipse is not None
    if has_ellipse:
        cv2.ellipse(canvas, ellipse, (0, 255, 255), 2)

    has_pupil = pupil_center is not None
    if has_pupil:
        cv2.circle(canvas, pupil_center, 4, (0, 0, 255), -1)

    focus_flag = int(focused)
    caption = f"{cls_label} ({conf:.2f}) | focused={focus_flag}"
    cv2.putText(canvas, caption, (8, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
    return canvas
122
+
123
+
124
def main():
    """Classify the test images and write annotated copies to ``runs_focus/``.

    Test folders are looked up under ``Dataset/`` first and ``DATA/`` second.
    The number of images can be capped with the ``MAX_IMAGES`` environment
    variable (0 or unset means no limit). For every image the classifier
    label, eyelid ellipse, pupil centre and focus flag are computed, drawn,
    saved as ``<stem>_annotated.jpg`` and printed.
    """
    project_root = Path(__file__).resolve().parent.parent
    data_dir = project_root / "Dataset"
    alt_data_dir = project_root / "DATA"
    out_dir = project_root / "runs_focus"
    out_dir.mkdir(parents=True, exist_ok=True)

    weights = find_weights(project_root)
    if weights is None:
        print("Weights not found. Train first.")
        return

    # Support both Dataset/test/{open,closed} and Dataset/{open,closed}
    # (the latter also with the closed folder spelled "close").
    def resolve_test_dirs(root: Path):
        test_open = root / "test" / "open"
        test_closed = root / "test" / "closed"
        if test_open.exists() and test_closed.exists():
            return test_open, test_closed
        test_open = root / "open"
        test_closed = root / "closed"
        if test_open.exists() and test_closed.exists():
            return test_open, test_closed
        alt_closed = root / "close"
        if test_open.exists() and alt_closed.exists():
            return test_open, alt_closed
        return None, None

    test_open, test_closed = resolve_test_dirs(data_dir)
    if (test_open is None or test_closed is None) and alt_data_dir.exists():
        test_open, test_closed = resolve_test_dirs(alt_data_dir)

    # resolve_test_dirs signals failure with (None, None); calling .exists()
    # on that would raise AttributeError instead of reaching this message.
    if test_open is None or test_closed is None:
        print("Test folders missing. Expected:")
        print(data_dir / "test" / "open")
        print(data_dir / "test" / "closed")
        return

    test_files = list_images(test_open) + list_images(test_closed)
    print("Total test images:", len(test_files))
    if not test_files:
        # Nothing to predict on; avoid handing an empty source list to YOLO.
        print("No test images found.")
        return
    max_images = int(os.getenv("MAX_IMAGES", "0"))
    if max_images > 0:
        test_files = test_files[:max_images]
        print("Limiting to MAX_IMAGES:", max_images)

    model = YOLO(str(weights))
    results = model.predict(test_files, imgsz=224, device="cpu", verbose=False)

    names = model.names
    for r in results:
        probs = r.probs
        top_idx = int(probs.top1)
        top_conf = float(probs.top1conf)
        pred_label = names[top_idx]

        img = cv2.imread(r.path)
        if img is None:
            # Unreadable image: skip it rather than abort the whole run.
            continue
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        ellipse = detect_eyelid_boundary(gray)
        pupil_center = detect_pupil_center(gray)
        # Focus is only meaningful for an open eye with a detected pupil.
        focused = False
        if pred_label.lower() == "open" and pupil_center is not None:
            focused = is_focused(pupil_center, gray.shape)

        annotated = annotate(img, ellipse, pupil_center, focused, pred_label, top_conf)
        # NOTE(review): outputs are keyed by file stem only, so an image in
        # open/ and one in closed/ with the same stem overwrite each other.
        out_path = out_dir / (Path(r.path).stem + "_annotated.jpg")
        cv2.imwrite(str(out_path), annotated)

        print(f"{Path(r.path).name}: pred={pred_label} conf={top_conf:.3f} focused={focused}")

    print(f"\nAnnotated outputs saved to: {out_dir}")
196
+
197
+
198
+ if __name__ == "__main__":
199
+ main()
models/cnn/CNN_MODEL/scripts/predict_image.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Run the eye open/closed model on one or more images."""
2
+ import sys
3
+ from pathlib import Path
4
+
5
+ from ultralytics import YOLO
6
+
7
+
8
def main():
    """Classify each image path given on the command line and print the result.

    Exits with status 1 when no weights file can be found and with status 0
    after printing usage when no image path was given. Per-image problems
    (missing file, prediction error, empty result) are reported on stdout and
    the remaining images are still processed.
    """
    root = Path(__file__).resolve().parent.parent
    run_weights_dir = root / "runs" / "classify" / "runs_cls" / "eye_open_closed_cpu" / "weights"

    weights = None
    for candidate in (
        root / "weights" / "best.pt",
        run_weights_dir / "best.pt",
        run_weights_dir / "last.pt",
    ):
        if candidate.is_file():
            weights = candidate
            break
    if weights is None:
        print("Weights not found. Put best.pt in weights/ or runs/.../weights/ (from model team).")
        sys.exit(1)

    image_args = sys.argv[1:]
    if not image_args:
        print("Usage: python scripts/predict_image.py <image1> [image2 ...]")
        print("Example: python scripts/predict_image.py path/to/image.png")
        sys.exit(0)

    model = YOLO(str(weights))
    class_names = model.names

    for raw_path in image_args:
        image_path = Path(raw_path)
        if not image_path.is_file():
            print(image_path, "- file not found")
            continue

        try:
            predictions = model.predict(str(image_path), imgsz=224, device="cpu", verbose=False)
        except Exception as e:
            # Keep going: one unreadable image should not stop the batch.
            print(image_path, "- error:", e)
            continue

        if not predictions:
            print(image_path, "- no result")
            continue

        first = predictions[0]
        top_idx = int(first.probs.top1)
        conf = float(first.probs.top1conf)
        label = class_names[top_idx]
        print(f"{image_path.name}: {label} ({conf:.2%})")
46
+
47
+
48
+ if __name__ == "__main__":
49
+ main()
models/cnn/CNN_MODEL/scripts/video_infer.py ADDED
@@ -0,0 +1,281 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ from pathlib import Path
5
+
6
+ import cv2
7
+ import numpy as np
8
+ from ultralytics import YOLO
9
+
10
+ try:
11
+ import mediapipe as mp
12
+ except Exception: # pragma: no cover
13
+ mp = None
14
+
15
+
16
def find_weights(project_root: Path) -> Path | None:
    """Return the first existing weights file under *project_root*, or None.

    Checked in order: ``weights/best.pt``, then ``best.pt`` and ``last.pt``
    under ``runs/classify/runs_cls/eye_open_closed_cpu/weights``, then the
    same two names under ``runs_cls/eye_open_closed_cpu/weights``.
    """
    nested_run = project_root / "runs" / "classify" / "runs_cls" / "eye_open_closed_cpu" / "weights"
    flat_run = project_root / "runs_cls" / "eye_open_closed_cpu" / "weights"
    ordered = (
        project_root / "weights" / "best.pt",
        nested_run / "best.pt",
        nested_run / "last.pt",
        flat_run / "best.pt",
        flat_run / "last.pt",
    )
    existing = [path for path in ordered if path.is_file()]
    return existing[0] if existing else None
25
+
26
+
27
def detect_pupil_center(gray: np.ndarray) -> tuple[int, int] | None:
    """Estimate the pupil centre in a single-channel image.

    The image is contrast-enhanced (CLAHE) and blurred, the central window
    (30% of width/height on each side of the centre) is thresholded with
    inverted Otsu so dark regions become foreground, and the mask is cleaned
    with morphological open/close. Each remaining blob with area >= 15 and
    circularity >= 0.3 is scored by circularity minus its normalised
    distance to the image centre; the best-scoring centroid is returned.

    Returns:
        ``(x, y)`` in full-image coordinates, or ``None`` when the image is
        too small to have a central window or no blob qualifies.
    """
    h, w = gray.shape

    cx, cy = w // 2, h // 2
    rx, ry = int(w * 0.3), int(h * 0.3)
    x0, x1 = max(cx - rx, 0), min(cx + rx, w)
    y0, y1 = max(cy - ry, 0), min(cy + ry, h)
    if x1 <= x0 or y1 <= y0:
        # Images only a few pixels wide/tall give an empty window, which the
        # OpenCV calls below would reject with an assertion error.
        return None

    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    blur = cv2.GaussianBlur(clahe.apply(gray), (7, 7), 0)
    roi = blur[y0:y1, x0:x1]

    _, mask = cv2.threshold(roi, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, np.ones((3, 3), np.uint8), iterations=2)
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, np.ones((5, 5), np.uint8), iterations=1)

    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    best = None
    best_score = -1.0
    for contour in contours:
        area = cv2.contourArea(contour)
        if area < 15:
            continue
        perimeter = cv2.arcLength(contour, True)
        if perimeter <= 0:
            continue
        circularity = 4 * np.pi * (area / (perimeter * perimeter))
        if circularity < 0.3:
            continue
        m = cv2.moments(contour)
        if m["m00"] == 0:
            continue
        # Blob centroid, shifted from window to full-image coordinates.
        px = int(m["m10"] / m["m00"]) + x0
        py = int(m["m01"] / m["m00"]) + y0

        dist = np.hypot(px - cx, py - cy) / max(w, h)
        score = circularity - dist
        if score > best_score:
            best_score = score
            best = (px, py)

    return best
72
+
73
+
74
def is_focused(pupil_center: tuple[int, int], img_shape: tuple[int, ...]) -> bool:
    """Decide focus from the horizontal pupil offset only.

    Args:
        pupil_center: ``(x, y)`` pixel position of the pupil; only ``x`` is
            used.
        img_shape: ``(height, width)`` of the image; extra trailing entries
            (for example a channel count) are ignored.

    Returns:
        True when the pupil is less than 12% of the image width away from the
        horizontal centre.
    """
    _, w = img_shape[:2]
    px, _ = pupil_center
    # max(w, 1) keeps the division defined for a zero-width shape.
    dx = abs(px - w // 2) / max(w, 1)
    return dx < 0.12
80
+
81
+
82
def classify_frame(model: YOLO, frame: np.ndarray) -> tuple[str, float]:
    """Run the classifier on *frame* and return ``(label, confidence)``.

    The frame is passed to the model as-is (it is assumed to already be an
    eye crop); the top-1 class name and its confidence are returned.
    """
    probs = model.predict(frame, imgsz=224, device="cpu", verbose=False)[0].probs
    best_class = int(probs.top1)
    return model.names[best_class], float(probs.top1conf)
91
+
92
+
93
def annotate_frame(frame: np.ndarray, label: str, focused: bool, conf: float, time_sec: float):
    """Return a copy of *frame* with a green status line drawn at the top-left.

    The status line shows the label, the focus flag as 0/1, the confidence
    and the timestamp in seconds. The input frame is left untouched.
    """
    fields = (
        f"{label}",
        f"focused={int(focused)}",
        f"conf={conf:.2f}",
        f"t={time_sec:.2f}s",
    )
    canvas = frame.copy()
    cv2.putText(canvas, " | ".join(fields), (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
    return canvas
98
+
99
+
100
def write_segments(path: Path, segments: list[tuple[float, float, str]]):
    """Write *segments* to *path*, one ``start,end,label`` line per segment.

    Start and end times are formatted with two decimals. Any existing file
    at *path* is overwritten.
    """
    with path.open("w") as sink:
        sink.writelines(
            f"{begin:.2f},{finish:.2f},{name}\n" for begin, finish, name in segments
        )
104
+
105
+
106
# MediaPipe FaceMesh landmark indices used by process_video. The iris
# indices (468+) are only read from a FaceMesh created with
# refine_landmarks=True, as process_video does below.
_FM_LEFT_EYE = (33, 160, 158, 133, 153, 144)
_FM_RIGHT_EYE = (362, 385, 387, 263, 373, 380)
_FM_LEFT_IRIS = (468, 469, 470, 471)
_FM_RIGHT_IRIS = (473, 474, 475, 476)


def _landmark_points(landmarks, idxs, width: int, height: int) -> np.ndarray:
    """Convert normalised landmarks to an (N, 2) array of int32 pixel points."""
    # int32 is required by cv2.polylines; numpy's default int64 is rejected.
    return np.array(
        [(int(landmarks[i].x * width), int(landmarks[i].y * height)) for i in idxs],
        dtype=np.int32,
    )


def _eye_aspect_ratio(eye_pts: np.ndarray) -> float:
    """Eye aspect ratio (EAR) from six eye points: vertical / horizontal extent."""
    p1, p2, p3, p4, p5, p6 = eye_pts
    v1 = np.linalg.norm(p2 - p6)
    v2 = np.linalg.norm(p3 - p5)
    h1 = np.linalg.norm(p1 - p4)
    # The epsilon keeps the ratio finite when both eye corners coincide.
    return (v1 + v2) / (2.0 * h1 + 1e-6)


def _analyse_face(frame: np.ndarray, landmarks) -> tuple[str, bool]:
    """Derive (label, focused) from face landmarks and draw the eyes on *frame*."""
    h, w = frame.shape[:2]
    le = _landmark_points(landmarks, _FM_LEFT_EYE, w, h)
    re = _landmark_points(landmarks, _FM_RIGHT_EYE, w, h)

    # Openness threshold on the average EAR of both eyes.
    ear_avg = (_eye_aspect_ratio(le) + _eye_aspect_ratio(re)) / 2.0
    pred_label = "open" if ear_avg > 0.22 else "closed"

    # Iris centres.
    li_c = _landmark_points(landmarks, _FM_LEFT_IRIS, w, h).mean(axis=0).astype(int)
    ri_c = _landmark_points(landmarks, _FM_RIGHT_IRIS, w, h).mean(axis=0).astype(int)

    # Eye centres (midpoint of the two corners).
    le_c = ((le[0] + le[3]) / 2).astype(int)
    re_c = ((re[0] + re[3]) / 2).astype(int)

    # Focus = iris close to the eye centre horizontally, for both eyes,
    # measured relative to the eye width.
    le_dx = abs(li_c[0] - le_c[0]) / max(np.linalg.norm(le[0] - le[3]), 1)
    re_dx = abs(ri_c[0] - re_c[0]) / max(np.linalg.norm(re[0] - re[3]), 1)
    focused = (pred_label == "open") and (le_dx < 0.18) and (re_dx < 0.18)

    # Draw eye boundaries and iris centres. Plain Python ints are passed to
    # cv2.circle because some OpenCV builds reject numpy scalar coordinates.
    cv2.polylines(frame, [le], True, (0, 255, 255), 1)
    cv2.polylines(frame, [re], True, (0, 255, 255), 1)
    cv2.circle(frame, (int(li_c[0]), int(li_c[1])), 3, (0, 0, 255), -1)
    cv2.circle(frame, (int(ri_c[0]), int(ri_c[1])), 3, (0, 0, 255), -1)
    return pred_label, bool(focused)


def process_video(video_path: Path, model: YOLO | None):
    """Label every frame of *video_path* and write the results next to it.

    Three files are written beside the input video:
    ``<stem>_pred.mp4`` (annotated video), ``<stem>_predictions.csv``
    (one row per frame) and ``<stem>_segments.txt`` (runs of equal labels).

    When MediaPipe FaceMesh is available, open/closed comes from the eye
    aspect ratio and focus from the iris position; a frame without a detected
    face is labelled closed. Otherwise the classifier *model* (if given) is
    run on the whole frame and focus comes from ``detect_pupil_center``.

    Args:
        video_path: Input video file.
        model: Optional classifier used only in classifier-only mode.
    """
    cap = cv2.VideoCapture(str(video_path))
    if not cap.isOpened():
        print(f"Failed to open {video_path}")
        cap.release()
        return

    # Some containers report 0 fps; fall back to 30.
    fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    out_path = video_path.with_name(video_path.stem + "_pred.mp4")
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    writer = cv2.VideoWriter(str(out_path), fourcc, fps, (width, height))

    csv_path = video_path.with_name(video_path.stem + "_predictions.csv")
    seg_path = video_path.with_name(video_path.stem + "_segments.txt")

    frame_idx = 0
    last_label = None
    seg_start = 0.0
    segments: list[tuple[float, float, str]] = []

    # Newer mediapipe builds may not expose the legacy `solutions` API;
    # treat that like a missing install instead of crashing on attribute access.
    use_mp = mp is not None and hasattr(mp, "solutions")
    if mp is None:
        print("mediapipe is not installed. Falling back to classifier-only mode.")
    elif not use_mp:
        print("Installed mediapipe has no 'solutions' API. Falling back to classifier-only mode.")

    face_mesh = None
    try:
        if use_mp:
            face_mesh = mp.solutions.face_mesh.FaceMesh(
                static_image_mode=False,
                max_num_faces=1,
                refine_landmarks=True,
                min_detection_confidence=0.5,
                min_tracking_confidence=0.5,
            )

        with csv_path.open("w") as fcsv:
            fcsv.write("time_sec,label,focused,conf\n")

            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                time_sec = frame_idx / fps
                conf = 0.0
                pred_label = "open"
                focused = False

                if use_mp:
                    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    res = face_mesh.process(rgb)
                    if res.multi_face_landmarks:
                        pred_label, focused = _analyse_face(
                            frame, res.multi_face_landmarks[0].landmark
                        )
                    else:
                        # No face found: treated as eyes closed.
                        pred_label = "closed"
                        focused = False
                else:
                    if model is not None:
                        pred_label, conf = classify_frame(model, frame)
                    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                    eye_open = pred_label.lower() == "open"
                    pupil_center = detect_pupil_center(gray) if eye_open else None
                    focused = pupil_center is not None and is_focused(pupil_center, gray.shape)

                is_open = pred_label.lower() == "open"
                if not is_open:
                    focused = False
                    label = "closed_not_focused"
                elif focused:
                    label = "open_focused"
                else:
                    label = "open_not_focused"

                fcsv.write(f"{time_sec:.2f},{label},{int(focused)},{conf:.4f}\n")

                # Close the running segment whenever the label changes.
                if last_label is None:
                    last_label = label
                    seg_start = time_sec
                elif label != last_label:
                    segments.append((seg_start, time_sec, last_label))
                    seg_start = time_sec
                    last_label = label

                annotated = annotate_frame(frame, label, focused, conf, time_sec)
                writer.write(annotated)
                frame_idx += 1

        if last_label is not None:
            end_time = frame_idx / fps
            segments.append((seg_start, end_time, last_label))
        write_segments(seg_path, segments)
    finally:
        # Release native resources even if a frame fails half-way through.
        if face_mesh is not None:
            face_mesh.close()
        cap.release()
        writer.release()

    print(f"Saved: {out_path}")
    print(f"CSV: {csv_path}")
    print(f"Segments: {seg_path}")
249
+
250
+
251
def main():
    """Process the default videos plus any listed in the ``VIDEOS`` env var.

    ``1.mp4`` and ``2.mp4`` in the project root are used when present.
    ``VIDEOS`` may hold a comma-separated list of further paths; relative
    entries are resolved against the project root and non-existent ones are
    ignored. The classifier is loaded only if a weights file is found.
    """
    project_root = Path(__file__).resolve().parent.parent
    weights = find_weights(project_root)
    model = None if weights is None else YOLO(str(weights))

    # Default to 1.mp4 and 2.mp4 in project root
    defaults = (project_root / name for name in ("1.mp4", "2.mp4"))
    videos = [path for path in defaults if path.exists()]

    # Also allow passing paths via env var
    for raw_entry in os.getenv("VIDEOS", "").split(","):
        entry = raw_entry.strip()
        if not entry:
            continue
        candidate = Path(entry)
        if not candidate.is_absolute():
            candidate = project_root / candidate
        if candidate.exists():
            videos.append(candidate)

    if not videos:
        print("No videos found. Expected 1.mp4 / 2.mp4 in project root.")
        return

    for video in videos:
        process_video(video, model)
278
+
279
+
280
+ if __name__ == "__main__":
281
+ main()
models/cnn/CNN_MODEL/scripts/webcam_live.py ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Live webcam: detect face, crop each eye, run open/closed classifier, show on screen.
3
+ Requires: opencv-python, ultralytics, mediapipe (pip install mediapipe).
4
+ Press 'q' to quit.
5
+ """
6
+ import urllib.request
7
+ from pathlib import Path
8
+
9
+ import cv2
10
+ import numpy as np
11
+ from ultralytics import YOLO
12
+
13
+ try:
14
+ import mediapipe as mp
15
+ _mp_has_solutions = hasattr(mp, "solutions")
16
+ except ImportError:
17
+ mp = None
18
+ _mp_has_solutions = False
19
+
20
+ # New MediaPipe Tasks API (Face Landmarker) eye indices
21
+ LEFT_EYE_INDICES_NEW = [263, 249, 390, 373, 374, 380, 381, 382, 362, 466, 388, 387, 386, 385, 384, 398]
22
+ RIGHT_EYE_INDICES_NEW = [33, 7, 163, 144, 145, 153, 154, 155, 133, 246, 161, 160, 159, 158, 157, 173]
23
+ # Old Face Mesh (solutions) indices
24
+ LEFT_EYE_INDICES_OLD = [33, 160, 158, 133, 153, 144]
25
+ RIGHT_EYE_INDICES_OLD = [362, 385, 387, 263, 373, 380]
26
+ EYE_PADDING = 0.35
27
+
28
+
29
def find_weights(project_root: Path) -> "Path | None":
    """Return the first existing classifier weights file, or None.

    Checked in order: ``weights/best.pt``, then ``best.pt`` and ``last.pt``
    under ``runs/classify/runs_cls/eye_open_closed_cpu/weights``.

    The return annotation is a string because this module does not enable
    postponed annotation evaluation; an unquoted ``Path | None`` is evaluated
    at definition time and raises TypeError on Python older than 3.10.
    """
    run_weights = project_root / "runs" / "classify" / "runs_cls" / "eye_open_closed_cpu" / "weights"
    candidates = [
        project_root / "weights" / "best.pt",
        run_weights / "best.pt",
        run_weights / "last.pt",
    ]
    return next((p for p in candidates if p.is_file()), None)
36
+
37
+
38
def get_eye_roi(frame: np.ndarray, landmarks, indices: "list[int]") -> "np.ndarray | None":
    """Crop the padded bounding box of one eye out of *frame*.

    Args:
        frame: Image the landmarks refer to; only its height and width are
            used for scaling.
        landmarks: Indexable collection whose items expose normalised ``x``
            and ``y`` attributes.
        indices: Landmark indices that outline the eye.

    Returns:
        A copy of the cropped region, or ``None`` when the clamped box is
        empty.

    The annotations are strings because this module does not enable
    postponed annotation evaluation; an unquoted ``np.ndarray | None`` raises
    TypeError at definition time on Python older than 3.10.
    """
    h, w = frame.shape[:2]
    pts = np.array([(int(landmarks[i].x * w), int(landmarks[i].y * h)) for i in indices])
    x_min, y_min = pts.min(axis=0)
    x_max, y_max = pts.max(axis=0)

    # Pad by a fraction of the eye size, but never by less than 8 px so a
    # nearly closed (very flat) eye still yields a usable crop.
    dx = max(int((x_max - x_min) * EYE_PADDING), 8)
    dy = max(int((y_max - y_min) * EYE_PADDING), 8)

    # Clamp the padded box to the frame.
    x0 = max(0, x_min - dx)
    y0 = max(0, y_min - dy)
    x1 = min(w, x_max + dx)
    y1 = min(h, y_max + dy)
    if x1 <= x0 or y1 <= y0:
        return None
    return frame[y0:y1, x0:x1].copy()
52
+
53
+
54
def _run_with_solutions(mp, model, cap):
    """Webcam loop using the legacy ``mp.solutions`` FaceMesh API.

    For every captured frame the first detected face is located, each eye is
    cropped and classified, and both results are drawn on the frame shown in
    a window. The loop ends when the capture stops delivering frames or the
    user presses 'q'. The FaceMesh instance is closed on exit.

    Args:
        mp: The imported ``mediapipe`` module.
        model: Loaded classifier exposing ``predict`` and ``names``.
        cap: An opened ``cv2.VideoCapture``; it is not released here.
    """
    face_mesh = mp.solutions.face_mesh.FaceMesh(
        static_image_mode=False,
        max_num_faces=1,
        refine_landmarks=True,
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5,
    )
    # NOTE(review): which physical eye "left"/"right" refers to here may not
    # match the Tasks-API code path — confirm against the index constants.
    eyes = (("left", LEFT_EYE_INDICES_OLD), ("right", RIGHT_EYE_INDICES_OLD))
    warned = False
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = face_mesh.process(rgb)

            # ASCII placeholder: cv2.putText renders non-ASCII characters
            # (such as an em dash) as question marks.
            readings = {"left": ("--", 0.0), "right": ("--", 0.0)}
            if results.multi_face_landmarks:
                lm = results.multi_face_landmarks[0].landmark
                for side, indices in eyes:
                    roi = get_eye_roi(frame, lm, indices)
                    if roi is None or roi.size == 0:
                        continue
                    try:
                        pred = model.predict(roi, imgsz=224, device="cpu", verbose=False)
                        if pred:
                            r = pred[0]
                            readings[side] = (model.names[int(r.probs.top1)], float(r.probs.top1conf))
                    except Exception as exc:
                        # Best effort: a failing eye crop must not stop the
                        # live view, but report the first failure so a broken
                        # model is not silently shown as "no reading".
                        if not warned:
                            print(f"Eye classification failed ({side}): {exc}")
                            warned = True

            left_label, left_conf = readings["left"]
            right_label, right_conf = readings["right"]
            cv2.putText(frame, f"L: {left_label} ({left_conf:.0%})", (20, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
            cv2.putText(frame, f"R: {right_label} ({right_conf:.0%})", (20, 80), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
            cv2.imshow("Eye open/closed (q to quit)", frame)
            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
    finally:
        face_mesh.close()
94
+
95
+
96
def _run_with_tasks(project_root: Path, model, cap):
    """Webcam loop using the MediaPipe Tasks Face Landmarker API.

    The ``face_landmarker.task`` model is expected in ``weights/`` and is
    downloaded there when missing. For every captured frame the first
    detected face is located, each eye is cropped and classified, and both
    results are drawn on the frame shown in a window. The loop ends when the
    capture stops delivering frames or the user presses 'q'. The landmarker
    is closed on exit.

    Args:
        project_root: Repository root that contains ``weights/``.
        model: Loaded classifier exposing ``predict`` and ``names``.
        cap: An opened ``cv2.VideoCapture``; it is not released here.
    """
    from mediapipe.tasks.python import BaseOptions
    from mediapipe.tasks.python.vision import FaceLandmarker, FaceLandmarkerOptions
    from mediapipe.tasks.python.vision.core import vision_task_running_mode as running_mode
    from mediapipe.tasks.python.vision.core import image as image_lib

    model_path = project_root / "weights" / "face_landmarker.task"
    if not model_path.is_file():
        print("Downloading face_landmarker.task ...")
        url = "https://storage.googleapis.com/mediapipe-models/face_landmarker/face_landmarker/float16/1/face_landmarker.task"
        # weights/ may not exist yet on a fresh checkout.
        model_path.parent.mkdir(parents=True, exist_ok=True)
        # Download to a temporary name and rename afterwards, so an
        # interrupted download never leaves a truncated file that would pass
        # the is_file() check on the next run.
        partial_path = model_path.with_name(model_path.name + ".part")
        try:
            urllib.request.urlretrieve(url, partial_path)
            partial_path.replace(model_path)
        finally:
            if partial_path.exists():
                partial_path.unlink()
        print("Done.")

    options = FaceLandmarkerOptions(
        base_options=BaseOptions(model_asset_path=str(model_path)),
        running_mode=running_mode.VisionTaskRunningMode.IMAGE,
        num_faces=1,
    )
    face_landmarker = FaceLandmarker.create_from_options(options)
    ImageFormat = image_lib.ImageFormat

    eyes = (("left", LEFT_EYE_INDICES_NEW), ("right", RIGHT_EYE_INDICES_NEW))
    warned = False
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            rgb_contiguous = np.ascontiguousarray(rgb)
            mp_image = image_lib.Image(ImageFormat.SRGB, rgb_contiguous)
            result = face_landmarker.detect(mp_image)

            # ASCII placeholder: cv2.putText renders non-ASCII characters
            # (such as an em dash) as question marks.
            readings = {"left": ("--", 0.0), "right": ("--", 0.0)}
            if result.face_landmarks:
                lm = result.face_landmarks[0]
                for side, indices in eyes:
                    roi = get_eye_roi(frame, lm, indices)
                    if roi is None or roi.size == 0:
                        continue
                    try:
                        pred = model.predict(roi, imgsz=224, device="cpu", verbose=False)
                        if pred:
                            r = pred[0]
                            readings[side] = (model.names[int(r.probs.top1)], float(r.probs.top1conf))
                    except Exception as exc:
                        # Best effort: keep the live view running, but report
                        # the first failure instead of hiding it.
                        if not warned:
                            print(f"Eye classification failed ({side}): {exc}")
                            warned = True

            left_label, left_conf = readings["left"]
            right_label, right_conf = readings["right"]
            cv2.putText(frame, f"L: {left_label} ({left_conf:.0%})", (20, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
            cv2.putText(frame, f"R: {right_label} ({right_conf:.0%})", (20, 80), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
            cv2.imshow("Eye open/closed (q to quit)", frame)
            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
    finally:
        face_landmarker.close()
154
+
155
+
156
def main():
    """Entry point: open the default webcam and run the live eye classifier.

    Returns early with a message when no weights file is found, MediaPipe is
    not importable, or the webcam cannot be opened. The legacy FaceMesh loop
    is used when the installed MediaPipe exposes ``solutions``; otherwise the
    Tasks-API loop is used. The capture and all OpenCV windows are released
    on exit, even if the loop raises.
    """
    root = Path(__file__).resolve().parent.parent

    weights_file = find_weights(root)
    if weights_file is None:
        print("Weights not found. Put best.pt in weights/ or runs/.../weights/ (from model team).")
        return
    if mp is None:
        print("MediaPipe required. Install: pip install mediapipe")
        return

    classifier = YOLO(str(weights_file))
    camera = cv2.VideoCapture(0)
    if not camera.isOpened():
        print("Could not open webcam.")
        return

    print("Live eye open/closed on your face. Press 'q' to quit.")
    try:
        if not _mp_has_solutions:
            _run_with_tasks(root, classifier, camera)
        else:
            _run_with_solutions(mp, classifier, camera)
    finally:
        camera.release()
        cv2.destroyAllWindows()
181
+
182
+
183
+ if __name__ == "__main__":
184
+ main()
models/cnn/CNN_MODEL/weights/yolo11s-cls.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2b605d1c8c212b434a75a32759a6f7adf1d2b29c35f76bdccd4c794cb653cf2
3
+ size 13630112