import os
import cv2
import time
import wave
import numpy as np
import mediapipe as mp
import gradio as gr
from collections import deque
# --- Load alarm.wav into a numpy array for playback ---
ALARM_PATH = os.path.abspath("alarm.wav")
with wave.open(ALARM_PATH, "rb") as wf:
    SR = wf.getframerate()
    CHANNELS = wf.getnchannels()
    AUDIO_BYTES = wf.readframes(wf.getnframes())
AUDIO_DATA = np.frombuffer(AUDIO_BYTES, dtype=np.int16)
if CHANNELS > 1:
    AUDIO_DATA = AUDIO_DATA.reshape(-1, CHANNELS)
# --- Silence chunk for immediate muting ---
SILENCE_DURATION = 0.1  # seconds
SILENCE_FRAMES = int(SR * SILENCE_DURATION)
SILENCE_DATA = (np.zeros((SILENCE_FRAMES, CHANNELS), dtype=np.int16)
                if CHANNELS > 1 else
                np.zeros((SILENCE_FRAMES,), dtype=np.int16))
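# The silence buffer above is what gets pushed to the hidden, autoplaying
# Gradio Audio output when the eyes reopen, so a ringing alarm is cut off
# almost immediately instead of playing the clip to the end.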
# --- Initialize FaceMesh with refined landmarks for better eyelid accuracy ---
mp_face = mp.solutions.face_mesh
face_mesh = mp_face.FaceMesh(
    static_image_mode=False,
    max_num_faces=1,
    refine_landmarks=True,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5
)
# 6 landmarks per eye (standard EAR ordering)
LEFT_EYE = [33, 160, 158, 133, 153, 144]
RIGHT_EYE = [362, 385, 387, 263, 373, 380]
EAR_THRESHOLD = 0.20
CLOSED_TIME = 1.5  # seconds
# Global state
ttl_closed = None
last_alarm_time = 0
alarm_playing = False
ear_buffer = deque(maxlen=3)
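# ear_buffer holds the last 3 EAR values; their mean damps single-frame noise
# before the smoothed value is compared against EAR_THRESHOLD.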
# Detection settings
TARGET_W, TARGET_H = 480, 360
SKIP_FRAMES = 1
frame_counter = 0
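# EAR (eye aspect ratio, Soukupova & Cech 2016): with the six points ordered
# p1..p6 as in LEFT_EYE / RIGHT_EYE, EAR = (|p2-p6| + |p3-p5|) / (2 * |p1-p4|).
# The ratio drops toward zero as the eyelid closes.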
def eye_aspect_ratio(lm, inds, w, h):
    pts = [np.array([lm[i].x * w, lm[i].y * h]) for i in inds]
    A = np.linalg.norm(pts[1] - pts[5])
    B = np.linalg.norm(pts[2] - pts[4])
    C = np.linalg.norm(pts[0] - pts[3])
    return (A + B) / (2.0 * C)
def detect_and_alarm(frame_rgb: np.ndarray):
    global ttl_closed, last_alarm_time, alarm_playing, frame_counter
    # Make a writable copy
    frame = frame_rgb.copy()
    # Downscale for faster detection
    small = cv2.resize(frame, (TARGET_W, TARGET_H))
    small_rgb = small  # Gradio delivers RGB; FaceMesh also expects RGB
    # Run FaceMesh every SKIP_FRAMES frames (skip=1 means every frame)
    lm = None
    if frame_counter % SKIP_FRAMES == 0:
        res = face_mesh.process(small_rgb)
        if res.multi_face_landmarks:
            lm = res.multi_face_landmarks[0].landmark
    frame_counter += 1
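    # Note: with SKIP_FRAMES > 1, skipped frames would leave lm = None, the EAR
    # would default to 1.0 below, and the closed-eye timer would reset; at
    # SKIP_FRAMES = 1 every frame is processed, so this does not occur.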
    # Compute EAR
    ear = 1.0
    if lm:
        ear_l = eye_aspect_ratio(lm, LEFT_EYE, TARGET_W, TARGET_H)
        ear_r = eye_aspect_ratio(lm, RIGHT_EYE, TARGET_W, TARGET_H)
        ear = (ear_l + ear_r) / 2.0
    # Smooth the EAR
    ear_buffer.append(ear)
    ear_sm = sum(ear_buffer) / len(ear_buffer)
    # Overlay the EAR value
    cv2.putText(frame, f"EAR: {ear_sm:.2f}", (20, 30),
                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 2)
    audio_out = None
    # Check for closed eyes
    if ear_sm < EAR_THRESHOLD:
        if ttl_closed is None:
            ttl_closed = time.time()
        elapsed = time.time() - ttl_closed
        cv2.putText(frame, f"Closed: {elapsed:.1f}s", (20, 70),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 0, 0), 2)  # red (frame is RGB)
        if elapsed >= CLOSED_TIME:
            now = time.time()
            if (now - last_alarm_time) >= 6.0:
                audio_out = (SR, AUDIO_DATA)
                last_alarm_time = now
                alarm_playing = True
            cv2.putText(frame, "ALARM!", (20, 110),
                        cv2.FONT_HERSHEY_SIMPLEX, 1.2, (255, 0, 0), 3)  # red (frame is RGB)
    else:
        # When the eyes reopen, mute the alarm immediately
        if alarm_playing:
            audio_out = (SR, SILENCE_DATA)
            alarm_playing = False
        ttl_closed = None
    return frame, audio_out
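# detect_and_alarm returns the annotated RGB frame plus either None or a
# (sample_rate, samples) tuple for the hidden Audio component: the alarm clip
# when drowsiness is detected (re-triggered at most once every 6 s), or a
# short silence buffer to mute a playing alarm once the eyes reopen.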
with gr.Blocks() as demo:
    cam = gr.Image(sources=["webcam"], streaming=True, label="Webcam")
    out = gr.Image(label="Processed")
    audio = gr.Audio(type="numpy", autoplay=True, visible=False)
    cam.stream(fn=detect_and_alarm,
               inputs=cam,
               outputs=[out, audio],
               stream_every=0.1,
               time_limit=600)
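    # stream_every=0.1 asks Gradio to send a webcam frame roughly every 100 ms,
    # and time_limit=600 ends the stream after 10 minutes; both are Gradio
    # streaming parameters, with semantics as described in the Gradio docs.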
if __name__ == "__main__":
    demo.launch()