Spaces:
Sleeping
Sleeping
add more features
Browse files- app.py +285 -160
- requirements.txt +1 -0
app.py
CHANGED
|
@@ -1,3 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import cv2
|
| 3 |
import numpy as np
|
|
@@ -6,11 +10,17 @@ import torch.nn as nn
|
|
| 6 |
from torchvision.models import efficientnet_v2_s, EfficientNet_V2_S_Weights
|
| 7 |
import albumentations as A
|
| 8 |
from ultralytics import YOLO
|
|
|
|
|
|
|
| 9 |
from collections import deque
|
|
|
|
| 10 |
|
| 11 |
# ============================================================
|
| 12 |
-
# 1.
|
| 13 |
# ============================================================
|
|
|
|
|
|
|
|
|
|
| 14 |
class EfficientNetLSTM(nn.Module):
|
| 15 |
def __init__(self, hidden_size=256, num_layers=2, dropout=0.5):
|
| 16 |
super(EfficientNetLSTM, self).__init__()
|
|
@@ -18,23 +28,11 @@ class EfficientNetLSTM(nn.Module):
|
|
| 18 |
self.efficientnet = efficientnet_v2_s(weights=weights)
|
| 19 |
num_features = self.efficientnet.classifier[1].in_features
|
| 20 |
self.efficientnet.classifier = nn.Identity()
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
input_size=num_features,
|
| 24 |
-
hidden_size=hidden_size,
|
| 25 |
-
num_layers=num_layers,
|
| 26 |
-
batch_first=True,
|
| 27 |
-
dropout=dropout if num_layers > 1 else 0,
|
| 28 |
-
bidirectional=True
|
| 29 |
-
)
|
| 30 |
-
|
| 31 |
self.fc = nn.Sequential(
|
| 32 |
-
nn.Linear(256*2, 256),
|
| 33 |
-
nn.ReLU(
|
| 34 |
-
nn.Dropout(dropout),
|
| 35 |
-
nn.Linear(256, 128),
|
| 36 |
-
nn.ReLU(inplace=True),
|
| 37 |
-
nn.Dropout(dropout),
|
| 38 |
nn.Linear(128, 1)
|
| 39 |
)
|
| 40 |
|
|
@@ -44,193 +42,320 @@ class EfficientNetLSTM(nn.Module):
|
|
| 44 |
features = self.efficientnet(x)
|
| 45 |
features = features.view(batch_size, num_frames, -1)
|
| 46 |
lstm_out, _ = self.lstm(features)
|
| 47 |
-
|
| 48 |
-
output = self.fc(final_features)
|
| 49 |
return output.squeeze()
|
| 50 |
|
| 51 |
-
# Load
|
| 52 |
-
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
| 53 |
-
MODEL_PATH = "best_model_efficientnet_lstm_v2.pth" # Đảm bảo file này nằm cùng thư mục
|
| 54 |
-
|
| 55 |
print("⏳ Đang tải models...")
|
| 56 |
try:
|
| 57 |
-
|
| 58 |
-
model = EfficientNetLSTM(hidden_size=256, num_layers=2, dropout=0.5).to(DEVICE)
|
| 59 |
model.load_state_dict(torch.load(MODEL_PATH, map_location=DEVICE))
|
| 60 |
model.eval()
|
| 61 |
-
|
| 62 |
-
# Load YOLO
|
| 63 |
yolo_model = YOLO("yolov8n.pt")
|
| 64 |
print("✅ Đã tải xong models!")
|
| 65 |
except Exception as e:
|
| 66 |
-
print(f"❌ Lỗi
|
|
|
|
|
|
|
| 67 |
|
| 68 |
# Transform
|
| 69 |
transform = A.Compose([
|
| 70 |
-
A.Resize(height=
|
| 71 |
A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
|
| 72 |
A.ToTensorV2(),
|
| 73 |
])
|
| 74 |
|
| 75 |
# ============================================================
|
| 76 |
-
# 2.
|
| 77 |
# ============================================================
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
|
| 79 |
-
def
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
- image: Frame hiện tại từ webcam (numpy array)
|
| 88 |
-
- buffer_state: List chứa các frame trước đó (để đủ 16 frames)
|
| 89 |
-
- history_log: Text log lịch sử
|
| 90 |
-
"""
|
| 91 |
-
if image is None:
|
| 92 |
-
return image, buffer_state, "Không có camera", history_log
|
| 93 |
-
|
| 94 |
-
# 1. Detect người bằng YOLO
|
| 95 |
-
results = yolo_model(image, verbose=False, conf=0.5)
|
| 96 |
-
boxes = results[0].boxes.data.cpu().numpy()
|
| 97 |
-
|
| 98 |
-
has_person = False
|
| 99 |
-
bbox = None
|
| 100 |
-
|
| 101 |
-
for x1, y1, x2, y2, conf, cls in boxes:
|
| 102 |
-
if int(cls) == 0: # Person class
|
| 103 |
-
has_person = True
|
| 104 |
-
bbox = (int(x1), int(y1), int(x2), int(y2))
|
| 105 |
-
break # Chỉ lấy người đầu tiên
|
| 106 |
-
|
| 107 |
-
status_text = "Đang chờ tín hiệu..."
|
| 108 |
-
color = (0, 255, 0) # Green
|
| 109 |
|
| 110 |
-
#
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
# Gom buffer thành batch
|
| 124 |
-
video_tensor = torch.stack(current_buffer).unsqueeze(0).to(DEVICE)
|
| 125 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 126 |
with torch.no_grad():
|
| 127 |
-
|
| 128 |
-
prob = torch.sigmoid(output).item()
|
| 129 |
|
| 130 |
is_fall = prob > 0.5
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
else:
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
# history_log = f"🟢 An toàn - {prob*100:.0f}%\n" + history_log # (Tắt dòng này cho đỡ spam log)
|
| 140 |
-
|
| 141 |
-
# 4. Vẽ Bounding Box
|
| 142 |
-
if bbox:
|
| 143 |
-
x1, y1, x2, y2 = bbox
|
| 144 |
-
cv2.rectangle(image, (x1, y1), (x2, y2), color, 3)
|
| 145 |
-
cv2.putText(image, status_text, (x1, y1 - 10),
|
| 146 |
-
cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)
|
| 147 |
-
else:
|
| 148 |
-
cv2.putText(image, "Khong thay nguoi", (20, 50),
|
| 149 |
-
cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2)
|
| 150 |
-
|
| 151 |
-
# Giới hạn log
|
| 152 |
-
if len(history_log) > 1000: history_log = history_log[:1000]
|
| 153 |
-
|
| 154 |
-
return image, current_buffer, status_text, history_log
|
| 155 |
-
|
| 156 |
-
def analyze_uploaded_video(video_path):
|
| 157 |
-
"""Phân tích video upload lên"""
|
| 158 |
-
cap = cv2.VideoCapture(video_path)
|
| 159 |
-
frames = []
|
| 160 |
-
fps = cap.get(cv2.CAP_PROP_FPS)
|
| 161 |
-
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
| 162 |
-
|
| 163 |
-
# Lấy 16 frames rải đều
|
| 164 |
-
if total_frames >= 32:
|
| 165 |
-
indices = np.linspace(0, total_frames - 1, 32, dtype=int)
|
| 166 |
-
else:
|
| 167 |
-
indices = np.arange(total_frames)
|
| 168 |
-
# Pad nếu thiếu (đơn giản hóa)
|
| 169 |
-
|
| 170 |
-
for i in range(total_frames):
|
| 171 |
-
ret, frame = cap.read()
|
| 172 |
-
if not ret: break
|
| 173 |
-
if i in indices:
|
| 174 |
-
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
|
| 175 |
-
frames.append(preprocess_frame(frame))
|
| 176 |
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 183 |
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 194 |
|
| 195 |
# ============================================================
|
| 196 |
-
# 3.
|
| 197 |
# ============================================================
|
| 198 |
|
| 199 |
-
|
| 200 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 201 |
|
| 202 |
-
with gr.Tab("📹
|
| 203 |
with gr.Row():
|
| 204 |
-
with gr.Column(scale=
|
| 205 |
-
#
|
| 206 |
-
input_cam = gr.Image(sources=["webcam"], type="numpy", label="Camera")
|
| 207 |
-
|
|
|
|
| 208 |
|
| 209 |
-
with gr.Column(scale=
|
| 210 |
-
|
| 211 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 212 |
|
| 213 |
-
#
|
| 214 |
-
|
|
|
|
|
|
|
|
|
|
| 215 |
|
| 216 |
-
#
|
| 217 |
input_cam.stream(
|
| 218 |
-
fn=
|
| 219 |
-
inputs=[input_cam
|
| 220 |
-
outputs=[output_cam,
|
| 221 |
-
show_progress=False
|
| 222 |
)
|
|
|
|
|
|
|
|
|
|
| 223 |
|
| 224 |
-
with gr.Tab("
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 228 |
|
|
|
|
| 229 |
analyze_btn.click(
|
| 230 |
-
fn=
|
| 231 |
inputs=video_input,
|
| 232 |
-
outputs=result_text
|
| 233 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 234 |
|
| 235 |
if __name__ == "__main__":
|
| 236 |
demo.launch()
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
# Cấu hình thư mục tạm cho YOLO (Bắt buộc cho HuggingFace)
|
| 3 |
+
os.environ["YOLO_CONFIG_DIR"] = "/tmp"
|
| 4 |
+
|
| 5 |
import gradio as gr
|
| 6 |
import cv2
|
| 7 |
import numpy as np
|
|
|
|
| 10 |
from torchvision.models import efficientnet_v2_s, EfficientNet_V2_S_Weights
|
| 11 |
import albumentations as A
|
| 12 |
from ultralytics import YOLO
|
| 13 |
+
from datetime import datetime
|
| 14 |
+
import pandas as pd
|
| 15 |
from collections import deque
|
| 16 |
+
from pathlib import Path
|
| 17 |
|
| 18 |
# ============================================================
|
| 19 |
+
# 1. MODEL CONFIGURATION (Giữ nguyên logic của bạn)
|
| 20 |
# ============================================================
|
| 21 |
+
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
| 22 |
+
MODEL_PATH = "best_model_efficientnet_lstm_224_16.pth"
|
| 23 |
+
|
| 24 |
class EfficientNetLSTM(nn.Module):
|
| 25 |
def __init__(self, hidden_size=256, num_layers=2, dropout=0.5):
|
| 26 |
super(EfficientNetLSTM, self).__init__()
|
|
|
|
| 28 |
self.efficientnet = efficientnet_v2_s(weights=weights)
|
| 29 |
num_features = self.efficientnet.classifier[1].in_features
|
| 30 |
self.efficientnet.classifier = nn.Identity()
|
| 31 |
+
self.lstm = nn.LSTM(input_size=num_features, hidden_size=hidden_size, num_layers=num_layers,
|
| 32 |
+
batch_first=True, dropout=dropout, bidirectional=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
self.fc = nn.Sequential(
|
| 34 |
+
nn.Linear(256*2, 256), nn.ReLU(), nn.Dropout(dropout),
|
| 35 |
+
nn.Linear(256, 128), nn.ReLU(), nn.Dropout(dropout),
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
nn.Linear(128, 1)
|
| 37 |
)
|
| 38 |
|
|
|
|
| 42 |
features = self.efficientnet(x)
|
| 43 |
features = features.view(batch_size, num_frames, -1)
|
| 44 |
lstm_out, _ = self.lstm(features)
|
| 45 |
+
output = self.fc(lstm_out[:, -1, :])
|
|
|
|
| 46 |
return output.squeeze()
|
| 47 |
|
| 48 |
+
# Load Models Global
|
|
|
|
|
|
|
|
|
|
| 49 |
# Load the clip classifier and the YOLO person detector once at import time.
# Any failure along the way leaves BOTH globals set to None, which the rest of
# the application treats as "models unavailable".
print("⏳ Đang tải models...")
try:
    # Build the network, move it to the target device, then restore weights.
    model = EfficientNetLSTM()
    model.to(DEVICE)  # nn.Module.to() converts the module in place
    model.load_state_dict(torch.load(MODEL_PATH, map_location=DEVICE))
    model.eval()  # inference mode: disables dropout

    yolo_model = YOLO("yolov8n.pt")
    print("✅ Đã tải xong models!")
except Exception as e:
    # Top-level boundary: report the problem and keep the app importable.
    print(f"❌ Lỗi: {e}")
    model = yolo_model = None
|
| 60 |
|
| 61 |
# Transform
|
| 62 |
transform = A.Compose([
|
| 63 |
+
A.Resize(height=224, width=224),
|
| 64 |
A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
|
| 65 |
A.ToTensorV2(),
|
| 66 |
])
|
| 67 |
|
| 68 |
# ============================================================
|
| 69 |
+
# 2. SYSTEM CLASS (QUẢN LÝ TRẠNG THÁI)
|
| 70 |
# ============================================================
|
| 71 |
+
class FallDetectionSystem:
    """Stateful fall-detection pipeline behind both Gradio tabs.

    The instance keeps the sliding window of preprocessed frames fed to the
    clip classifier, a short buffer of raw frames used as video "pre-roll",
    the state of the clip currently being recorded, and the history shown in
    the UI (realtime log lines, saved clip paths, upload-analysis table).

    The methods rely on module-level names defined elsewhere in this file:
    ``model``, ``yolo_model``, ``transform`` and ``DEVICE``.
    """

    def __init__(self):
        # --- Configuration ---
        self.num_frames = 16          # clip length fed to the classifier
        self.conf_thres = 0.5         # confidence threshold passed to YOLO
        self.output_dir = Path("fall_videos")
        self.output_dir.mkdir(exist_ok=True)

        # --- Realtime buffers ---
        self.buffer = deque(maxlen=self.num_frames)  # preprocessed frames for the model
        self.pre_buffer = deque(maxlen=30)           # last 30 raw BGR frames (pre-roll)
        self.no_detect_count = 0                     # consecutive frames without a person

        # --- Recording state ---
        self.is_recording = False
        self.video_writer = None
        self.current_video_path = None
        self.fall_start_time = None
        self.fall_frame_count = 0     # fall frames written to the current clip

        # --- Logging & history ---
        self.log_history = []         # HTML snippets, newest first
        self.saved_videos = []        # paths of kept clips, newest first
        self.analysis_history = pd.DataFrame(columns=["Thời gian", "Video", "Kết quả", "Độ tin cậy"])

    def reset_realtime_state(self):
        """Reset the realtime pipeline, e.g. when the camera is cleared.

        A clip that is still being recorded is finalized through
        ``_stop_recording_if_active`` so it is either kept in
        ``saved_videos`` or deleted, instead of being left behind as an
        untracked file. The log and the saved-clip list are kept.
        """
        self._stop_recording_if_active(save=True)
        # Defensive: a writer may exist without the recording flag being set.
        if self.video_writer:
            self.video_writer.release()
            self.video_writer = None
        self.is_recording = False

        self.buffer.clear()
        self.pre_buffer.clear()
        self.no_detect_count = 0
        self.fall_frame_count = 0

    # --- TAB 1: UPLOADED VIDEO ANALYSIS ---
    def analyze_video(self, video_path):
        """Classify an uploaded video as fall / safe.

        ``self.num_frames`` frames are sampled evenly over the video (all
        frames if it is shorter; the last frame is then repeated as padding),
        preprocessed with ``transform`` and scored by ``model``.

        Args:
            video_path: Path of the video file; may be ``None`` or empty when
                nothing was uploaded.

        Returns:
            ``(message, history)``: a result or error message, and the
            analysis-history DataFrame (newest row first). On error the
            history is returned unchanged.
        """
        if not video_path:
            return "No video provided", self.analysis_history
        if model is None:
            return "Error loading model", self.analysis_history

        cap = cv2.VideoCapture(video_path)
        frames = []
        try:
            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

            if total_frames >= self.num_frames:
                indices = np.linspace(0, total_frames - 1, self.num_frames, dtype=int)
            else:
                indices = np.arange(total_frames)
            # Set membership is O(1); testing against the array scans it per frame.
            wanted = set(indices.tolist())

            for i in range(total_frames):
                ret, frame = cap.read()
                if not ret:
                    break
                if i in wanted:
                    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    frames.append(transform(image=frame_rgb)['image'])
        finally:
            # Release the capture even if decoding or preprocessing raised.
            cap.release()

        # Unreadable file or a container reporting no frames: nothing to pad from.
        if not frames:
            return "Cannot read any frame from video", self.analysis_history

        # Pad short videos by repeating the last frame.
        while len(frames) < self.num_frames:
            frames.append(frames[-1])

        video_tensor = torch.stack(frames).unsqueeze(0).to(DEVICE)
        with torch.no_grad():
            prob = torch.sigmoid(model(video_tensor)).item()

        is_fall = prob > 0.5
        result_text = "⚠️ PHÁT HIỆN NGÃ" if is_fall else "✅ AN TOÀN"
        timestamp = datetime.now().strftime("%d/%m/%Y %H:%M")
        filename = os.path.basename(video_path)

        # Prepend the new result so the table lists the newest analysis first.
        new_row = pd.DataFrame({
            "Thời gian": [timestamp],
            "Video": [filename],
            "Kết quả": [result_text],
            "Độ tin cậy": [f"{prob*100:.2f}%"]
        })
        self.analysis_history = pd.concat([new_row, self.analysis_history], ignore_index=True)

        return f"{result_text} ({prob*100:.2f}%)", self.analysis_history

    # --- TAB 2: REALTIME PROCESSING ---
    def process_frame(self, image):
        """Process one webcam frame.

        Args:
            image: Frame from Gradio (treated as RGB), or ``None`` when no
                frame is available.

        Returns:
            ``(annotated_frame, status_html, log_html, saved_videos)``.
        """
        if image is None:
            # Keep the log and saved clips on screen instead of blanking them.
            return image, "", "".join(self.log_history), self.saved_videos
        if model is None or yolo_model is None:
            # Models failed to load: report it rather than crash on the call.
            error_html = "<div style='background:gray; color:white; padding:10px; border-radius:5px'>⚠️ Error loading model</div>"
            return image, error_html, "".join(self.log_history), self.saved_videos

        # 1. Prepare data. OpenCV drawing and the video writer work on BGR.
        frame_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        current_time = datetime.now().strftime('%H:%M:%S')

        # Keep recent frames so a recording can start slightly in the past.
        self.pre_buffer.append(frame_bgr)

        # 2. Person detection.
        person_box = self._detect_person(frame_bgr)

        status_html = "<div style='background:green; color:white; padding:10px; border-radius:5px'>🟢 AN TOÀN</div>"
        log_entry = ""

        if person_box is None:
            self.no_detect_count += 1
            if self.no_detect_count >= 10:
                # Person gone for a while: drop the clip window and close any recording.
                self.buffer.clear()
                self._stop_recording_if_active(save=True)

            cv2.putText(frame_bgr, "Khong thay nguoi", (20, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2)
        else:
            self.no_detect_count = 0
            x1, y1, x2, y2 = person_box

            # The classifier is fed the RGB frame as delivered by Gradio.
            self.buffer.append(transform(image=image)['image'])

            cv2.rectangle(frame_bgr, (x1, y1), (x2, y2), (0, 255, 0), 2)

            # Predict only once a full window of frames is available.
            if len(self.buffer) == self.num_frames:
                prob = self._fall_probability()

                if prob > 0.5:
                    # --- Fall detected ---
                    status_html = "<div style='background:red; color:white; padding:10px; border-radius:5px'>🔴 NGUY HIỂM: TÉ NGÃ</div>"
                    label = f"TE NGA! ({prob*100:.0f}%)"
                    cv2.rectangle(frame_bgr, (x1, y1), (x2, y2), (0, 0, 255), 3)
                    cv2.putText(frame_bgr, label, (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)

                    log_entry = f"<p style='color:#ff4444'>🔴 {current_time}: Phát hiện ngã ({prob*100:.0f}%)</p>"

                    if not self.is_recording:
                        self._start_recording(frame_bgr)

                    if self.video_writer:
                        self.video_writer.write(frame_bgr)
                        self.fall_frame_count += 1
                else:
                    # --- Normal ---
                    # Safe frames are deliberately not logged to avoid flooding the log.
                    label = f"An toan ({prob*100:.0f}%)"
                    cv2.putText(frame_bgr, label, (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)

                    self._stop_recording_if_active(save=True)
            else:
                cv2.putText(frame_bgr, f"Buffering: {len(self.buffer)}/{self.num_frames}", (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 0), 2)

        # Newest entry first; keep at most 50 entries.
        if log_entry:
            self.log_history.insert(0, log_entry)
            if len(self.log_history) > 50:
                self.log_history.pop()

        log_html_output = "".join(self.log_history)

        # Convert back to RGB for display in Gradio.
        frame_rgb_out = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)

        return frame_rgb_out, status_html, log_html_output, self.saved_videos

    def _detect_person(self, frame_bgr):
        """Return ``(x1, y1, x2, y2)`` of the first class-0 (person) box, or ``None``."""
        results = yolo_model(frame_bgr, verbose=False, conf=self.conf_thres)
        boxes = results[0].boxes.data.cpu().numpy()
        for x1, y1, x2, y2, conf, cls in boxes:
            if int(cls) == 0:
                return int(x1), int(y1), int(x2), int(y2)
        return None

    def _fall_probability(self):
        """Run the classifier on the buffered window and return sigmoid(output)."""
        video_tensor = torch.stack(list(self.buffer)).unsqueeze(0).to(DEVICE)
        with torch.no_grad():
            output = model(video_tensor)
        return torch.sigmoid(output).item()

    # --- RECORDING HELPERS ---
    def _start_recording(self, frame_sample):
        """Open a new clip file and write the buffered pre-roll frames.

        Args:
            frame_sample: The frame that triggered the recording; its height
                and width define the video size. The caller writes this
                frame itself right after this method returns.
        """
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        filename = f"fall_detect_{timestamp}.mp4"
        filepath = self.output_dir / filename

        h, w = frame_sample.shape[:2]
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        writer = cv2.VideoWriter(str(filepath), fourcc, 20.0, (w, h))
        if not writer.isOpened():
            # Codec/path problem: do not pretend to record, otherwise an
            # unusable file path would later be listed among the saved clips.
            writer.release()
            return

        self.video_writer = writer
        self.is_recording = True
        self.current_video_path = str(filepath)
        self.fall_frame_count = 0

        # Write the frames preceding the fall. The trigger frame is already
        # the newest pre-buffer entry (same object) and is written by the
        # caller, so skip it here to avoid a duplicated frame.
        for past_frame in self.pre_buffer:
            if past_frame is not frame_sample:
                self.video_writer.write(past_frame)

    def _stop_recording_if_active(self, save=True):
        """Close the current clip, if any, and keep or delete its file.

        The clip is kept (path prepended to ``saved_videos``) only when
        ``save`` is true and more than 10 fall frames were written;
        otherwise the file is removed as too short to be useful.
        """
        if not (self.is_recording and self.video_writer):
            return

        self.video_writer.release()
        self.video_writer = None
        self.is_recording = False

        path, self.current_video_path = self.current_video_path, None

        if save and self.fall_frame_count > 10:
            self.saved_videos.insert(0, path)
        elif path:
            try:
                os.remove(path)
            except OSError:
                # Best-effort cleanup: the file may never have been created.
                pass
|
| 280 |
+
|
| 281 |
+
# Khởi tạo hệ thống
|
| 282 |
+
system = FallDetectionSystem()
|
| 283 |
|
| 284 |
# ============================================================
|
| 285 |
+
# 3. GRADIO UI
|
| 286 |
# ============================================================
|
| 287 |
|
| 288 |
+
# Custom CSS
|
| 289 |
+
css = """
|
| 290 |
+
.status-box { text-align: center; font-size: 1.2em; font-weight: bold; margin-bottom: 10px; }
|
| 291 |
+
.log-container { height: 300px; overflow-y: auto; background: #222; padding: 10px; border-radius: 8px; border: 1px solid #444; }
|
| 292 |
+
"""
|
| 293 |
+
|
| 294 |
+
with gr.Blocks(title="Hệ thống Dự đoán Fall", css=css, theme=gr.themes.Soft()) as demo:
|
| 295 |
+
gr.Markdown("# 🎈 Hệ thống Phát hiện Té ngã (AI Powered)")
|
| 296 |
|
| 297 |
+
with gr.Tab("📹 Dự đoán Realtime"):
|
| 298 |
with gr.Row():
|
| 299 |
+
with gr.Column(scale=2):
|
| 300 |
+
# Input Webcam
|
| 301 |
+
input_cam = gr.Image(sources=["webcam"], type="numpy", label="Camera Input")
|
| 302 |
+
# Output đã vẽ box
|
| 303 |
+
output_cam = gr.Image(label="Kết quả Xử lý")
|
| 304 |
|
| 305 |
+
with gr.Column(scale=1):
|
| 306 |
+
# Trạng thái An toàn/Nguy hiểm
|
| 307 |
+
status_html = gr.HTML(value="<div style='background:gray; color:white; padding:10px; border-radius:5px'>⚪ CHỜ CAMERA</div>", elem_classes="status-box")
|
| 308 |
+
|
| 309 |
+
# Nhật ký Log
|
| 310 |
+
gr.Markdown("### 📝 Nhật ký phát hiện")
|
| 311 |
+
log_display = gr.HTML(elem_classes="log-container")
|
| 312 |
|
| 313 |
+
# Section Video đã lưu
|
| 314 |
+
gr.Markdown("---")
|
| 315 |
+
gr.Markdown("### 📂 Video té ngã đã ghi lại tự động")
|
| 316 |
+
# Gallery hiển thị video đã lưu
|
| 317 |
+
gallery = gr.Gallery(label="Video Té Ngã", columns=3, height="auto", object_fit="contain")
|
| 318 |
|
| 319 |
+
# Sự kiện Stream Realtime
|
| 320 |
input_cam.stream(
|
| 321 |
+
fn=system.process_frame,
|
| 322 |
+
inputs=[input_cam],
|
| 323 |
+
outputs=[output_cam, status_html, log_display, gallery],
|
| 324 |
+
show_progress=False
|
| 325 |
)
|
| 326 |
+
|
| 327 |
+
# Sự kiện xóa buffer khi tắt/bật camera (clear log)
|
| 328 |
+
input_cam.clear(fn=system.reset_realtime_state, inputs=None, outputs=None)
|
| 329 |
|
| 330 |
+
with gr.Tab("📹 Dự đoán qua Video"):
|
| 331 |
+
with gr.Row():
|
| 332 |
+
with gr.Column():
|
| 333 |
+
video_input = gr.Video(label="Tải video lên")
|
| 334 |
+
analyze_btn = gr.Button("🔵 Bắt đầu phân tích", variant="primary")
|
| 335 |
+
result_text = gr.Label(label="Kết quả phân tích")
|
| 336 |
+
|
| 337 |
+
with gr.Column():
|
| 338 |
+
gr.Markdown("### 📊 Lịch sử phân tích")
|
| 339 |
+
history_table = gr.Dataframe(
|
| 340 |
+
headers=["Thời gian", "Video", "Kết quả", "Độ tin cậy"],
|
| 341 |
+
datatype=["str", "str", "str", "str"],
|
| 342 |
+
value=pd.DataFrame(columns=["Thời gian", "Video", "Kết quả", "Độ tin cậy"]),
|
| 343 |
+
interactive=False
|
| 344 |
+
)
|
| 345 |
+
clear_hist_btn = gr.Button("🗑️ Xóa lịch sử")
|
| 346 |
|
| 347 |
+
# Sự kiện nút bấm
|
| 348 |
analyze_btn.click(
|
| 349 |
+
fn=system.analyze_video,
|
| 350 |
inputs=video_input,
|
| 351 |
+
outputs=[result_text, history_table]
|
| 352 |
)
|
| 353 |
+
|
| 354 |
+
def clear_history():
|
| 355 |
+
system.analysis_history = pd.DataFrame(columns=["Thời gian", "Video", "Kết quả", "Độ tin cậy"])
|
| 356 |
+
return system.analysis_history
|
| 357 |
+
|
| 358 |
+
clear_hist_btn.click(fn=clear_history, outputs=history_table)
|
| 359 |
|
| 360 |
if __name__ == "__main__":
|
| 361 |
demo.launch()
|
requirements.txt
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
gradio
|
| 2 |
numpy
|
|
|
|
| 3 |
opencv-python-headless
|
| 4 |
torch
|
| 5 |
torchvision
|
|
|
|
| 1 |
gradio
|
| 2 |
numpy
|
| 3 |
+
pandas
|
| 4 |
opencv-python-headless
|
| 5 |
torch
|
| 6 |
torchvision
|