Spaces:

ltlonggg
/

fall_detection

Sleeping

App Files Files Community

ltlonggg committed on Nov 19, 2025

Commit

794b89a

1 Parent(s): 76e0ae2

Add application file

Browse files

Files changed (5) hide show

app.py +236 -0
best_model_efficientnet_lstm_v2.pth +3 -0
packages.txt +2 -0
requirements.txt +7 -0
yolov8n.pt +3 -0

app.py ADDED Viewed

	@@ -0,0 +1,236 @@

+import gradio as gr
+import cv2
+import numpy as np
+import torch
+import torch.nn as nn
+from torchvision.models import efficientnet_v2_s, EfficientNet_V2_S_Weights
+import albumentations as A
+from ultralytics import YOLO
+from collections import deque
+# ============================================================
+# 1. CẤU HÌNH MODEL (Giữ nguyên logic cũ)
+# ============================================================
+class EfficientNetLSTM(nn.Module):
+    def __init__(self, hidden_size=256, num_layers=2, dropout=0.5):
+        super(EfficientNetLSTM, self).__init__()
+        weights = EfficientNet_V2_S_Weights.IMAGENET1K_V1
+        self.efficientnet = efficientnet_v2_s(weights=weights)
+        num_features = self.efficientnet.classifier[1].in_features
+        self.efficientnet.classifier = nn.Identity()
+        self.lstm = nn.LSTM(
+            input_size=num_features,
+            hidden_size=hidden_size,
+            num_layers=num_layers,
+            batch_first=True,
+            dropout=dropout if num_layers > 1 else 0,
+            bidirectional=True
+        )
+        self.fc = nn.Sequential(
+            nn.Linear(256*2, 256),
+            nn.ReLU(inplace=True),
+            nn.Dropout(dropout),
+            nn.Linear(256, 128),
+            nn.ReLU(inplace=True),
+            nn.Dropout(dropout),
+            nn.Linear(128, 1)
+        )
+    def forward(self, x):
+        batch_size, num_frames, c, h, w = x.shape
+        x = x.view(batch_size * num_frames, c, h, w)
+        features = self.efficientnet(x)
+        features = features.view(batch_size, num_frames, -1)
+        lstm_out, _ = self.lstm(features)
+        final_features = lstm_out[:, -1, :]
+        output = self.fc(final_features)
+        return output.squeeze()
+# Load Model (Global)
+DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+MODEL_PATH = "best_model_efficientnet_lstm_224_16.pth" # Đảm bảo file này nằm cùng thư mục
+print("⏳ Đang tải models...")
+try:
+    # Load Fall Detection Model
+    model = EfficientNetLSTM(hidden_size=256, num_layers=2, dropout=0.5).to(DEVICE)
+    model.load_state_dict(torch.load(MODEL_PATH, map_location=DEVICE))
+    model.eval()
+    # Load YOLO
+    yolo_model = YOLO("yolov8n.pt")
+    print("✅ Đã tải xong models!")
+except Exception as e:
+    print(f"❌ Lỗi tải model: {e}")
+# Transform
+transform = A.Compose([
+    A.Resize(height=224, width=224),
+    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+    A.ToTensorV2(),
+])
+# ============================================================
+# 2. HÀM XỬ LÝ LOGIC
+# ============================================================
+def preprocess_frame(frame_rgb):
+    """Chuyển đổi 1 frame ảnh thành tensor cho model"""
+    augmented = transform(image=frame_rgb)
+    return augmented['image']
+def predict_webcam_stream(image, buffer_state, history_log):
+    """
+    Hàm này chạy liên tục cho mỗi frame từ webcam.
+    - image: Frame hiện tại từ webcam (numpy array)
+    - buffer_state: List chứa các frame trước đó (để đủ 16 frames)
+    - history_log: Text log lịch sử
+    """
+    if image is None:
+        return image, buffer_state, "Không có camera", history_log
+    # 1. Detect người bằng YOLO
+    results = yolo_model(image, verbose=False, conf=0.5)
+    boxes = results[0].boxes.data.cpu().numpy()
+    has_person = False
+    bbox = None
+    for x1, y1, x2, y2, conf, cls in boxes:
+        if int(cls) == 0: # Person class
+            has_person = True
+            bbox = (int(x1), int(y1), int(x2), int(y2))
+            break # Chỉ lấy người đầu tiên
+    status_text = "Đang chờ tín hiệu..."
+    color = (0, 255, 0) # Green
+    # 2. Xử lý Buffer cho LSTM
+    # Chuyển đổi ảnh sang Tensor và thêm vào buffer
+    frame_tensor = preprocess_frame(image)
+    # buffer_state là một list, ta quản lý nó như deque
+    current_buffer = buffer_state if buffer_state is not None else []
+    current_buffer.append(frame_tensor)
+    if len(current_buffer) > 16:
+        current_buffer.pop(0) # Xóa frame cũ nhất
+    # 3. Dự đoán Fall (Chỉ khi đủ 16 frames và có người)
+    if len(current_buffer) == 16:
+        # Gom buffer thành batch
+        video_tensor = torch.stack(current_buffer).unsqueeze(0).to(DEVICE)
+        with torch.no_grad():
+            output = model(video_tensor)
+            prob = torch.sigmoid(output).item()
+        is_fall = prob > 0.5
+        if is_fall:
+            status_text = f"⚠️ PHÁT HIỆN NGÃ! ({prob*100:.1f}%)"
+            color = (0, 0, 255) # Red
+            history_log = f"🔴 Ngã - {prob*100:.0f}%\n" + history_log
+        else:
+            status_text = f"✅ An toàn ({prob*100:.1f}%)"
+            color = (0, 255, 0) # Green
+            # history_log = f"🟢 An toàn - {prob*100:.0f}%\n" + history_log # (Tắt dòng này cho đỡ spam log)
+    # 4. Vẽ Bounding Box
+    if bbox:
+        x1, y1, x2, y2 = bbox
+        cv2.rectangle(image, (x1, y1), (x2, y2), color, 3)
+        cv2.putText(image, status_text, (x1, y1 - 10),
+                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)
+    else:
+        cv2.putText(image, "Khong thay nguoi", (20, 50),
+                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2)
+    # Giới hạn log
+    if len(history_log) > 1000: history_log = history_log[:1000]
+    return image, current_buffer, status_text, history_log
+def analyze_uploaded_video(video_path):
+    """Phân tích video upload lên"""
+    cap = cv2.VideoCapture(video_path)
+    frames = []
+    fps = cap.get(cv2.CAP_PROP_FPS)
+    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+    # Lấy 16 frames rải đều
+    if total_frames >= 16:
+        indices = np.linspace(0, total_frames - 1, 16, dtype=int)
+    else:
+        indices = np.arange(total_frames)
+        # Pad nếu thiếu (đơn giản hóa)
+    for i in range(total_frames):
+        ret, frame = cap.read()
+        if not ret: break
+        if i in indices:
+            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+            frames.append(preprocess_frame(frame))
+    cap.release()
+    if len(frames) < 16:
+        # Pad frame cuối nếu thiếu
+        while len(frames) < 16:
+            frames.append(frames[-1])
+    video_tensor = torch.stack(frames).unsqueeze(0).to(DEVICE)
+    with torch.no_grad():
+        output = model(video_tensor)
+        prob = torch.sigmoid(output).item()
+    if prob > 0.5:
+        return f"⚠️ CẢNH BÁO: Video có hành động NGÃ.\nTỷ lệ: {prob*100:.2f}%"
+    else:
+        return f"✅ Video AN TOÀN.\nTỷ lệ ngã: {prob*100:.2f}%"
+# ============================================================
+# 3. GIAO DIỆN GRADIO
+# ============================================================
+with gr.Blocks(title="Hệ thống Fall Detection", theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 🎥 Hệ thống Phát hiện Té ngã (EfficientNet + LSTM)")
+    with gr.Tab("📹 Camera Realtime"):
+        with gr.Row():
+            with gr.Column(scale=7):
+                # Input Webcam: streaming=True để gửi liên tục
+                input_cam = gr.Image(source="webcam", streaming=True, label="Camera")
+                output_cam = gr.Image(label="Kết quả xử lý")
+            with gr.Column(scale=3):
+                status_label = gr.Label(label="Trạng thái hiện tại")
+                log_box = gr.Textbox(label="Nhật ký phát hiện", lines=10)
+        # Biến State để lưu buffer frames giữa các lần gọi hàm
+        buffer_state = gr.State([])
+        # Sự kiện: Khi input cam thay đổi -> gọi hàm xử lý -> cập nhật output
+        input_cam.change(
+            fn=predict_webcam_stream,
+            inputs=[input_cam, buffer_state, log_box],
+            outputs=[output_cam, buffer_state, status_label, log_box],
+            show_progress=False
+        )
+    with gr.Tab("📂 Phân tích Video File"):
+        video_input = gr.Video(label="Tải video lên")
+        analyze_btn = gr.Button("Phân tích Video", variant="primary")
+        result_text = gr.Textbox(label="Kết quả")
+        analyze_btn.click(
+            fn=analyze_uploaded_video,
+            inputs=video_input,
+            outputs=result_text
+        )
+if __name__ == "__main__":
+    demo.launch()

best_model_efficientnet_lstm_v2.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c80bc58183c7c98639b2fee7fa441496f3d3fec20eaf03939402546e215fe4c3
+size 101199140

packages.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ libgl1
2	+ libglib2.0-0

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+gradio
+numpy
+opencv-python-headless
+torch
+torchvision
+albumentations
+ultralytics

yolov8n.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f59b3d833e2ff32e194b5bb8e08d211dc7c5bdf144b90d2c8412c47ccfc83b36
+size 6549796