Upload 4 files
Browse files- README.md +73 -3
- config.json +5 -0
- gesture_model.bin +3 -0
- main.py +100 -0
README.md
CHANGED
|
@@ -1,3 +1,73 @@
|
|
| 1 |
-
---
|
| 2 |
-
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
language: ru
|
| 3 |
+
license: mit
|
| 4 |
+
tags:
|
| 5 |
+
- pytorch
|
| 6 |
+
- computer-vision
|
| 7 |
+
- image-classification
|
| 8 |
+
- gesture-recognition
|
| 9 |
+
- custom-model
|
| 10 |
+
- real-time
|
| 11 |
+
- webcam
|
| 12 |
+
---
|
| 13 |
+
|
| 14 |
+
# ✋ Распознаватель жестов руки (GestureAI_v1)
|
| 15 |
+
|
| 16 |
+
Привет! 👋
|
| 17 |
+
|
| 18 |
+
Это моя собственная свёрточная нейросеть, обученная **с нуля** распознавать жесты руки в реальном времени через веб-камеру.
|
| 19 |
+
Модель понимает **5 командных жестов** и может использоваться для управления компьютером (например, движением мыши) или в других проектах.
|
| 20 |
+
|
| 21 |
+
---
|
| 22 |
+
|
| 23 |
+
## 🚀 Особенности модели
|
| 24 |
+
|
| 25 |
+
- ⭐ Модель была обучена **на 24 760 реальных изображениях голубой перчатки**, благодаря чему смогла **научиться обобщать за 1 эпоху.**
|
| 26 |
+
- 🧠 Архитектура была сделана и обучена с нуля, вместе с датасетом (никаких предобученных моделей!)
|
| 27 |
+
- 🎥 Все 24 760 изображений были сделаны одним человеком, но это **никак не помешало** модели обобщать
|
| 28 |
+
|
| 29 |
+
---
|
| 30 |
+
|
| 31 |
+
## 📊 Распознаваемые жесты
|
| 32 |
+
|
| 33 |
+
| Жест | Класс | Движение мышки |
|
| 34 |
+
|------------|----------------|---------------------------|
|
| 35 |
+
| 👆 | `up` | Движение вверх |
|
| 36 |
+
| 👇 | `down` | Движение вниз |
|
| 37 |
+
| 👈 | `left` | Движение влево |
|
| 38 |
+
| 👉 | `right` | Движение вправо |
|
| 39 |
+
| 🙂 | `nothing` | Никаких действий |
|
| 40 |
+
|
| 41 |
+
---
|
| 42 |
+
|
| 43 |
+
## 🧠 Архитектура модели
|
| 44 |
+
|
| 45 |
+
Модель реализована на `PyTorch` в виде последовательной CNN (`nn.Sequential`).
|
| 46 |
+
|
| 47 |
+
```python
|
| 48 |
+
class GestureCNN(nn.Module):
    """Small sequential CNN that classifies hand-gesture images.

    Four ``Conv2d -> BatchNorm2d -> ReLU`` stages (32, 64, 128, 256 channels)
    are followed by a two-layer classifier head with dropout.  The first three
    stages end with a 2x max-pool; the last one ends with global average
    pooling, so the head always receives a 256-element feature vector.

    Args:
        num_classes: Number of output logits (one per gesture class).
    """

    def __init__(self, num_classes: int) -> None:
        super().__init__()
        # NOTE: the order of the layers inside ``nn.Sequential`` defines the
        # parameter names (``net.0.weight``, ``net.1.weight``, ...).  Keep the
        # order unchanged, otherwise previously saved weights will not load.
        self.net = nn.Sequential(
            nn.Conv2d(3, 32, 3, padding=1), nn.BatchNorm2d(32), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 3, padding=1), nn.BatchNorm2d(64), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(64, 128, 3, padding=1), nn.BatchNorm2d(128), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(128, 256, 3, padding=1), nn.BatchNorm2d(256), nn.ReLU(), nn.AdaptiveAvgPool2d(1),
            nn.Flatten(),
            nn.Dropout(0.3),
            nn.Linear(256, 128), nn.ReLU(), nn.Dropout(0.3),
            nn.Linear(128, num_classes),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return raw class logits of shape ``(batch, num_classes)``.

        Args:
            x: Batch of 3-channel images shaped ``(batch, 3, height, width)``.
        """
        return self.net(x)
|
| 63 |
+
```
|
| 64 |
+
|
| 65 |
+
## 📦 Как использовать
|
| 66 |
+
|
| 67 |
+
### 1. Установка зависимостей
|
| 68 |
+
```bash
|
| 69 |
+
pip install torch pyautogui opencv-python
|
| 70 |
+
```
|
| 71 |
+
### 2. Запуск
|
| 72 |
+
```bash
|
| 73 |
+
py main.py
|
config.json
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": ["GestureAI_v1"],
|
| 3 |
+
"model_type": "custom_gesture_cnn",
|
| 4 |
+
"num_classes": 5
|
| 5 |
+
}
|
gesture_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3eee42d77d367b321345578f96216429c406a866ba0610ef4d598702b82d3625
|
| 3 |
+
size 1706254
|
main.py
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import cv2
|
| 2 |
+
import torch
|
| 3 |
+
import torch.nn as nn
|
| 4 |
+
import pyautogui
|
| 5 |
+
from collections import deque
|
| 6 |
+
|
| 7 |
+
# Path to the serialized model weights that the script loads at startup.
MODEL_PATH = "gesture_model.bin"

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Side length, in pixels, of the square image fed to the network.
IMG_SIZE = 128

# Sliding window holding the most recent per-frame predictions; a gesture is
# treated as stable only when the window is full and all entries agree.
SMOOTHING = deque(maxlen=5)

# Initial cursor step in pixels.  The main loop grows the step by one pixel
# after every move while a directional gesture is held, so the cursor
# accelerates.  (An earlier note here described a fixed step of 4 giving
# ~120 px/s at 30 fps; that no longer matches this value.)
MOVE_STEP = 0

# Move the cursor once every N frames showing a stable directional gesture
# (1 means on every such frame).
MOVE_EVERY_N_FRAMES = 1
|
| 13 |
+
|
| 14 |
+
# ---------- Модель ----------
|
| 15 |
+
class GestureCNN(nn.Module):
    """Small sequential CNN that classifies hand-gesture images.

    Four ``Conv2d -> BatchNorm2d -> ReLU`` stages (32, 64, 128, 256 channels)
    are followed by a two-layer classifier head with dropout.  The first three
    stages end with a 2x max-pool; the last one ends with global average
    pooling, so the head always receives a 256-element feature vector.

    Args:
        num_classes: Number of output logits (one per gesture class).
    """

    def __init__(self, num_classes: int) -> None:
        super().__init__()
        # NOTE: the order of the layers inside ``nn.Sequential`` defines the
        # parameter names (``net.0.weight``, ``net.1.weight``, ...).  Keep the
        # order unchanged, otherwise previously saved weights will not load.
        self.net = nn.Sequential(
            nn.Conv2d(3, 32, 3, padding=1), nn.BatchNorm2d(32), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 3, padding=1), nn.BatchNorm2d(64), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(64, 128, 3, padding=1), nn.BatchNorm2d(128), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(128, 256, 3, padding=1), nn.BatchNorm2d(256), nn.ReLU(), nn.AdaptiveAvgPool2d(1),
            nn.Flatten(),
            nn.Dropout(0.3),
            nn.Linear(256, 128), nn.ReLU(), nn.Dropout(0.3),
            nn.Linear(128, num_classes),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return raw class logits of shape ``(batch, num_classes)``.

        Args:
            x: Batch of 3-channel images shaped ``(batch, 3, height, width)``.
        """
        return self.net(x)
|
| 30 |
+
|
| 31 |
+
# Class names indexed by the network's output position; alphabetical order,
# as produced by ImageFolder.
classes = ["down", "left", "nothing", "right", "up"]

model = GestureCNN(num_classes=len(classes))

# SECURITY: torch.load unpickles the file.  ``weights_only=True`` restricts
# unpickling to tensors and plain containers, so a tampered checkpoint cannot
# execute arbitrary code.  (Requires a PyTorch version that supports the
# ``weights_only`` argument.)
state_dict = torch.load(MODEL_PATH, map_location=DEVICE, weights_only=True)
model.load_state_dict(state_dict)

# Inference mode (fixed BatchNorm statistics, dropout disabled) on the
# selected device.
model.eval().to(DEVICE)
|
| 35 |
+
|
| 36 |
+
# Unit (dx, dy) vector for every gesture that moves the cursor.  Negative dy
# is used for "up", matching the original sign convention.
_DIRECTION_VECTORS = {
    "up": (0, -1),
    "down": (0, 1),
    "left": (-1, 0),
    "right": (1, 0),
}

cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Fail loudly with a non-zero exit status instead of a silent exit().
    raise SystemExit("Cannot open webcam (device 0).")

print("Управление жестами. 'q' для выхода.")
last_action = None  # NOTE(review): never read in the visible script

frame_counter = 0
# Working copy of the step so the MOVE_STEP setting itself is not mutated.
move_step = MOVE_STEP

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Mirror the frame horizontally and take a centred square region of
        # interest whose side is half of the shorter frame dimension.
        frame = cv2.flip(frame, 1)
        h, w = frame.shape[:2]
        size = min(w, h) // 2
        x1, y1 = w // 2 - size // 2, h // 2 - size // 2
        x2, y2 = x1 + size, y1 + size

        # ROI preprocessing: BGR -> RGB, resize, HWC uint8 -> 1xCxHxW float
        # scaled to [0, 1].
        # NOTE(review): no mean/std normalisation is applied; presumably this
        # matches the training pipeline - confirm.
        roi = frame[y1:y2, x1:x2]
        roi_rgb = cv2.cvtColor(roi, cv2.COLOR_BGR2RGB)
        roi_resized = cv2.resize(roi_rgb, (IMG_SIZE, IMG_SIZE))
        tensor = torch.from_numpy(roi_resized).permute(2, 0, 1).unsqueeze(0).float() / 255.0
        tensor = tensor.to(DEVICE)

        with torch.no_grad():
            output = model(tensor)
            _, pred = torch.max(output, 1)
            gesture = classes[pred.item()]

        SMOOTHING.append(gesture)

        # A gesture is stable only when the window is full and every entry
        # in it is the same.
        is_stable = len(SMOOTHING) == SMOOTHING.maxlen and len(set(SMOOTHING)) == 1
        stable_gesture = gesture if is_stable else None

        # === Continuous movement ===
        direction = _DIRECTION_VECTORS.get(stable_gesture)
        if direction is not None:
            frame_counter += 1
            if frame_counter >= MOVE_EVERY_N_FRAMES:
                dx = direction[0] * move_step
                dy = direction[1] * move_step
                # Grow the step after every move so the cursor accelerates
                # while the gesture is held.
                move_step += 1
                pyautogui.moveRel(dx, dy, duration=0)  # instantaneous shift
                frame_counter = 0
        else:
            # Non-directional or unstable gesture: reset the counter and the
            # acceleration, and do not move the cursor.
            frame_counter = 0
            move_step = MOVE_STEP

        # Draw the ROI box and the raw (unsmoothed) prediction; quit on 'q'.
        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(frame, f"Gesture: {gesture}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.imshow("Gesture Control", frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even when the loop is
    # interrupted by an exception (for example one raised by pyautogui).
    cap.release()
    cv2.destroyAllWindows()
|