LEGIONM36 committed on
Commit
6028065
·
verified ·
1 Parent(s): eaa2357

Upload 4 files

Browse files
Files changed (4) hide show
  1. best_model_gru.pth +3 -0
  2. model.py +80 -0
  3. readme.md +22 -0
  4. train.py +288 -0
best_model_gru.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fa210e987f9c4953ad894831b39110c295c93985b4909c3710b362620173486
3
+ size 42486857
model.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+
4
class ViolenceGRU(nn.Module):
    """CNN + GRU classifier for violence detection in short video clips.

    A 4-stage 2D CNN encodes each frame independently; a 2-layer GRU then
    models the temporal sequence of per-frame features, and a linear head
    maps the final hidden output to two class logits.
    """

    def __init__(self):
        super(ViolenceGRU, self).__init__()

        # 2D CNN backbone, applied to every frame independently.
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.pool1 = nn.MaxPool2d(2, 2)

        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.pool2 = nn.MaxPool2d(2, 2)

        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.pool3 = nn.MaxPool2d(2, 2)

        self.conv4 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.bn4 = nn.BatchNorm2d(256)
        self.pool4 = nn.MaxPool2d(2, 2)

        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)

        # Spatial size after four 2x2 poolings of a 112x112 input:
        # 112 -> 56 -> 28 -> 14 -> 7, with 256 channels at the end.
        self.feature_dim = 256 * 7 * 7

        # Temporal model: 2-layer GRU over per-frame feature vectors
        # (dropout applies between the two stacked layers).
        self.gru = nn.GRU(input_size=self.feature_dim, hidden_size=256, num_layers=2, batch_first=True, dropout=0.5)

        # Binary classification head (raw logits for CrossEntropyLoss).
        self.fc = nn.Linear(256, 2)

    def forward(self, x):
        """Map a clip tensor of shape (B, C, S, H, W) to logits of shape (B, 2)."""
        batch, channels, seq, height, width = x.size()

        # Fold time into the batch axis so the 2D CNN treats every frame
        # as an independent image: (B*S, C, H, W).
        frames = x.permute(0, 2, 1, 3, 4).contiguous()
        frames = frames.view(batch * seq, channels, height, width)

        # Four conv -> BN -> ReLU -> pool stages.
        feat = self.pool1(self.relu(self.bn1(self.conv1(frames))))
        feat = self.pool2(self.relu(self.bn2(self.conv2(feat))))
        feat = self.pool3(self.relu(self.bn3(self.conv3(feat))))
        feat = self.pool4(self.relu(self.bn4(self.conv4(feat))))

        # Flatten the spatial maps and restore the time axis: (B, S, feature_dim).
        feat = feat.view(batch, seq, -1)

        # Run the GRU and keep only the last time step's output per clip.
        seq_out, _ = self.gru(feat)
        last = seq_out[:, -1, :]

        return self.fc(self.dropout(last))
readme.md ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Violence GRU Model
2
+
3
+ ## Model Architecture
4
+ - **Type**: CNN-GRU Hybrid
5
+ - **Components**:
6
+ - **CNN Backbone**: 4-Layer 2D CNN to extract spatial features from each frame.
7
+ - **Recurrent Unit**: 2-Layer GRU (Gated Recurrent Unit) to model temporal dependencies.
8
+ - **Classifier**: Fully Connected Layer.
9
+ - **Input**: Video sequence of 16 frames, resized to 112x112.
10
+ - **Output**: Binary Classification (Violence vs No-Violence).
11
+
12
+ ## Dataset Structure
13
+ The code expects a `Dataset` folder in the parent directory.
14
+ ```
15
+ Dataset/
16
+ ├── violence/
17
+ └── no-violence/
18
+ ```
19
+
20
+ ## How to Run
21
+ 1. Install dependencies: `torch`, `opencv-python`, `scikit-learn`, `numpy`.
22
+ 2. Run `python train.py`.
train.py ADDED
@@ -0,0 +1,288 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import cv2
3
+ import numpy as np
4
+ import torch
5
+ import torch.nn as nn
6
+ import torch.optim as optim
7
+ from torch.utils.data import Dataset, DataLoader
8
+ from sklearn.model_selection import train_test_split
9
+ from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
10
+ import time
11
+ from model import ViolenceGRU
12
+
13
# --- Configuration ---
# Paths: the Dataset folder is expected one level above this script's directory.
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
DATASET_DIR = os.path.join(BASE_DIR, "Dataset")
MODEL_SAVE_PATH = "best_model_gru.pth"  # where the best checkpoint is written

# Hyperparameters
IMG_SIZE = 112        # frames are resized to IMG_SIZE x IMG_SIZE
SEQ_LEN = 16          # frames sampled (or padded) per video clip
BATCH_SIZE = 50       # clips per batch
EPOCHS = 80           # maximum epochs (early stopping may end training sooner)
LEARNING_RATE = 1e-4  # Adam learning rate
PATIENCE = 5          # epochs without val-loss improvement before stopping
25
+
26
# --- 1. Data Augmentation ---
def augment_video_frames(frames):
    """Apply one consistent random augmentation to every frame of a clip.

    The random decisions (flip, rotation angle, brightness/contrast) are
    drawn once per clip and reused for all frames, so the augmentation is
    temporally coherent across the sequence.

    Args:
        frames: iterable of HxWx3 uint8 frames (OpenCV BGR).

    Returns:
        np.ndarray stacking the augmented frames (same shape/dtype).
    """
    augmented_frames = []

    do_flip = np.random.random() > 0.5
    do_rotate = np.random.random() > 0.5
    # Fix: randint's upper bound is exclusive, so the original (-15, 15)
    # could never draw +15 degrees; use 16 for a symmetric -15..+15 range.
    angle = np.random.randint(-15, 16) if do_rotate else 0

    brightness = np.random.uniform(0.8, 1.2)
    contrast = np.random.uniform(0.8, 1.2)

    for frame in frames:
        new_frame = frame.copy()

        if do_flip:
            new_frame = cv2.flip(new_frame, 1)  # horizontal mirror

        if do_rotate:
            (h, w) = new_frame.shape[:2]
            center = (w // 2, h // 2)
            M = cv2.getRotationMatrix2D(center, angle, 1.0)
            new_frame = cv2.warpAffine(new_frame, M, (w, h))

        # alpha scales contrast; beta shifts brightness by up to +/-10 levels.
        new_frame = cv2.convertScaleAbs(new_frame, alpha=contrast, beta=(brightness-1)*50)
        augmented_frames.append(new_frame)

    return np.array(augmented_frames)
56
+
57
# --- Dataset Class ---
class ViolenceDataset(Dataset):
    """Video-clip dataset: one fixed-length frame sequence per video file.

    Each item is a float32 tensor of shape (C, SEQ_LEN, H, W) scaled to
    [0, 1], paired with an integer class label.
    """

    def __init__(self, video_paths, labels, transform=None, augment=False):
        self.video_paths = video_paths
        self.labels = labels
        # Fix: `transform` was previously accepted but silently discarded.
        # It is now stored and, when provided, applied to the raw frame
        # array before normalization (default None keeps the old behavior).
        self.transform = transform
        self.augment = augment

    def __len__(self):
        return len(self.video_paths)

    def __getitem__(self, idx):
        path = self.video_paths[idx]
        label = self.labels[idx]

        try:
            frames = self._load_video(path)
        except Exception as e:
            # Corrupt/unreadable video: fall back to an all-black clip so a
            # single bad file does not crash the whole training run.
            print(f"Error loading {path}: {e}")
            frames = np.zeros((SEQ_LEN, IMG_SIZE, IMG_SIZE, 3), dtype=np.uint8)

        if self.augment:
            frames = augment_video_frames(frames)

        if self.transform is not None:
            frames = self.transform(frames)

        # Normalize to [0, 1] and move channels first: (C, SEQ_LEN, H, W).
        frames = torch.tensor(frames, dtype=torch.float32)
        frames = frames / 255.0
        frames = frames.permute(3, 0, 1, 2)

        return frames, label

    def _load_video(self, path):
        """Read a video, resize frames, and sample/pad to exactly SEQ_LEN frames."""
        cap = cv2.VideoCapture(path)
        frames = []
        try:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                frame = cv2.resize(frame, (IMG_SIZE, IMG_SIZE))
                frames.append(frame)
        finally:
            cap.release()  # always release the capture handle

        if len(frames) == 0:
            # Unreadable/empty file -> black clip.
            return np.zeros((SEQ_LEN, IMG_SIZE, IMG_SIZE, 3), dtype=np.uint8)

        if len(frames) < SEQ_LEN:
            # Too short: pad by repeating the last frame.
            while len(frames) < SEQ_LEN:
                frames.append(frames[-1])
        elif len(frames) > SEQ_LEN:
            # Too long: sample SEQ_LEN frames uniformly across the video.
            indices = np.linspace(0, len(frames)-1, SEQ_LEN, dtype=int)
            frames = [frames[i] for i in indices]

        return np.array(frames)
111
+
112
# --- 2. Data Splitting ---
def prepare_data():
    """Collect video paths/labels and split into train/val/test (70/15/15).

    Returns:
        ((X_train, y_train), (X_val, y_val), (X_test, y_test)) where X_* are
        lists of file paths and y_* are int labels (1=violence, 0=no-violence).

    Raises:
        FileNotFoundError: if either class directory is missing.
    """
    violence_dir = os.path.join(DATASET_DIR, 'violence')
    no_violence_dir = os.path.join(DATASET_DIR, 'no-violence')

    if not os.path.exists(violence_dir) or not os.path.exists(no_violence_dir):
        raise FileNotFoundError(f"Dataset directories not found. Expected {violence_dir} and {no_violence_dir}")

    # Fix: match extensions case-insensitively (.AVI/.MP4 were silently
    # skipped before) and use a single endswith call with a tuple.
    video_exts = ('.avi', '.mp4')
    violence_files = [os.path.join(violence_dir, f) for f in os.listdir(violence_dir) if f.lower().endswith(video_exts)]
    no_violence_files = [os.path.join(no_violence_dir, f) for f in os.listdir(no_violence_dir) if f.lower().endswith(video_exts)]

    X = violence_files + no_violence_files
    y = [1] * len(violence_files) + [0] * len(no_violence_files)

    # Stratified 70/30 split, then split the held-out 30% evenly into val/test.
    X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.30, random_state=42, stratify=y)
    X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.50, random_state=42, stratify=y_temp)

    print(f"\nDataset Split Stats:")
    print(f"Train: {len(X_train)} samples")
    print(f"Val: {len(X_val)} samples")
    print(f"Test: {len(X_test)} samples")

    return (X_train, y_train), (X_val, y_val), (X_test, y_test)
135
+
136
# --- 4. Early Stopping ---
class EarlyStopping:
    """Stop training after `patience` epochs without a validation-loss improvement.

    Saves the model (via torch.save) whenever the validation loss reaches a
    new minimum; `early_stop` flips to True once `patience` consecutive
    non-improving calls have been observed.
    """

    def __init__(self, patience=5, verbose=False, path='checkpoint.pth'):
        self.patience = patience
        self.verbose = verbose
        self.counter = 0          # consecutive epochs without improvement
        self.best_score = None    # negated best validation loss so far
        self.early_stop = False
        self.val_loss_min = np.inf
        self.path = path

    def __call__(self, val_loss, model):
        score = -val_loss  # higher score == lower (better) loss

        if self.best_score is None:
            # First call always counts as an improvement.
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            return

        if score < self.best_score:
            # Strictly worse than the best seen: count it.
            self.counter += 1
            if self.verbose:
                print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            # New best (ties count as improvements, matching prior behavior).
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        """Persist the model and record the new minimum validation loss."""
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...')
        # NOTE(review): this pickles the entire model object, not just its
        # state_dict; loading requires the model class to be importable.
        torch.save(model, self.path)
        self.val_loss_min = val_loss
169
+
170
# --- Main Execution ---
# Full training/evaluation driver: split data, train with early stopping on
# validation loss, then report test-set metrics using the best checkpoint.
if __name__ == "__main__":
    start_time = time.time()

    # Prefer GPU when available; all tensors/model are moved to this device.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    # Prepare Data
    try:
        (X_train, y_train), (X_val, y_val), (X_test, y_test) = prepare_data()
    except Exception as e:
        # Abort early: nothing below can run without the dataset split.
        print(f"Data preparation failed: {e}")
        exit(1)

    # Only the training set is augmented; val/test stay deterministic.
    train_dataset = ViolenceDataset(X_train, y_train, augment=True)
    val_dataset = ViolenceDataset(X_val, y_val, augment=False)
    test_dataset = ViolenceDataset(X_test, y_test, augment=False)

    # num_workers=0: videos are decoded in the main process (simplest/portable).
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

    # Model Setup (GRU)
    model = ViolenceGRU().to(device)
    criterion = nn.CrossEntropyLoss()  # expects raw logits + integer labels
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

    # Best-on-validation checkpointing + early stopping.
    early_stopping = EarlyStopping(patience=PATIENCE, verbose=True, path=MODEL_SAVE_PATH)

    # Training Loop
    print("\nStarting GRU Training...")

    for epoch in range(EPOCHS):
        model.train()
        train_loss = 0.0
        correct = 0
        total = 0

        for batch_idx, (inputs, labels) in enumerate(train_loader):
            inputs, labels = inputs.to(device), labels.to(device)

            # Standard SGD step: forward, loss, backward, update.
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Accumulate running loss and accuracy statistics.
            train_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            # Lightweight in-place progress line every 10 batches.
            if batch_idx % 10 == 0:
                print(f"Epoch {epoch+1} Batch {batch_idx}/{len(train_loader)} Loss: {loss.item():.4f}", end='\r')

        train_acc = 100 * correct / total
        avg_train_loss = train_loss / len(train_loader)

        # Validation
        model.eval()
        val_loss = 0.0
        correct_val = 0
        total_val = 0

        # No gradients needed for evaluation.
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                total_val += labels.size(0)
                correct_val += (predicted == labels).sum().item()

        val_acc = 100 * correct_val / total_val
        avg_val_loss = val_loss / len(val_loader)

        print(f'\nEpoch [{epoch+1}/{EPOCHS}] '
              f'Train Loss: {avg_train_loss:.4f} Acc: {train_acc:.2f}% '
              f'Val Loss: {avg_val_loss:.4f} Acc: {val_acc:.2f}%')

        # Saves a checkpoint on improvement; sets early_stop after PATIENCE
        # consecutive epochs without improvement.
        early_stopping(avg_val_loss, model)
        if early_stopping.early_stop:
            print("Early stopping triggered")
            break

    # --- Overall Evaluation ---
    print("\nLoading best GRU model for evaluation...")
    if os.path.exists(MODEL_SAVE_PATH):
        # NOTE(review): the checkpoint is a fully pickled model object; on
        # PyTorch >= 2.6, torch.load defaults to weights_only=True and this
        # call may need weights_only=False — verify against the torch version.
        model = torch.load(MODEL_SAVE_PATH)
    else:
        print("Warning: Model file not found, using last epoch model.")

    model.eval()

    all_preds = []
    all_labels = []

    print("Evaluating on Test set...")
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            # Collect predictions/labels on CPU for sklearn metrics.
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    print("\n=== GRU Model Evaluation Report ===")
    print(classification_report(all_labels, all_preds, target_names=['No Violence', 'Violence']))

    print("Confusion Matrix:")
    cm = confusion_matrix(all_labels, all_preds)
    print(cm)

    acc = accuracy_score(all_labels, all_preds)
    print(f"\nFinal Test Accuracy: {acc*100:.2f}%")

    elapsed = time.time() - start_time
    print(f"\nTotal execution time: {elapsed/60:.2f} minutes")