LEGIONM36 committed on
Commit
0e6cabe
·
verified ·
1 Parent(s): 5993c16

Upload 4 files

Browse files
Files changed (4) hide show
  1. best_model.pth +3 -0
  2. model.py +56 -0
  3. readme.md +28 -0
  4. train.py +304 -0
best_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd16663a6f7564fa5eb3f0dec7f038c4365a74aa50dfd6428a87e9c666bd031f
3
+ size 56066741
model.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+
4
class ViolenceConv3D(nn.Module):
    """4-layer 3D CNN for binary violence classification.

    Expects input of shape (batch, 3, 16, 112, 112): clips of 16 RGB frames
    at 112x112 resolution. Produces raw logits of shape (batch, 2).
    """

    def __init__(self):
        super().__init__()

        # Stage 1: spatial-only pooling, preserving the 16-frame depth.
        self.conv1 = nn.Conv3d(3, 32, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.bn1 = nn.BatchNorm3d(32)
        self.pool1 = nn.MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2))

        # Stages 2-4: halve depth, height and width at each stage.
        self.conv2 = nn.Conv3d(32, 64, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.bn2 = nn.BatchNorm3d(64)
        self.pool2 = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2))

        self.conv3 = nn.Conv3d(64, 128, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.bn3 = nn.BatchNorm3d(128)
        self.pool3 = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2))

        self.conv4 = nn.Conv3d(128, 256, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.bn4 = nn.BatchNorm3d(256)
        self.pool4 = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2))

        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)

        # Feature-map size after each pool for a 16x112x112 input:
        #   pool1 -> 16 x 56 x 56, pool2 -> 8 x 28 x 28,
        #   pool3 ->  4 x 14 x 14, pool4 -> 2 x  7 x  7
        self.flatten_dim = 256 * 2 * 7 * 7

        self.fc1 = nn.Linear(self.flatten_dim, 512)
        self.fc2 = nn.Linear(512, 2)  # Binary Classification (Violence vs No-Violence)

    def forward(self, x):
        """Run all conv stages, flatten, and classify. Returns logits."""
        stages = (
            (self.conv1, self.bn1, self.pool1),
            (self.conv2, self.bn2, self.pool2),
            (self.conv3, self.bn3, self.pool3),
            (self.conv4, self.bn4, self.pool4),
        )
        for conv, bn, pool in stages:
            x = pool(self.relu(bn(conv(x))))

        flat = x.view(x.size(0), -1)
        hidden = self.dropout(self.relu(self.fc1(flat)))
        return self.fc2(hidden)
readme.md ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Violence Detection using Conv3D
2
+
3
+ ## Model Architecture
4
+ - **Type**: 3D Convolutional Neural Network (Conv3D)
5
+ - **Input**: Video sequence of 16 frames, resized to 112x112.
6
+ - **Structure**:
7
+ - 4 Conv3D Layers with BatchNorm, ReLU, and MaxPooling.
8
+ - Flatten Layer.
9
+ - 2 Fully Connected Layers.
10
+ - Dropout (0.5) for regularization.
11
+ - **Output**: Binary Classification (Violence vs No-Violence).
12
+
13
+ ## Dataset Structure
14
+ The code expects a `Dataset` folder in the parent directory (or modify `DATASET_DIR` in `train.py`).
15
+ Structure:
16
+ ```
17
+ Dataset/
18
+ ├── violence/
19
+ │ ├── video1.mp4
20
+ │ └── ...
21
+ └── no-violence/
22
+ ├── video2.mp4
23
+ └── ...
24
+ ```
25
+
26
+ ## How to Run
27
+ 1. Install dependencies: `torch`, `opencv-python`, `scikit-learn`, `numpy`.
28
+ 2. Run `python train.py`.
train.py ADDED
@@ -0,0 +1,304 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import cv2
3
+ import numpy as np
4
+ import torch
5
+ import torch.nn as nn
6
+ import torch.optim as optim
7
+ from torch.utils.data import Dataset, DataLoader
8
+ from sklearn.model_selection import train_test_split
9
+ from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
10
+ import time
11
+ from model import ViolenceConv3D
12
+
13
# --- Configuration ---
# Adjusted for standalone folder usage
# We point to the parent directory's dataset to avoid copying errors
# In a real GitHub repo, users should place 'Dataset' in the root or update this path.
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))  # Points to parent of Conv3D_Model
DATASET_DIR = os.path.join(BASE_DIR, "Dataset")
MODEL_SAVE_PATH = "best_model.pth"  # best-validation checkpoint written by EarlyStopping

# Hyperparameters
IMG_SIZE = 112  # frames resized to IMG_SIZE x IMG_SIZE; must match the model's 112x112 input
SEQ_LEN = 16  # frames per clip; must match the model's expected temporal depth
BATCH_SIZE = 50  # clips per batch
EPOCHS = 80  # upper bound; early stopping normally ends training sooner
LEARNING_RATE = 1e-4  # Adam learning rate
PATIENCE = 5  # Early stopping patience
28
+
29
# --- 1. Data Augmentation ---
def augment_video_frames(frames):
    """
    Apply random augmentation to a sequence of frames.

    All frames in the sequence receive the same transformation parameters,
    so the clip stays temporally consistent.

    Args:
        frames: iterable of H x W x C uint8 frames (e.g. the output of
            ``ViolenceDataset._load_video``).

    Returns:
        np.ndarray stacking the augmented frames.
    """
    augmented_frames = []

    # Draw one set of augmentation parameters for the whole clip.
    do_flip = np.random.random() > 0.5
    do_rotate = np.random.random() > 0.5
    # Fix: randint's upper bound is exclusive, so (-15, 15) only produced
    # -15..+14 degrees. Use 16 for a symmetric [-15, +15] range.
    angle = np.random.randint(-15, 16) if do_rotate else 0

    # Color jitter parameters (Brightness/Contrast), shared across frames.
    brightness = np.random.uniform(0.8, 1.2)
    contrast = np.random.uniform(0.8, 1.2)

    for frame in frames:
        new_frame = frame.copy()

        # Horizontal Flip
        if do_flip:
            new_frame = cv2.flip(new_frame, 1)

        # Rotation about the frame center
        if do_rotate:
            (h, w) = new_frame.shape[:2]
            center = (w // 2, h // 2)
            M = cv2.getRotationMatrix2D(center, angle, 1.0)
            new_frame = cv2.warpAffine(new_frame, M, (w, h))

        # Color Jitter: alpha scales contrast, beta shifts brightness.
        new_frame = cv2.convertScaleAbs(new_frame, alpha=contrast, beta=(brightness - 1) * 50)

        augmented_frames.append(new_frame)

    return np.array(augmented_frames)
66
+
67
# --- Dataset Class ---
class ViolenceDataset(Dataset):
    """Video dataset yielding (C, D, H, W) float tensors and int labels.

    Each item is a clip of SEQ_LEN frames resized to IMG_SIZE x IMG_SIZE and
    normalized to [0, 1]. A corrupted/unreadable video falls back to an
    all-zero clip so one bad file cannot crash a training epoch.
    """

    def __init__(self, video_paths, labels, transform=None, augment=False):
        self.video_paths = video_paths
        self.labels = labels
        # Fix: `transform` was previously accepted but silently discarded.
        # It is now stored and, if given, applied to the final tensor.
        self.transform = transform
        self.augment = augment

    def __len__(self):
        return len(self.video_paths)

    def __getitem__(self, idx):
        path = self.video_paths[idx]
        label = self.labels[idx]

        try:
            frames = self._load_video(path)
        except Exception as e:
            # Fallback for corrupted video: keep the epoch alive.
            print(f"Error loading {path}: {e}")
            frames = np.zeros((SEQ_LEN, IMG_SIZE, IMG_SIZE, 3), dtype=np.uint8)

        if self.augment:
            frames = augment_video_frames(frames)

        # Normalize to [0, 1] and reorder to channel-first (C, D, H, W).
        frames = torch.tensor(frames, dtype=torch.float32)
        frames = frames / 255.0
        frames = frames.permute(3, 0, 1, 2)

        # Optional user transform, applied to the normalized tensor.
        if self.transform is not None:
            frames = self.transform(frames)

        return frames, label

    def _load_video(self, path):
        """Read every frame of `path` resized to IMG_SIZE, then pad (repeat
        last frame) or uniformly subsample to exactly SEQ_LEN frames."""
        cap = cv2.VideoCapture(path)
        frames = []
        try:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                frame = cv2.resize(frame, (IMG_SIZE, IMG_SIZE))
                frames.append(frame)
        finally:
            cap.release()

        # Nothing decoded: return an all-zero clip.
        if len(frames) == 0:
            return np.zeros((SEQ_LEN, IMG_SIZE, IMG_SIZE, 3), dtype=np.uint8)

        if len(frames) < SEQ_LEN:
            # Pad short clips by repeating the final frame.
            frames.extend([frames[-1]] * (SEQ_LEN - len(frames)))
        elif len(frames) > SEQ_LEN:
            # Uniformly subsample long clips down to SEQ_LEN frames.
            indices = np.linspace(0, len(frames) - 1, SEQ_LEN, dtype=int)
            frames = [frames[i] for i in indices]

        return np.array(frames)
123
+
124
# --- 2. Data Splitting ---
def prepare_data():
    """Collect video paths and split them 70/15/15 into train/val/test.

    Returns:
        ((X_train, y_train), (X_val, y_val), (X_test, y_test)) where each X
        is a list of file paths and each y a list of int labels
        (1 = violence, 0 = no-violence).

    Raises:
        FileNotFoundError: if the expected class sub-directories are missing.
    """
    violence_dir = os.path.join(DATASET_DIR, 'violence')
    no_violence_dir = os.path.join(DATASET_DIR, 'no-violence')

    if not os.path.exists(violence_dir) or not os.path.exists(no_violence_dir):
        raise FileNotFoundError(f"Dataset directories not found. Expected {violence_dir} and {no_violence_dir}")

    # Fix: os.listdir order is filesystem-dependent, which made the
    # "random_state=42" split irreproducible across machines. sorted()
    # pins the file order so the stratified split is deterministic.
    video_exts = ('.avi', '.mp4')
    violence_files = [os.path.join(violence_dir, f) for f in sorted(os.listdir(violence_dir)) if f.endswith(video_exts)]
    no_violence_files = [os.path.join(no_violence_dir, f) for f in sorted(os.listdir(no_violence_dir)) if f.endswith(video_exts)]

    print(f"Found {len(violence_files)} Violence videos")
    print(f"Found {len(no_violence_files)} No-Violence videos")

    X = violence_files + no_violence_files
    y = [1] * len(violence_files) + [0] * len(no_violence_files)

    # Split (70% Train, 15% Val, 15% Test), stratified so both classes keep
    # the same proportions in every split.
    X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.30, random_state=42, stratify=y)
    X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.50, random_state=42, stratify=y_temp)

    print(f"\nDataset Split Stats:")
    print(f"Train: {len(X_train)} samples")
    print(f"Val: {len(X_val)} samples")
    print(f"Test: {len(X_test)} samples")

    return (X_train, y_train), (X_val, y_val), (X_test, y_test)
151
+
152
# --- Early Stopping ---
class EarlyStopping:
    """Stop training when validation loss fails to improve for `patience`
    consecutive checks, checkpointing the model on every improvement."""

    def __init__(self, patience=5, verbose=False, path='checkpoint.pth'):
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.inf
        self.path = path

    def __call__(self, val_loss, model):
        # Higher score is better (score is the negated loss).
        score = -val_loss

        if self.best_score is None or score >= self.best_score:
            # First observation, or an improvement: record and checkpoint.
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0
        else:
            self.counter += 1
            if self.verbose:
                print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True

    def save_checkpoint(self, val_loss, model):
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...')
        # NOTE(review): pickles the entire module (not just its state_dict),
        # matching what the training script later reloads with torch.load.
        torch.save(model, self.path)
        self.val_loss_min = val_loss
185
+
186
# --- Main Execution ---
if __name__ == "__main__":
    # Wall-clock timer for the full run (training + evaluation).
    start_time = time.time()

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    # Prepare Data: collect file paths and make the 70/15/15 split.
    try:
        (X_train, y_train), (X_val, y_val), (X_test, y_test) = prepare_data()
    except Exception as e:
        print(f"Data preparation failed: {e}")
        exit(1)

    # Only the training set is augmented; val/test stay deterministic.
    train_dataset = ViolenceDataset(X_train, y_train, augment=True)
    val_dataset = ViolenceDataset(X_val, y_val, augment=False)
    test_dataset = ViolenceDataset(X_test, y_test, augment=False)

    # num_workers=0 decodes videos in the main process — the safe default;
    # raise it if data loading becomes the bottleneck.
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

    # Model Setup
    model = ViolenceConv3D().to(device)
    criterion = nn.CrossEntropyLoss()  # expects raw logits + int class labels
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

    # Checkpoints the best model to MODEL_SAVE_PATH and flags early_stop
    # after PATIENCE epochs without validation-loss improvement.
    early_stopping = EarlyStopping(patience=PATIENCE, verbose=True, path=MODEL_SAVE_PATH)

    # Training Loop
    print("\nStarting Training...")

    for epoch in range(EPOCHS):
        model.train()
        train_loss = 0.0
        correct = 0
        total = 0

        for batch_idx, (inputs, labels) in enumerate(train_loader):
            inputs, labels = inputs.to(device), labels.to(device)

            # Standard step: clear grads, forward, loss, backward, update.
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Running totals for epoch-level loss/accuracy reporting.
            train_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            if batch_idx % 10 == 0:
                # end='\r' keeps the progress readout on a single console line.
                print(f"Epoch {epoch+1} Batch {batch_idx}/{len(train_loader)} Loss: {loss.item():.4f}", end='\r')

        train_acc = 100 * correct / total
        avg_train_loss = train_loss / len(train_loader)

        # Validation Phase: no grads, no dropout/BN updates (eval mode).
        model.eval()
        val_loss = 0.0
        correct_val = 0
        total_val = 0

        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                total_val += labels.size(0)
                correct_val += (predicted == labels).sum().item()

        val_acc = 100 * correct_val / total_val
        avg_val_loss = val_loss / len(val_loader)

        print(f'\nEpoch [{epoch+1}/{EPOCHS}] '
              f'Train Loss: {avg_train_loss:.4f} Acc: {train_acc:.2f}% '
              f'Val Loss: {avg_val_loss:.4f} Acc: {val_acc:.2f}%')

        # Checkpoint on improvement; break out once patience is exhausted.
        early_stopping(avg_val_loss, model)
        if early_stopping.early_stop:
            print("Early stopping triggered")
            break

    # Evaluation: reload the best checkpoint (falls back to the last-epoch
    # weights if no checkpoint was written).
    print("\nLoading best model for overall evaluation...")
    if os.path.exists(MODEL_SAVE_PATH):
        # NOTE(review): EarlyStopping pickled the whole module, so torch.load
        # returns a model object here. Consider map_location=device if the
        # checkpoint may have been written on a different device — confirm.
        model = torch.load(MODEL_SAVE_PATH)
    else:
        print("Warning: Model file not found, using last epoch model.")

    model.eval()

    all_preds = []
    all_labels = []

    print("Evaluating on Test set...")
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            # Collected on CPU so sklearn metrics can consume them below.
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    print("\n=== Overall Evaluation Report ===")
    print(classification_report(all_labels, all_preds, target_names=['No Violence', 'Violence']))

    print("Confusion Matrix:")
    cm = confusion_matrix(all_labels, all_preds)
    print(cm)

    acc = accuracy_score(all_labels, all_preds)
    print(f"\nFinal Test Accuracy: {acc*100:.2f}%")

    elapsed = time.time() - start_time
    print(f"\nTotal execution time: {elapsed/60:.2f} minutes")