Upload 36 files
Browse files- __init__.py +0 -0
- __pycache__/evaluation.cpython-311.pyc +0 -0
- __pycache__/feature_extraction.cpython-311.pyc +0 -0
- __pycache__/load_dataset.cpython-311.pyc +0 -0
- __pycache__/lstm.cpython-311.pyc +0 -0
- __pycache__/preprocessing.cpython-311.pyc +0 -0
- __pycache__/testing.cpython-311.pyc +0 -0
- __pycache__/training.cpython-311.pyc +0 -0
- benchmarks/ucf101/accuracy_plot.png +0 -0
- benchmarks/ucf101/benchmark.txt +3 -0
- benchmarks/ucf101/confusion_matrix.png +0 -0
- benchmarks/ucf101/loss_plot.png +0 -0
- benchmarks/ucf101/model_performance.png +0 -0
- benchmarks/ucf11/accuracy_plot.png +0 -0
- benchmarks/ucf11/benchmark.txt +3 -0
- benchmarks/ucf11/confusion_matrix.png +0 -0
- benchmarks/ucf11/loss_plot.png +0 -0
- benchmarks/ucf11/model_performance.png +0 -0
- benchmarks/ucf50/accuracy_plot.png +0 -0
- benchmarks/ucf50/benchmark.txt +3 -0
- benchmarks/ucf50/confusion_matrix.png +0 -0
- benchmarks/ucf50/loss_plot.png +0 -0
- benchmarks/ucf50/model_performance.png +0 -0
- evaluation.py +52 -0
- feature_extraction.py +93 -0
- inference.py +97 -0
- label_map_idx2label_ucf101.json +103 -0
- label_map_idx2label_ucf11.json +13 -0
- label_map_idx2label_ucf50.json +53 -0
- load_dataset.py +104 -0
- lstm.py +41 -0
- main.py +54 -0
- models/ucf11_lstm_model.pt +3 -0
- preprocessing.py +10 -0
- testing.py +59 -0
- training.py +153 -0
__init__.py
ADDED
|
File without changes
|
__pycache__/evaluation.cpython-311.pyc
ADDED
|
Binary file (3 kB). View file
|
|
|
__pycache__/feature_extraction.cpython-311.pyc
ADDED
|
Binary file (6.61 kB). View file
|
|
|
__pycache__/load_dataset.cpython-311.pyc
ADDED
|
Binary file (4.34 kB). View file
|
|
|
__pycache__/lstm.cpython-311.pyc
ADDED
|
Binary file (3.79 kB). View file
|
|
|
__pycache__/preprocessing.cpython-311.pyc
ADDED
|
Binary file (829 Bytes). View file
|
|
|
__pycache__/testing.cpython-311.pyc
ADDED
|
Binary file (3.55 kB). View file
|
|
|
__pycache__/training.cpython-311.pyc
ADDED
|
Binary file (9.4 kB). View file
|
|
|
benchmarks/ucf101/accuracy_plot.png
ADDED
|
benchmarks/ucf101/benchmark.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Feature extraction time: 1773.93 seconds
|
| 2 |
+
Training time: 106.37 seconds
|
| 3 |
+
Test Loss: 0.2128, Test Accuracy: 94.37%
|
benchmarks/ucf101/confusion_matrix.png
ADDED
|
benchmarks/ucf101/loss_plot.png
ADDED
|
benchmarks/ucf101/model_performance.png
ADDED
|
benchmarks/ucf11/accuracy_plot.png
ADDED
|
benchmarks/ucf11/benchmark.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Feature extraction time: 88.29 seconds
|
| 2 |
+
Training time: 8.87 seconds
|
| 3 |
+
Test Loss: 0.1348, Test Accuracy: 95.31%
|
benchmarks/ucf11/confusion_matrix.png
ADDED
|
benchmarks/ucf11/loss_plot.png
ADDED
|
benchmarks/ucf11/model_performance.png
ADDED
|
benchmarks/ucf50/accuracy_plot.png
ADDED
|
benchmarks/ucf50/benchmark.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Feature extraction time: 557.77 seconds
|
| 2 |
+
Training time: 106.57 seconds
|
| 3 |
+
Test Loss: 0.1782, Test Accuracy: 94.76%
|
benchmarks/ucf50/confusion_matrix.png
ADDED
|
benchmarks/ucf50/loss_plot.png
ADDED
|
benchmarks/ucf50/model_performance.png
ADDED
|
evaluation.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix
|
| 2 |
+
import matplotlib.pyplot as plt
|
| 3 |
+
import seaborn as sns
|
| 4 |
+
|
| 5 |
+
def modelEvaluation(y_pred, y_pred_proba, y_test, labels, dataset):
    """Evaluate classifier predictions and save benchmark plots.

    Computes accuracy, macro precision/recall/F1 and one-vs-rest AUC,
    then writes a bar chart of the metrics and a confusion-matrix heatmap
    under ./benchmarks/<dataset>/.

    Args:
        y_pred: predicted class indices.
        y_pred_proba: per-class probability matrix (needed for AUC).
        y_test: ground-truth class indices.
        labels: tick labels for the confusion-matrix axes.
        dataset: dataset name, used as the output sub-directory.

    Returns:
        Tuple (accuracy, precision, recall, f1, auc).
    """
    cm = confusion_matrix(y_test, y_pred)

    # Scalar metrics; macro averaging weights every class equally.
    acc_score = accuracy_score(y_test, y_pred)
    pre_score = precision_score(y_test, y_pred, average='macro')
    rec_score = recall_score(y_test, y_pred, average='macro')
    f1 = f1_score(y_test, y_pred, average='macro')
    # One-vs-rest AUC requires probabilities, not hard predictions.
    auc = roc_auc_score(y_test, y_pred_proba, multi_class='ovr')

    # Bar chart of the five summary metrics.
    metrics_labels = ['Accuracy', 'Precision', 'Recall', 'F1-Score', 'AUC']
    metrics_values = [acc_score, pre_score, rec_score, f1, auc]
    plt.figure(figsize=(10, 6))
    ax = sns.barplot(
        x=metrics_labels,
        y=metrics_values,
        hue=metrics_labels,  # hue mirrors x so each bar gets its own color
        dodge=False,
        palette=["#FF6F61", "#92A8D1", "#88B04B", "#F7CAC9", "#61ffbd"],
        legend=False
    )
    for i, v in enumerate(metrics_values):
        ax.text(i, v - 0.04, f"{v:.4f}", ha='center', va='bottom', fontsize=10)
    plt.title("Model Performance Metrics")
    plt.ylim(0, 1)
    plt.ylabel("Score")
    plt.xlabel("Metrics")
    plt.savefig(f"./benchmarks/{dataset}/model_performance.png")
    plt.close()

    # Confusion-matrix heatmap.
    plt.figure(figsize=(12, 10))
    sns.heatmap(cm, annot=False, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)
    plt.title("Confusion Matrix", pad=20)
    plt.xlabel("Predicted", labelpad=15)
    plt.ylabel("Actual", labelpad=15)
    plt.savefig(f"./benchmarks/{dataset}/confusion_matrix.png", bbox_inches='tight')
    # Fix: the confusion-matrix figure was previously never closed,
    # leaking an open matplotlib figure per call.
    plt.close()

    return acc_score, pre_score, rec_score, f1, auc
feature_extraction.py
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
from sklearn.preprocessing import LabelEncoder
|
| 3 |
+
import torch
|
| 4 |
+
import os
|
| 5 |
+
from torchvision import models
|
| 6 |
+
import time
|
| 7 |
+
import json
|
| 8 |
+
|
| 9 |
+
def extract_features(samples, transform, dataset):
    """Push every sampled clip through a headless ResNet50 and collect features.

    Each (frames, label) pair is transformed, stacked into a (T, C, H, W)
    batch and run through ResNet50 with its classification head removed,
    yielding one (T, 2048) feature matrix per clip.  Wall-clock time is
    printed and written to ./benchmarks/<dataset>/benchmark.txt.

    Args:
        samples: iterable of (frames, label) pairs; frames are raw images.
        transform: per-frame preprocessing callable returning a tensor.
        dataset: dataset name used for the benchmark output directory.

    Returns:
        List of (features, label) tuples with features as numpy arrays.
    """
    print("Extracting features using ResNet50...")
    start_time = time.time()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Pretrained backbone; dropping the final FC layer leaves the pooled
    # 2048-d feature vector as the network output.
    resnet = models.resnet50(weights=models.ResNet50_Weights.DEFAULT).to(device)
    resnet.eval()
    resnet_feat = torch.nn.Sequential(*list(resnet.children())[:-1]).to(device)
    resnet_feat.eval()

    processed_samples = []
    for frames, label in samples:
        batch = torch.stack([transform(frame).to(device) for frame in frames], dim=0).to(device)
        with torch.no_grad():
            feat_maps = resnet_feat(batch)  # (T, 2048, 1, 1)
            flat = torch.flatten(feat_maps, start_dim=1).cpu().numpy()
        processed_samples.append((flat, label))

    elapsed = time.time() - start_time
    print(f"Feature extraction completed in {elapsed:.2f} seconds")

    os.makedirs(f'./benchmarks/{dataset}', exist_ok=True)

    # 'w' mode: every run starts a fresh benchmark file for this dataset.
    with open(f'./benchmarks/{dataset}/benchmark.txt', 'w') as f:
        f.write(f"Feature extraction time: {elapsed:.2f} seconds\n")

    return processed_samples
| 41 |
+
|
| 42 |
+
def splittingData(samples, dataset):
    """Shuffle the samples and persist train/val/test feature splits.

    The data is split 80/20 into a training pool and a test set, then the
    pool is split 80/20 again into train/validation (64%/16%/20% overall).
    String labels are encoded to integers with a LabelEncoder; all arrays
    and the index->label map are written under ./features/<dataset>/.

    Args:
        samples: list of (features, label) pairs.
        dataset: dataset name used for the output directory.

    Returns:
        The fitted LabelEncoder.
    """
    np.random.shuffle(samples)

    # First split: hold out the last 20% as the test set.
    split_idx = int(0.8 * len(samples))
    train_pool = samples[:split_idx]
    test_samples = samples[split_idx:]

    # Second split: carve a validation set out of the training pool.
    val_split = int(0.8 * len(train_pool))
    train_samples, val_samples = train_pool[:val_split], train_pool[val_split:]

    train_features, train_labels = zip(*train_samples)
    val_features, val_labels = zip(*val_samples)
    test_features, test_labels = zip(*test_samples)

    # Fit the encoder on the training labels, then reuse it everywhere.
    le = LabelEncoder()
    train_labels = le.fit_transform(train_labels)
    val_labels = le.transform(val_labels)
    test_labels = le.transform(test_labels)

    train_features = np.array(train_features)
    val_features = np.array(val_features)
    test_features = np.array(test_features)

    print("Train Features shape:", train_features.shape)
    print("Train Labels shape:", train_labels.shape)
    print("Validation Features shape:", val_features.shape)
    print("Validation Labels shape:", val_labels.shape)
    print("Test Features shape:", test_features.shape)
    print("Test Labels shape:", test_labels.shape)

    os.makedirs(f'./features/{dataset}', exist_ok=True)

    # Persist each split as a standalone .npy file.
    np.save(f'./features/{dataset}/train_features.npy', train_features)
    np.save(f'./features/{dataset}/train_labels.npy', train_labels)
    np.save(f'./features/{dataset}/val_features.npy', val_features)
    np.save(f'./features/{dataset}/val_labels.npy', val_labels)
    np.save(f'./features/{dataset}/test_features.npy', test_features)
    np.save(f'./features/{dataset}/test_labels.npy', test_labels)

    # Human-readable index -> class-name map for inference-time decoding.
    idx2label = {i: label for i, label in enumerate(le.classes_)}
    with open(f'./features/{dataset}/label_map_idx2label.json', 'w') as f:
        json.dump(idx2label, f, indent=4)

    return le
|
inference.py
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import cv2
|
| 3 |
+
import numpy as np
|
| 4 |
+
from torchvision import models, transforms
|
| 5 |
+
from torchvision.models import resnet50, ResNet50_Weights
|
| 6 |
+
from lstm import MultiLayerBiLSTMClassifier
|
| 7 |
+
from preprocessing import preprocessingData
|
| 8 |
+
import argparse
|
| 9 |
+
import os
|
| 10 |
+
import json
|
| 11 |
+
|
| 12 |
+
def load_label_map(dataset):
    """Return the idx -> label mapping shipped for *dataset*.

    Args:
        dataset: dataset name (e.g. 'ucf11', 'ucf50').

    Raises:
        FileNotFoundError: when no label-map JSON exists for *dataset*.
    """
    label_path = f"src/label_map_idx2label_{dataset}.json"
    if os.path.exists(label_path):
        with open(label_path, "r", encoding="utf-8") as f:
            return json.load(f)
    raise FileNotFoundError(f"Label map not found: {label_path}")
| 18 |
+
|
| 19 |
+
def read_video_frames(video_path, num_frames=16):
    """Sample *num_frames* frames evenly spaced across a video.

    Frames are returned as RGB numpy arrays.  Short reads (duplicate
    sample indices on very short videos, or decode failures) are padded
    by repeating the last decoded frame.

    Args:
        video_path: path to a video readable by OpenCV.
        num_frames: number of frames to sample (default 16).

    Returns:
        List of exactly *num_frames* RGB frames.

    Raises:
        RuntimeError: if the video cannot be opened or yields no frames.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise RuntimeError(f"Cannot open video file: {video_path}")
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    if total_frames == 0:
        raise RuntimeError(f"Video contains no frames: {video_path}")

    # Evenly spaced sample positions.  A set gives O(1) membership tests;
    # the original `idx in ndarray` scanned the array once per decoded
    # frame (O(num_frames) each, for every frame of the video).
    frame_indices = set(np.linspace(0, total_frames - 1, num_frames).astype(int).tolist())
    frames = []
    for idx in range(total_frames):
        ret, frame = cap.read()
        if not ret:
            break
        if idx in frame_indices:
            # OpenCV decodes BGR; convert for downstream models.
            frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    cap.release()

    if len(frames) == 0:
        raise RuntimeError("No frames extracted from video.")
    # Pad short reads so callers always get a fixed-length clip.
    while len(frames) < num_frames:
        frames.append(frames[-1])

    return frames[:num_frames]
|
| 44 |
+
|
| 45 |
+
def load_model(model_path, input_size, hidden_size, num_layers, num_classes):
    """Instantiate the BiLSTM classifier and restore its trained weights.

    The model is placed on GPU when available and switched to eval mode
    so dropout is disabled during inference.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    classifier = MultiLayerBiLSTMClassifier(input_size, hidden_size, num_layers, num_classes)
    classifier = classifier.to(device)
    state = torch.load(model_path, map_location=device)
    classifier.load_state_dict(state)
    classifier.eval()
    return classifier
| 51 |
+
|
| 52 |
+
def inference(dataset, video_path, model_path):
    """Classify a single video end-to-end with the trained HAR model.

    Pipeline: sample frames -> preprocess -> ResNet50 features ->
    BiLSTM classifier -> printed class index and label.
    """
    # NOTE(review): inference samples 32 frames while main.py trains on
    # 16-frame clips — confirm this mismatch is intended.
    num_frames = 32
    hidden_size = 256
    num_layers = 2

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # The label map determines the classifier's output dimension.
    label_map = load_label_map(dataset)
    num_classes = len(label_map)

    # Step 1: decode and preprocess the sampled video frames.
    raw_frames = read_video_frames(video_path, num_frames)
    transform = preprocessingData()
    frames_tensor = torch.stack([transform(frame) for frame in raw_frames], dim=0).to(device)

    # Step 2: per-frame ResNet50 features (classification head removed).
    resnet = models.resnet50(weights=ResNet50_Weights.DEFAULT).to(device)
    resnet_feat = torch.nn.Sequential(*list(resnet.children())[:-1])
    resnet.eval()  # eval() propagates to the child modules shared by resnet_feat
    with torch.no_grad():
        features = torch.flatten(resnet_feat(frames_tensor), start_dim=1).cpu().numpy()

    # Step 3: restore the trained classifier (input size comes from the features).
    input_size = features.shape[1]
    model = load_model(model_path, input_size, hidden_size, num_layers, num_classes)

    # Step 4: predict the action class for the whole clip.
    with torch.no_grad():
        input_seq = torch.from_numpy(features).unsqueeze(0).float().to(device)
        outputs = model(input_seq)
        predicted_class = torch.argmax(outputs, dim=1).item()
    # JSON keys are strings, so index the map with str().
    predicted_label = label_map[str(predicted_class)]

    print(f"Predicted class index: {predicted_class} ({predicted_label})")
| 89 |
+
|
| 90 |
+
if __name__ == "__main__":
    # CLI entry point: dataset name, input video path, model checkpoint path.
    parser = argparse.ArgumentParser(description="Inference on a single video using trained HAR model")
    parser.add_argument("dataset", type=str, help="Dataset used to train model (ucf11 or ucf50)")
    parser.add_argument("video_path", type=str, help="Path to input video file")
    parser.add_argument("model_path", type=str, help="Path to trained model (.pt)")
    args = parser.parse_args()

    # Dataset name is lower-cased to match the label-map file naming.
    inference(args.dataset.lower(), args.video_path, args.model_path)
|
label_map_idx2label_ucf101.json
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"0": "ApplyEyeMakeup",
|
| 3 |
+
"1": "ApplyLipstick",
|
| 4 |
+
"2": "Archery",
|
| 5 |
+
"3": "BabyCrawling",
|
| 6 |
+
"4": "BalanceBeam",
|
| 7 |
+
"5": "BandMarching",
|
| 8 |
+
"6": "BaseballPitch",
|
| 9 |
+
"7": "Basketball",
|
| 10 |
+
"8": "BasketballDunk",
|
| 11 |
+
"9": "BenchPress",
|
| 12 |
+
"10": "Biking",
|
| 13 |
+
"11": "Billiards",
|
| 14 |
+
"12": "BlowDryHair",
|
| 15 |
+
"13": "BlowingCandles",
|
| 16 |
+
"14": "BodyWeightSquats",
|
| 17 |
+
"15": "Bowling",
|
| 18 |
+
"16": "BoxingPunchingBag",
|
| 19 |
+
"17": "BoxingSpeedBag",
|
| 20 |
+
"18": "BreastStroke",
|
| 21 |
+
"19": "BrushingTeeth",
|
| 22 |
+
"20": "CleanAndJerk",
|
| 23 |
+
"21": "CliffDiving",
|
| 24 |
+
"22": "CricketBowling",
|
| 25 |
+
"23": "CricketShot",
|
| 26 |
+
"24": "CuttingInKitchen",
|
| 27 |
+
"25": "Diving",
|
| 28 |
+
"26": "Drumming",
|
| 29 |
+
"27": "Fencing",
|
| 30 |
+
"28": "FieldHockeyPenalty",
|
| 31 |
+
"29": "FloorGymnastics",
|
| 32 |
+
"30": "FrisbeeCatch",
|
| 33 |
+
"31": "FrontCrawl",
|
| 34 |
+
"32": "GolfSwing",
|
| 35 |
+
"33": "Haircut",
|
| 36 |
+
"34": "HammerThrow",
|
| 37 |
+
"35": "HandstandPushups",
|
| 38 |
+
"36": "HandstandWalking",
|
| 39 |
+
"37": "HeadMassage",
|
| 40 |
+
"38": "HighJump",
|
| 41 |
+
"39": "HorseRace",
|
| 42 |
+
"40": "HorseRiding",
|
| 43 |
+
"41": "HulaHoop",
|
| 44 |
+
"42": "IceDancing",
|
| 45 |
+
"43": "JavelinThrow",
|
| 46 |
+
"44": "JugglingBalls",
|
| 47 |
+
"45": "JumpRope",
|
| 48 |
+
"46": "JumpingJack",
|
| 49 |
+
"47": "Kayaking",
|
| 50 |
+
"48": "Knitting",
|
| 51 |
+
"49": "LongJump",
|
| 52 |
+
"50": "Lunges",
|
| 53 |
+
"51": "MilitaryParade",
|
| 54 |
+
"52": "Mixing",
|
| 55 |
+
"53": "MoppingFloor",
|
| 56 |
+
"54": "Nunchucks",
|
| 57 |
+
"55": "ParallelBars",
|
| 58 |
+
"56": "PizzaTossing",
|
| 59 |
+
"57": "PlayingCello",
|
| 60 |
+
"58": "PlayingDaf",
|
| 61 |
+
"59": "PlayingDhol",
|
| 62 |
+
"60": "PlayingFlute",
|
| 63 |
+
"61": "PlayingGuitar",
|
| 64 |
+
"62": "PlayingPiano",
|
| 65 |
+
"63": "PlayingSitar",
|
| 66 |
+
"64": "PlayingTabla",
|
| 67 |
+
"65": "PlayingViolin",
|
| 68 |
+
"66": "PoleVault",
|
| 69 |
+
"67": "PommelHorse",
|
| 70 |
+
"68": "PullUps",
|
| 71 |
+
"69": "Punch",
|
| 72 |
+
"70": "PushUps",
|
| 73 |
+
"71": "Rafting",
|
| 74 |
+
"72": "RockClimbingIndoor",
|
| 75 |
+
"73": "RopeClimbing",
|
| 76 |
+
"74": "Rowing",
|
| 77 |
+
"75": "SalsaSpin",
|
| 78 |
+
"76": "ShavingBeard",
|
| 79 |
+
"77": "Shotput",
|
| 80 |
+
"78": "SkateBoarding",
|
| 81 |
+
"79": "Skiing",
|
| 82 |
+
"80": "Skijet",
|
| 83 |
+
"81": "SkyDiving",
|
| 84 |
+
"82": "SoccerJuggling",
|
| 85 |
+
"83": "SoccerPenalty",
|
| 86 |
+
"84": "StillRings",
|
| 87 |
+
"85": "SumoWrestling",
|
| 88 |
+
"86": "Surfing",
|
| 89 |
+
"87": "Swing",
|
| 90 |
+
"88": "TableTennisShot",
|
| 91 |
+
"89": "TaiChi",
|
| 92 |
+
"90": "TennisSwing",
|
| 93 |
+
"91": "ThrowDiscus",
|
| 94 |
+
"92": "TrampolineJumping",
|
| 95 |
+
"93": "Typing",
|
| 96 |
+
"94": "UnevenBars",
|
| 97 |
+
"95": "VolleyballSpiking",
|
| 98 |
+
"96": "WalkingWithDog",
|
| 99 |
+
"97": "WallPushups",
|
| 100 |
+
"98": "WritingOnBoard",
|
| 101 |
+
"99": "YoYo",
|
| 102 |
+
"100": "PushUpsClap"
|
| 103 |
+
}
|
label_map_idx2label_ucf11.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"0": "basketball_shooting",
|
| 3 |
+
"1": "biking",
|
| 4 |
+
"2": "diving",
|
| 5 |
+
"3": "golf_swing",
|
| 6 |
+
"4": "horse_riding",
|
| 7 |
+
"5": "soccer_juggling",
|
| 8 |
+
"6": "swing",
|
| 9 |
+
"7": "tennis_swing",
|
| 10 |
+
"8": "trampoline_jumping",
|
| 11 |
+
"9": "volleyball_spiking",
|
| 12 |
+
"10": "walking"
|
| 13 |
+
}
|
label_map_idx2label_ucf50.json
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"0": "Baseball Pitch",
|
| 3 |
+
"1": "Basketball Shooting",
|
| 4 |
+
"2": "Bench Press",
|
| 5 |
+
"3": "Biking",
|
| 6 |
+
"4": "Billiards",
|
| 7 |
+
"5": "BreastStroke",
|
| 8 |
+
"6": "Clean and Jerk",
|
| 9 |
+
"7": "Cricket Bowling",
|
| 10 |
+
"8": "Cricket Shot",
|
| 11 |
+
"9": "Diving",
|
| 12 |
+
"10": "Drumming",
|
| 13 |
+
"11": "Fencing",
|
| 14 |
+
"12": "Floor Gymnastics",
|
| 15 |
+
"13": "Golf Swing",
|
| 16 |
+
"14": "Hammer Throw",
|
| 17 |
+
"15": "High Jump",
|
| 18 |
+
"16": "Horse Race",
|
| 19 |
+
"17": "Horse Riding",
|
| 20 |
+
"18": "Hula Hoop",
|
| 21 |
+
"19": "Javelin Throw",
|
| 22 |
+
"20": "Juggling Balls",
|
| 23 |
+
"21": "Jump Rope",
|
| 24 |
+
"22": "Jumping Jack",
|
| 25 |
+
"23": "Kayaking",
|
| 26 |
+
"24": "Lunges",
|
| 27 |
+
"25": "Military Parade",
|
| 28 |
+
"26": "Mixing Batter",
|
| 29 |
+
"27": "Nun Chucks",
|
| 30 |
+
"28": "Parallel Bars",
|
| 31 |
+
"29": "Pizza Tossing",
|
| 32 |
+
"30": "Playing Cello",
|
| 33 |
+
"31": "Playing Daf",
|
| 34 |
+
"32": "Playing Dhol",
|
| 35 |
+
"33": "Playing Flute",
|
| 36 |
+
"34": "Playing Guitar",
|
| 37 |
+
"35": "Playing Piano",
|
| 38 |
+
"36": "Playing Tabla",
|
| 39 |
+
"37": "Playing Violin",
|
| 40 |
+
"38": "Pole Vault",
|
| 41 |
+
"39": "Pommel Horse",
|
| 42 |
+
"40": "Pull Ups",
|
| 43 |
+
"41": "Punch",
|
| 44 |
+
"42": "Push Ups",
|
| 45 |
+
"43": "Rock Climbing Indoor",
|
| 46 |
+
"44": "Rope Climbing",
|
| 47 |
+
"45": "Rowing",
|
| 48 |
+
"46": "Salsa Spin",
|
| 49 |
+
"47": "Skate Boarding",
|
| 50 |
+
"48": "Skiing",
|
| 51 |
+
"49": "Skijet",
|
| 52 |
+
"50": "Soccer Juggling"
|
| 53 |
+
}
|
load_dataset.py
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import cv2
|
| 3 |
+
import rarfile
|
| 4 |
+
import patoolib
|
| 5 |
+
import tempfile
|
| 6 |
+
import shutil
|
| 7 |
+
import random
|
| 8 |
+
|
| 9 |
+
def read_UCF11(data_dir, num_frames):
    """
    Read up to ``num_frames`` RGB frames from every clip in a UCF11 tree.

    Layout assumed: <data_dir>/<label>/<group>/<video>, with per-label
    'Annotation' sub-directories skipped.  Clips that yield fewer than
    ``num_frames`` frames are silently dropped.  (Note: this function only
    reads frames — transformation and ResNet50 feature extraction happen
    later in the pipeline.)

    Returns:
        list of (frames, label) tuples; frames are RGB numpy arrays.
    """
    samples= [] # List to store (frames, label) pairs
    # Loop over the videos in the dataset folder
    for label in os.listdir(data_dir):
        label_dir = os.path.join(data_dir, label)
        print(label_dir)
        for sub_dir in os.listdir(label_dir):
            if sub_dir == 'Annotation':
                continue
            video_dir = os.path.join(label_dir, sub_dir)
            for video_file in os.listdir(video_dir):
                video_path = os.path.join(video_dir, video_file)
                cap = cv2.VideoCapture(video_path)
                frame_count = 0
                frames = []
                while True:
                    ret, frame = cap.read()
                    if ret:
                        frame_count += 1
                        # OpenCV decodes BGR; convert for downstream models.
                        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                        frames.append(frame)
                        if frame_count == num_frames:
                            break
                    else:
                        break
                cap.release()
                # Keep only clips long enough to fill the frame window.
                if len(frames) == num_frames:
                    samples.append((frames, label))
    return samples
| 42 |
+
|
| 43 |
+
def read_UCF50(data_dir, num_frames):
    """
    Read up to ``num_frames`` RGB frames from every clip in a UCF50 tree.

    Layout assumed: <data_dir>/<label>/<video>.  Clips that yield fewer
    than ``num_frames`` frames are silently dropped.  (Note: this function
    only reads frames — transformation and ResNet50 feature extraction
    happen later in the pipeline.)

    Returns:
        list of (frames, label) tuples; frames are RGB numpy arrays.
    """
    samples= [] # List to store (frames, label) pairs
    # Loop over the videos in the dataset folder
    for label in os.listdir(data_dir):
        label_dir = os.path.join(data_dir, label)
        print(label_dir)
        for video_file in os.listdir(label_dir):
            video_path = os.path.join(label_dir, video_file)
            cap = cv2.VideoCapture(video_path)
            frame_count = 0
            frames = []
            while True:
                ret, frame = cap.read()
                if ret:
                    frame_count += 1
                    # OpenCV decodes BGR; convert for downstream models.
                    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    frames.append(frame)
                    if frame_count == num_frames:
                        break
                else:
                    break
            cap.release()
            # Keep only clips long enough to fill the frame window.
            if len(frames) == num_frames:
                samples.append((frames, label))
    return samples
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
def read_UCF101(data_dir, num_frames):
    """
    Read up to ``num_frames`` RGB frames from every clip in a UCF101 tree.

    (Docstring fixed: this previously claimed to be for the UCF50 dataset
    and to extract ResNet50 features — it does neither.)  Layout assumed:
    <data_dir>/<label>/<video>.  Clips that yield fewer than
    ``num_frames`` frames are silently dropped.

    Returns:
        list of (frames, label) tuples; frames are RGB numpy arrays.
    """
    samples= [] # List to store (frames, label) pairs
    # Loop over the videos in the dataset folder
    for label in os.listdir(data_dir):
        label_dir = os.path.join(data_dir, label)
        print(label_dir)

        for video_file in os.listdir(label_dir):
            video_path = os.path.join(label_dir, video_file)
            cap = cv2.VideoCapture(video_path)
            frame_count = 0
            frames = []
            while True:
                ret, frame = cap.read()
                if ret:
                    frame_count += 1
                    # OpenCV decodes BGR; convert for downstream models.
                    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    frames.append(frame)
                    if frame_count == num_frames:
                        break
                else:
                    break
            cap.release()

            # Keep only clips long enough to fill the frame window.
            if len(frames) == num_frames:
                samples.append((frames, label))
    return samples
lstm.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
class LSTMClassifier(nn.Module):
    """Single-layer LSTM video classifier.

    Consumes a (batch, time, input_size) sequence, keeps the last time
    step's hidden output and maps it to per-class softmax probabilities.

    NOTE(review): the softmax output is unsuitable as direct input to
    nn.CrossEntropyLoss, which expects raw logits — verify how callers
    train this class before using it.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return (batch, num_classes) class probabilities."""
        # Explicit zero initial states (same as nn.LSTM's default).
        state = torch.zeros(1, x.size(0), self.hidden_size, device=x.device)
        seq_out, _ = self.lstm(x, (state, state))
        # Classify from the final time step only.
        logits = self.fc(seq_out[:, -1, :])
        return torch.softmax(logits, dim=1)
| 21 |
+
|
| 22 |
+
class MultiLayerBiLSTMClassifier(nn.Module):
    """Stacked bidirectional LSTM classifier emitting raw logits.

    The final time step's output (forward and backward directions
    concatenated, hence hidden_size * 2) is passed through dropout and a
    linear head.  No softmax is applied, so the output can feed
    nn.CrossEntropyLoss directly.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # bidirectional=True doubles the effective feature width;
        # inter-layer dropout of 0.2 applies between stacked layers.
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers=num_layers,
                            batch_first=True, bidirectional=True, dropout=0.2)
        self.fc = nn.Linear(hidden_size * 2, num_classes)
        self.dropout = nn.Dropout(0.2)

    def forward(self, x):
        """Return (batch, num_classes) logits for a (batch, T, F) input."""
        # One zero state per direction per layer (matches the LSTM default).
        state_rows = 2 * self.num_layers
        init = torch.zeros(state_rows, x.size(0), self.hidden_size, device=x.device)
        seq_out, _ = self.lstm(x, (init, init))
        # Dropout on the last time step's features, then the linear head.
        last_step = self.dropout(seq_out[:, -1, :])
        return self.fc(last_step)
|
main.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from load_dataset import read_UCF11, read_UCF50, read_UCF101
|
| 2 |
+
from preprocessing import preprocessingData
|
| 3 |
+
from feature_extraction import extract_features, splittingData
|
| 4 |
+
from training import loadFeatures, trainModel
|
| 5 |
+
from testing import testModel
|
| 6 |
+
from evaluation import modelEvaluation
|
| 7 |
+
import numpy as np
|
| 8 |
+
import argparse
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def main(dataset, data_dir):
    """Run the full pipeline: load -> preprocess -> features -> train -> test -> evaluate.

    Args:
        dataset: dataset name ('ucf11', 'ucf50' or 'ucf101', case-insensitive).
        data_dir: root directory of the raw video dataset.

    Raises:
        ValueError: for an unrecognized dataset name.  (Previously an
        unknown name fell through the if/elif chain and crashed later
        with an unbound-`samples` NameError.)
    """
    # Number of frames sampled per clip for feature extraction.
    num_frames = 16
    # Normalize once instead of calling dataset.lower() at every step.
    name = dataset.lower()

    # Load the raw (frames, label) samples for the requested dataset.
    if name == 'ucf11':
        samples = read_UCF11(data_dir, num_frames)
    elif name == 'ucf50':
        samples = read_UCF50(data_dir, num_frames)
    elif name == 'ucf101':
        samples = read_UCF101(data_dir, num_frames)
    else:
        raise ValueError(f"Unsupported dataset '{dataset}'; expected ucf11, ucf50 or ucf101")

    # Build the frame preprocessing transform (resize/normalize for ResNet50).
    transform = preprocessingData()

    # Extract per-frame ResNet50 features for every clip.
    processed_samples = extract_features(samples, transform, name)

    # Split into train/val/test sets and persist arrays + label map.
    le = splittingData(processed_samples, name)

    # Reload the persisted feature splits.
    train_features, train_labels, val_features, val_labels, test_features, test_labels = loadFeatures(name)

    # Train the classifier.
    model = trainModel(train_features, train_labels, val_features, val_labels, name)

    # Test on the held-out split.
    y_pred, y_pred_proba = testModel(model, test_features, test_labels, name, num_frames)

    # Evaluate and save benchmark plots.
    labels = np.arange(0, len(np.unique(test_labels)), 1)
    acc_score, pre_score, rec_score, f1, auc = modelEvaluation(y_pred, y_pred_proba, test_labels, labels, name)
| 46 |
+
|
| 47 |
+
|
| 48 |
+
if __name__ == "__main__":
    # CLI entry point: positional dataset name and dataset root directory.
    parser = argparse.ArgumentParser(description='Train and evaluate a model on UCF11 or UCF50 dataset.')
    parser.add_argument('dataset', type=str, help='Dataset to use (UCF11 or UCF50)')
    parser.add_argument('data_dir', type=str, help='Directory containing the dataset')
    args = parser.parse_args()

    main(args.dataset, args.data_dir)
|
models/ucf11_lstm_model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:849bb832172d8fbdd0bc5fca049c8247c002701358377d8b6db6e04504333575
|
| 3 |
+
size 25224403
|
preprocessing.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torchvision.transforms as transforms
|
| 2 |
+
|
| 3 |
+
def preprocessingData():
    """Build the frame-preprocessing pipeline expected by ResNet50.

    Returns a torchvision transform that takes a NumPy frame and yields a
    normalized 3x224x224 float tensor.
    """
    # NumPy frame -> PIL image -> 224x224 (ResNet50 input size) -> tensor
    # scaled to [0, 1] -> normalized with the ImageNet statistics that
    # ResNet50 was pretrained on.
    return transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
|
testing.py
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
import torch.optim as optim
|
| 4 |
+
from torch.utils.data import DataLoader, TensorDataset
|
| 5 |
+
import numpy as np
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def testModel(model, test_features, test_labels, dataset, num_frames=32):
|
| 9 |
+
"""
|
| 10 |
+
"Test the LSTM model on the test set."
|
| 11 |
+
"""
|
| 12 |
+
model.eval()
|
| 13 |
+
test_loss = 0.0
|
| 14 |
+
test_total = 0
|
| 15 |
+
test_correct = 0
|
| 16 |
+
|
| 17 |
+
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 18 |
+
model.to(device)
|
| 19 |
+
criterion = nn.CrossEntropyLoss()
|
| 20 |
+
|
| 21 |
+
test_dataset = TensorDataset(test_features, test_labels)
|
| 22 |
+
test_loader = DataLoader(test_dataset, batch_size=num_frames, shuffle=False)
|
| 23 |
+
|
| 24 |
+
predicted_labels = []
|
| 25 |
+
all_outputs = []
|
| 26 |
+
|
| 27 |
+
with torch.no_grad():
|
| 28 |
+
for batch_x, batch_y in test_loader:
|
| 29 |
+
batch_x, batch_y = batch_x.to(device), batch_y.to(device)
|
| 30 |
+
outputs = model(batch_x)
|
| 31 |
+
|
| 32 |
+
loss = criterion(outputs, batch_y.long())
|
| 33 |
+
test_loss += loss.item()
|
| 34 |
+
|
| 35 |
+
_, predicted = torch.max(outputs, 1)
|
| 36 |
+
predicted_labels.extend(predicted.cpu().numpy())
|
| 37 |
+
|
| 38 |
+
# Store softmax probabilities
|
| 39 |
+
softmax_outputs = torch.nn.functional.softmax(outputs, dim=1)
|
| 40 |
+
all_outputs.append(softmax_outputs.cpu().numpy())
|
| 41 |
+
|
| 42 |
+
test_correct += (predicted == batch_y).sum().item()
|
| 43 |
+
test_total += batch_y.size(0)
|
| 44 |
+
|
| 45 |
+
y_pred = predicted_labels
|
| 46 |
+
y_pred_proba = np.vstack(all_outputs) # Convert list of arrays to a single numpy array
|
| 47 |
+
|
| 48 |
+
test_loss /= len(test_loader)
|
| 49 |
+
test_accuracy = 100 * test_correct / test_total
|
| 50 |
+
|
| 51 |
+
# Print final testing results
|
| 52 |
+
print(f'Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.2f}%')
|
| 53 |
+
|
| 54 |
+
# Save the test results to file .txt
|
| 55 |
+
with open(f'./benchmarks/{dataset}/benchmark.txt', 'a') as f:
|
| 56 |
+
f.write(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.2f}%\n")
|
| 57 |
+
|
| 58 |
+
return y_pred, y_pred_proba
|
| 59 |
+
|
training.py
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
import torch.optim as optim
|
| 4 |
+
import numpy as np
|
| 5 |
+
from torch.optim.lr_scheduler import StepLR
|
| 6 |
+
from torch.utils.data import DataLoader, TensorDataset
|
| 7 |
+
import matplotlib.pyplot as plt
|
| 8 |
+
import time
|
| 9 |
+
import os
|
| 10 |
+
from lstm import LSTMClassifier, MultiLayerBiLSTMClassifier
|
| 11 |
+
|
| 12 |
+
def loadFeatures(dataset):
    """Load the cached train/val/test features and labels for *dataset*.

    The training split is shuffled in place (features and labels with one
    shared permutation); the validation and test splits keep stored order.

    Returns:
        (train_features, train_labels, val_features, val_labels,
         test_features, test_labels) as torch tensors; features are float32.
    """
    def _load(split):
        # Both arrays live under ./features/<dataset>/ as numpy dumps.
        feats = torch.from_numpy(np.load(f'./features/{dataset}/{split}_features.npy')).float()
        labels = torch.from_numpy(np.load(f'./features/{dataset}/{split}_labels.npy'))
        return feats, labels

    train_features, train_labels = _load('train')
    # One random permutation applied to both tensors keeps pairs aligned.
    shuffle = np.random.permutation(len(train_features))
    train_features = train_features[shuffle]
    train_labels = train_labels[shuffle]

    val_features, val_labels = _load('val')
    test_features, test_labels = _load('test')

    return train_features, train_labels, val_features, val_labels, test_features, test_labels
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def trainModel(train_features, train_labels, val_features, val_labels, dataset, num_epochs=100, num_frames=32, hidden_size=256, learning_rate=0.0001):
    """
    Train the LSTM model with validation and early stopping.

    Args:
        train_features: tensor of training features; last dim is input size.
        train_labels: tensor of integer class labels for the training set.
        val_features, val_labels: validation-split tensors.
        dataset: dataset name, used for benchmark and model output paths.
        num_epochs: maximum number of epochs (early stopping may end sooner).
        num_frames: batch size for the DataLoaders.
        hidden_size: LSTM hidden state size.
        learning_rate: Adam learning rate.

    Returns:
        The trained model (its weights are also saved under ./models/).
    """
    input_size = train_features.shape[-1]
    num_classes = len(np.unique(train_labels))

    train_losses = []
    train_accuracies = []
    val_losses = []
    val_accuracies = []

    patience = 10  # Early stopping patience
    best_val_loss = float("inf")
    counter = 0

    # Instantiate the LSTM model.
    # Fix: the original also built an unused LSTMClassifier(...).cuda(),
    # which crashed on CPU-only machines and was immediately overwritten.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = MultiLayerBiLSTMClassifier(input_size, hidden_size, 2, num_classes).to(device)

    # Define the loss function, optimizer, and LR schedule (halve every 20 epochs)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    scheduler = StepLR(optimizer, step_size=20, gamma=0.5)

    # Prepare DataLoaders
    train_dataset = TensorDataset(train_features, train_labels)
    val_dataset = TensorDataset(val_features, val_labels)
    train_loader = DataLoader(train_dataset, batch_size=num_frames, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=num_frames, shuffle=False)

    # Fix: create the benchmark directory *before* any file is written there
    # (previously makedirs ran after the benchmark.txt append, which failed
    # on a fresh checkout).
    os.makedirs(f'./benchmarks/{dataset}', exist_ok=True)

    # Training loop
    start_time = time.time()
    for epoch in range(num_epochs):
        # ---- Training ----
        model.train()
        train_loss, train_total, train_correct = 0.0, 0, 0

        for batch_features, batch_labels in train_loader:
            batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)

            optimizer.zero_grad()
            outputs = model(batch_features)
            loss = criterion(outputs, batch_labels.long())
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            train_correct += (predicted == batch_labels).sum().item()
            train_total += batch_labels.size(0)

        train_loss = train_loss / len(train_loader)
        train_accuracy = 100 * train_correct / train_total
        train_losses.append(train_loss)
        train_accuracies.append(train_accuracy)

        # ---- Validation ----
        model.eval()
        total_val_loss, correct_val, total_val = 0.0, 0, 0
        with torch.no_grad():
            for batch_features, batch_labels in val_loader:
                batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)
                outputs = model(batch_features)
                loss = criterion(outputs, batch_labels.long())

                total_val_loss += loss.item()
                _, predicted = torch.max(outputs, 1)
                correct_val += (predicted == batch_labels).sum().item()
                total_val += batch_labels.size(0)

        val_loss = total_val_loss / len(val_loader)
        val_accuracy = 100 * correct_val / total_val
        val_losses.append(val_loss)
        val_accuracies.append(val_accuracy)

        scheduler.step()

        # Print training and validation results.
        # Fix: report num_epochs (was hard-coded "/100") and print before the
        # early-stop break so the final epoch is not silently dropped.
        print(f'Epoch: [{epoch+1}/{num_epochs}] Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.2f}% Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.2f}%')

        # Early stopping
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            counter = 0  # Reset counter if validation loss improves
        else:
            counter += 1  # Increment counter if validation loss does not improve
            if counter >= patience:
                print("Early stopping triggered")
                break

    end_time = time.time()
    print(f'Training completed in {end_time - start_time:.2f} seconds')

    # Save the training time in benchmark.txt
    with open(f'./benchmarks/{dataset}/benchmark.txt', 'a') as f:
        f.write(f"Training time: {end_time - start_time:.2f} seconds\n")

    # Plot training and validation losses
    plt.figure(figsize=(10, 5))
    plt.plot(train_losses, label='Training Loss')
    plt.plot(val_losses, label='Validation Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.savefig(f'./benchmarks/{dataset}/loss_plot.png')
    plt.close()

    # Plot training and validation accuracies
    plt.figure(figsize=(10, 5))
    plt.plot(train_accuracies, label='Training Accuracy')
    plt.plot(val_accuracies, label='Validation Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.savefig(f'./benchmarks/{dataset}/accuracy_plot.png')
    plt.close()

    # Save model weights
    os.makedirs('./models', exist_ok=True)
    torch.save(model.state_dict(), f'./models/{dataset}_lstm_model.pt')
    print(f'Model saved to ./models/{dataset}_lstm_model.pt')

    return model
|