bd04 committed on
Commit
9339e0c
·
verified ·
1 Parent(s): 7b375cd

Upload 36 files

Browse files
__init__.py ADDED
File without changes
__pycache__/evaluation.cpython-311.pyc ADDED
Binary file (3 kB). View file
 
__pycache__/feature_extraction.cpython-311.pyc ADDED
Binary file (6.61 kB). View file
 
__pycache__/load_dataset.cpython-311.pyc ADDED
Binary file (4.34 kB). View file
 
__pycache__/lstm.cpython-311.pyc ADDED
Binary file (3.79 kB). View file
 
__pycache__/preprocessing.cpython-311.pyc ADDED
Binary file (829 Bytes). View file
 
__pycache__/testing.cpython-311.pyc ADDED
Binary file (3.55 kB). View file
 
__pycache__/training.cpython-311.pyc ADDED
Binary file (9.4 kB). View file
 
benchmarks/ucf101/accuracy_plot.png ADDED
benchmarks/ucf101/benchmark.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ Feature extraction time: 1773.93 seconds
2
+ Training time: 106.37 seconds
3
+ Test Loss: 0.2128, Test Accuracy: 94.37%
benchmarks/ucf101/confusion_matrix.png ADDED
benchmarks/ucf101/loss_plot.png ADDED
benchmarks/ucf101/model_performance.png ADDED
benchmarks/ucf11/accuracy_plot.png ADDED
benchmarks/ucf11/benchmark.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ Feature extraction time: 88.29 seconds
2
+ Training time: 8.87 seconds
3
+ Test Loss: 0.1348, Test Accuracy: 95.31%
benchmarks/ucf11/confusion_matrix.png ADDED
benchmarks/ucf11/loss_plot.png ADDED
benchmarks/ucf11/model_performance.png ADDED
benchmarks/ucf50/accuracy_plot.png ADDED
benchmarks/ucf50/benchmark.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ Feature extraction time: 557.77 seconds
2
+ Training time: 106.57 seconds
3
+ Test Loss: 0.1782, Test Accuracy: 94.76%
benchmarks/ucf50/confusion_matrix.png ADDED
benchmarks/ucf50/loss_plot.png ADDED
benchmarks/ucf50/model_performance.png ADDED
evaluation.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix
2
+ import matplotlib.pyplot as plt
3
+ import seaborn as sns
4
+
5
def modelEvaluation(y_pred, y_pred_proba, y_test, labels, dataset):
    """
    Evaluate model predictions and save summary plots.

    Parameters
    ----------
    y_pred : array-like of predicted class indices.
    y_pred_proba : per-class probability matrix (n_samples, n_classes),
        required by the one-vs-rest AUC.
    y_test : array-like of ground-truth class indices.
    labels : tick labels for the confusion-matrix axes.
    dataset : dataset name; plots are written to ./benchmarks/{dataset}/.

    Returns
    -------
    (accuracy, precision, recall, f1, auc); precision/recall/F1 are
    macro-averaged.
    """
    cm = confusion_matrix(y_test, y_pred)

    # Macro averaging weights every class equally.
    acc_score = accuracy_score(y_test, y_pred)
    pre_score = precision_score(y_test, y_pred, average='macro')
    rec_score = recall_score(y_test, y_pred, average='macro')
    f1 = f1_score(y_test, y_pred, average='macro')
    # One-vs-rest AUC needs the full probability matrix, not hard predictions.
    auc = roc_auc_score(y_test, y_pred_proba, multi_class='ovr')

    # Bar chart of the five summary metrics.
    metrics_labels = ['Accuracy', 'Precision', 'Recall', 'F1-Score', 'AUC']
    metrics_values = [acc_score, pre_score, rec_score, f1, auc]
    plt.figure(figsize=(10, 6))
    ax = sns.barplot(
        x=metrics_labels,
        y=metrics_values,
        hue=metrics_labels,  # hue mirrors x so each bar gets its own palette color
        dodge=False,
        palette=["#FF6F61", "#92A8D1", "#88B04B", "#F7CAC9", "#61ffbd"],
        legend=False
    )
    for i, v in enumerate(metrics_values):
        ax.text(i, v - 0.04, f"{v:.4f}", ha='center', va='bottom', fontsize=10)
    plt.title("Model Performance Metrics")
    plt.ylim(0, 1)
    plt.ylabel("Score")
    plt.xlabel("Metrics")
    plt.savefig(f"./benchmarks/{dataset}/model_performance.png")
    plt.close()

    # Confusion matrix heatmap (annotations disabled: too many classes to read).
    plt.figure(figsize=(12, 10))
    sns.heatmap(cm, annot=False, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)
    plt.title(f"Confusion Matrix", pad=20)
    plt.xlabel("Predicted", labelpad=15)
    plt.ylabel("Actual", labelpad=15)
    plt.savefig(f"./benchmarks/{dataset}/confusion_matrix.png", bbox_inches='tight')
    # Fix: close this figure too; the original leaked one matplotlib figure
    # per call because only the metrics figure was closed.
    plt.close()

    return acc_score, pre_score, rec_score, f1, auc
feature_extraction.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from sklearn.preprocessing import LabelEncoder
3
+ import torch
4
+ import os
5
+ from torchvision import models
6
+ import time
7
+ import json
8
+
9
def extract_features(samples, transform, dataset):
    """
    Run every sampled clip through an ImageNet-pretrained ResNet50 and
    return per-frame feature vectors.

    Parameters
    ----------
    samples : iterable of (frames, label) pairs; frames is a list of RGB
        numpy arrays.
    transform : torchvision transform applied to each frame before the CNN.
    dataset : dataset name; the timing is written to
        ./benchmarks/{dataset}/benchmark.txt (created fresh here, later
        stages append).

    Returns
    -------
    list of (features, label) pairs; features has shape (num_frames, 2048).
    """
    print("Extracting features using ResNet50...")
    start_time = time.time()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    resnet = models.resnet50(weights=models.ResNet50_Weights.DEFAULT).to(device)

    # Drop the final fully-connected layer so the network emits pooled
    # 2048-d features instead of class logits.  eval() freezes batch-norm
    # statistics and disables dropout for deterministic features; calling
    # it once on the Sequential is enough (it shares resnet's modules).
    resnet_feat = torch.nn.Sequential(*list(resnet.children())[:-1])
    resnet_feat.eval()

    processed_samples = []
    for frames, label in samples:
        # Transform on CPU, then move the whole stacked clip to the device
        # once.  The original moved every frame individually AND moved the
        # stacked tensor again — redundant host/device transfers.
        frames_tensor = torch.stack([transform(frame) for frame in frames], dim=0).to(device)
        with torch.no_grad():
            features_tensor = resnet_feat(frames_tensor)  # Shape: (T, 2048, 1, 1)
            features = torch.flatten(features_tensor, start_dim=1).cpu().numpy()
        processed_samples.append((features, label))

    end_time = time.time()
    print(f"Feature extraction completed in {end_time - start_time:.2f} seconds")

    os.makedirs(f'./benchmarks/{dataset}', exist_ok=True)

    # 'w' intentionally truncates: this is the first pipeline stage.
    with open(f'./benchmarks/{dataset}/benchmark.txt', 'w') as f:
        f.write(f"Feature extraction time: {end_time - start_time:.2f} seconds\n")

    return processed_samples
41
+
42
def splittingData(samples, dataset):
    """
    Shuffle, split (train/val/test = 64/16/20), integer-encode labels and
    persist everything under ./features/{dataset}/.

    Parameters
    ----------
    samples : list of (features, label) pairs from extract_features().
    dataset : dataset name used to build the output directory.

    Returns
    -------
    The fitted sklearn LabelEncoder so callers can map indices back to
    class names.
    """
    # Shuffle in place so the split is random.
    np.random.shuffle(samples)

    # Fix: fit the encoder on ALL labels up front.  The original fit only
    # on the training split, so transform() raised ValueError whenever a
    # class happened to land exclusively in the validation or test split.
    le = LabelEncoder()
    le.fit([label for _, label in samples])

    # 80% train / 20% test, then 20% of train held out for validation.
    split_idx = int(0.8 * len(samples))
    train_samples = samples[:split_idx]
    test_samples = samples[split_idx:]
    validation_split_idx = int(0.8 * len(train_samples))
    train_samples, val_samples = train_samples[:validation_split_idx], train_samples[validation_split_idx:]

    # Separate features and labels per split.
    train_features, train_labels = zip(*train_samples)
    val_features, val_labels = zip(*val_samples)
    test_features, test_labels = zip(*test_samples)

    # Encode string labels as integer class indices.
    train_labels = le.transform(train_labels)
    val_labels = le.transform(val_labels)
    test_labels = le.transform(test_labels)

    train_features = np.array(train_features)
    val_features = np.array(val_features)
    test_features = np.array(test_features)

    # Print the shapes of the features and labels arrays
    print("Train Features shape:", train_features.shape)
    print("Train Labels shape:", train_labels.shape)
    print("Validation Features shape:", val_features.shape)
    print("Validation Labels shape:", val_labels.shape)
    print("Test Features shape:", test_features.shape)
    print("Test Labels shape:", test_labels.shape)

    os.makedirs(f'./features/{dataset}', exist_ok=True)

    # Persist arrays so training can run without re-extracting features.
    np.save(f'./features/{dataset}/train_features.npy', train_features)
    np.save(f'./features/{dataset}/train_labels.npy', train_labels)
    np.save(f'./features/{dataset}/val_features.npy', val_features)
    np.save(f'./features/{dataset}/val_labels.npy', val_labels)
    np.save(f'./features/{dataset}/test_features.npy', test_features)
    np.save(f'./features/{dataset}/test_labels.npy', test_labels)

    # Human-readable index -> class-name map, used by inference.
    idx2label = {i: label for i, label in enumerate(le.classes_)}
    with open(f'./features/{dataset}/label_map_idx2label.json', 'w') as f:
        json.dump(idx2label, f, indent=4)

    # Return the LabelEncoder for later use
    return le
inference.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import cv2
3
+ import numpy as np
4
+ from torchvision import models, transforms
5
+ from torchvision.models import resnet50, ResNet50_Weights
6
+ from lstm import MultiLayerBiLSTMClassifier
7
+ from preprocessing import preprocessingData
8
+ import argparse
9
+ import os
10
+ import json
11
+
12
def load_label_map(dataset):
    """Load the index -> class-name JSON map saved for *dataset*.

    Raises FileNotFoundError when the map file is absent.
    """
    label_path = f"src/label_map_idx2label_{dataset}.json"
    if os.path.exists(label_path):
        with open(label_path, "r", encoding="utf-8") as f:
            return json.load(f)
    raise FileNotFoundError(f"Label map not found: {label_path}")
18
+
19
def read_video_frames(video_path, num_frames=16):
    """
    Sample `num_frames` frames, evenly spaced over the whole video.

    Frames are returned as RGB numpy arrays.  If fewer frames than
    requested are collected (duplicate sample indices on short videos,
    decode errors), the last frame is repeated as padding.

    Raises RuntimeError when the file cannot be opened or has no frames.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise RuntimeError(f"Cannot open video file: {video_path}")
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    if total_frames == 0:
        raise RuntimeError(f"Video contains no frames: {video_path}")

    # Fix: a set gives O(1) membership tests; the original scanned the
    # whole numpy index array once per decoded frame.
    frame_indices = set(np.linspace(0, total_frames - 1, num_frames).astype(int))
    last_index = max(frame_indices)
    frames = []
    for idx in range(total_frames):
        ret, frame = cap.read()
        if not ret:
            break
        if idx in frame_indices:
            frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            # Fix: stop decoding after the last sampled frame instead of
            # reading (and discarding) the remainder of the video.
            if idx == last_index:
                break
    cap.release()

    if len(frames) == 0:
        raise RuntimeError("No frames extracted from video.")
    # Pad with the final frame when duplicate sample indices produced
    # fewer than num_frames frames.
    while len(frames) < num_frames:
        frames.append(frames[-1])

    return frames[:num_frames]
44
+
45
def load_model(model_path, input_size, hidden_size, num_layers, num_classes):
    """Build a MultiLayerBiLSTMClassifier, load its weights from disk and
    return it in eval mode (on GPU when available)."""
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    classifier = MultiLayerBiLSTMClassifier(input_size, hidden_size, num_layers, num_classes)
    classifier = classifier.to(device)
    # map_location keeps GPU-trained checkpoints loadable on CPU-only hosts.
    state = torch.load(model_path, map_location=device)
    classifier.load_state_dict(state)
    classifier.eval()
    return classifier
51
+
52
def inference(dataset, video_path, model_path):
    """
    Classify a single video with a trained BiLSTM checkpoint.

    Pipeline: sample frames -> preprocess -> ResNet50 features -> BiLSTM
    -> argmax.  Prints the predicted class index and label; returns None.

    NOTE(review): num_frames is 32 here while main.py samples 16 frames
    per clip at training time — the LSTM accepts variable sequence
    lengths, but confirm the mismatch is intentional.
    """
    num_frames = 32
    # hidden_size / num_layers must match the values used at training time.
    hidden_size = 256
    num_layers = 2

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Load label map and number of classes
    label_map = load_label_map(dataset)
    num_classes = len(label_map)

    # Step 1: Read and process video
    frames = read_video_frames(video_path, num_frames)
    transform = preprocessingData()
    transformed_frames = [transform(frame) for frame in frames]
    frames_tensor = torch.stack(transformed_frames, dim=0).to(device)

    # Step 2: Extract features with ResNet50 minus its final FC layer,
    # mirroring feature_extraction.extract_features.
    resnet = models.resnet50(weights=ResNet50_Weights.DEFAULT).to(device)
    resnet_feat = torch.nn.Sequential(*list(resnet.children())[:-1])
    resnet.eval()
    with torch.no_grad():
        features_tensor = resnet_feat(frames_tensor)
        features = torch.flatten(features_tensor, start_dim=1).cpu().numpy()

    # Step 3: Load model — input size equals the CNN feature width.
    input_size = features.shape[1]
    model = load_model(model_path, input_size, hidden_size, num_layers, num_classes)

    # Step 4: Predict — the clip's frame sequence forms a batch of one.
    with torch.no_grad():
        input_seq = torch.from_numpy(features).unsqueeze(0).float().to(device)
        outputs = model(input_seq)
        predicted_class = torch.argmax(outputs, dim=1).item()
        # JSON keys are strings, hence str() on the index.
        predicted_label = label_map[str(predicted_class)]

    print(f"Predicted class index: {predicted_class} ({predicted_label})")
89
+
90
if __name__ == "__main__":
    # CLI wrapper: dataset name, video file, trained checkpoint path.
    arg_parser = argparse.ArgumentParser(description="Inference on a single video using trained HAR model")
    arg_parser.add_argument("dataset", type=str, help="Dataset used to train model (ucf11 or ucf50)")
    arg_parser.add_argument("video_path", type=str, help="Path to input video file")
    arg_parser.add_argument("model_path", type=str, help="Path to trained model (.pt)")
    cli_args = arg_parser.parse_args()
    inference(cli_args.dataset.lower(), cli_args.video_path, cli_args.model_path)
label_map_idx2label_ucf101.json ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "0": "ApplyEyeMakeup",
3
+ "1": "ApplyLipstick",
4
+ "2": "Archery",
5
+ "3": "BabyCrawling",
6
+ "4": "BalanceBeam",
7
+ "5": "BandMarching",
8
+ "6": "BaseballPitch",
9
+ "7": "Basketball",
10
+ "8": "BasketballDunk",
11
+ "9": "BenchPress",
12
+ "10": "Biking",
13
+ "11": "Billiards",
14
+ "12": "BlowDryHair",
15
+ "13": "BlowingCandles",
16
+ "14": "BodyWeightSquats",
17
+ "15": "Bowling",
18
+ "16": "BoxingPunchingBag",
19
+ "17": "BoxingSpeedBag",
20
+ "18": "BreastStroke",
21
+ "19": "BrushingTeeth",
22
+ "20": "CleanAndJerk",
23
+ "21": "CliffDiving",
24
+ "22": "CricketBowling",
25
+ "23": "CricketShot",
26
+ "24": "CuttingInKitchen",
27
+ "25": "Diving",
28
+ "26": "Drumming",
29
+ "27": "Fencing",
30
+ "28": "FieldHockeyPenalty",
31
+ "29": "FloorGymnastics",
32
+ "30": "FrisbeeCatch",
33
+ "31": "FrontCrawl",
34
+ "32": "GolfSwing",
35
+ "33": "Haircut",
36
+ "34": "HammerThrow",
37
+ "35": "HandstandPushups",
38
+ "36": "HandstandWalking",
39
+ "37": "HeadMassage",
40
+ "38": "HighJump",
41
+ "39": "HorseRace",
42
+ "40": "HorseRiding",
43
+ "41": "HulaHoop",
44
+ "42": "IceDancing",
45
+ "43": "JavelinThrow",
46
+ "44": "JugglingBalls",
47
+ "45": "JumpRope",
48
+ "46": "JumpingJack",
49
+ "47": "Kayaking",
50
+ "48": "Knitting",
51
+ "49": "LongJump",
52
+ "50": "Lunges",
53
+ "51": "MilitaryParade",
54
+ "52": "Mixing",
55
+ "53": "MoppingFloor",
56
+ "54": "Nunchucks",
57
+ "55": "ParallelBars",
58
+ "56": "PizzaTossing",
59
+ "57": "PlayingCello",
60
+ "58": "PlayingDaf",
61
+ "59": "PlayingDhol",
62
+ "60": "PlayingFlute",
63
+ "61": "PlayingGuitar",
64
+ "62": "PlayingPiano",
65
+ "63": "PlayingSitar",
66
+ "64": "PlayingTabla",
67
+ "65": "PlayingViolin",
68
+ "66": "PoleVault",
69
+ "67": "PommelHorse",
70
+ "68": "PullUps",
71
+ "69": "Punch",
72
+ "70": "PushUps",
73
+ "71": "Rafting",
74
+ "72": "RockClimbingIndoor",
75
+ "73": "RopeClimbing",
76
+ "74": "Rowing",
77
+ "75": "SalsaSpin",
78
+ "76": "ShavingBeard",
79
+ "77": "Shotput",
80
+ "78": "SkateBoarding",
81
+ "79": "Skiing",
82
+ "80": "Skijet",
83
+ "81": "SkyDiving",
84
+ "82": "SoccerJuggling",
85
+ "83": "SoccerPenalty",
86
+ "84": "StillRings",
87
+ "85": "SumoWrestling",
88
+ "86": "Surfing",
89
+ "87": "Swing",
90
+ "88": "TableTennisShot",
91
+ "89": "TaiChi",
92
+ "90": "TennisSwing",
93
+ "91": "ThrowDiscus",
94
+ "92": "TrampolineJumping",
95
+ "93": "Typing",
96
+ "94": "UnevenBars",
97
+ "95": "VolleyballSpiking",
98
+ "96": "WalkingWithDog",
99
+ "97": "WallPushups",
100
+ "98": "WritingOnBoard",
101
+ "99": "YoYo",
102
+ "100": "PushUpsClap"
103
+ }
label_map_idx2label_ucf11.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "0": "basketball_shooting",
3
+ "1": "biking",
4
+ "2": "diving",
5
+ "3": "golf_swing",
6
+ "4": "horse_riding",
7
+ "5": "soccer_juggling",
8
+ "6": "swing",
9
+ "7": "tennis_swing",
10
+ "8": "trampoline_jumping",
11
+ "9": "volleyball_spiking",
12
+ "10": "walking"
13
+ }
label_map_idx2label_ucf50.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "0": "Baseball Pitch",
3
+ "1": "Basketball Shooting",
4
+ "2": "Bench Press",
5
+ "3": "Biking",
6
+ "4": "Billiards",
7
+ "5": "BreastStroke",
8
+ "6": "Clean and Jerk",
9
+ "7": "Cricket Bowling",
10
+ "8": "Cricket Shot",
11
+ "9": "Diving",
12
+ "10": "Drumming",
13
+ "11": "Fencing",
14
+ "12": "Floor Gymnastics",
15
+ "13": "Golf Swing",
16
+ "14": "Hammer Throw",
17
+ "15": "High Jump",
18
+ "16": "Horse Race",
19
+ "17": "Horse Riding",
20
+ "18": "Hula Hoop",
21
+ "19": "Javelin Throw",
22
+ "20": "Juggling Balls",
23
+ "21": "Jump Rope",
24
+ "22": "Jumping Jack",
25
+ "23": "Kayaking",
26
+ "24": "Lunges",
27
+ "25": "Military Parade",
28
+ "26": "Mixing Batter",
29
+ "27": "Nun Chucks",
30
+ "28": "Parallel Bars",
31
+ "29": "Pizza Tossing",
32
+ "30": "Playing Cello",
33
+ "31": "Playing Daf",
34
+ "32": "Playing Dhol",
35
+ "33": "Playing Flute",
36
+ "34": "Playing Guitar",
37
+ "35": "Playing Piano",
38
+ "36": "Playing Tabla",
39
+ "37": "Playing Violin",
40
+ "38": "Pole Vault",
41
+ "39": "Pommel Horse",
42
+ "40": "Pull Ups",
43
+ "41": "Punch",
44
+ "42": "Push Ups",
45
+ "43": "Rock Climbing Indoor",
46
+ "44": "Rope Climbing",
47
+ "45": "Rowing",
48
+ "46": "Salsa Spin",
49
+ "47": "Skate Boarding",
50
+ "48": "Skiing",
51
+ "49": "Skijet",
52
+ "50": "Soccer Juggling"
53
+ }
load_dataset.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import cv2
3
+ import rarfile
4
+ import patoolib
5
+ import tempfile
6
+ import shutil
7
+ import random
8
+
9
def read_UCF11(data_dir, num_frames):
    """
    Load clips from the UCF11 directory layout
    (class dir / scene sub-dir / video files).

    Returns a list of (frames, class_name) pairs where frames holds the
    first `num_frames` RGB frames; videos shorter than num_frames are
    skipped so every sample has uniform length.
    """
    samples = []  # List to store the frame clips and labels
    for label in os.listdir(data_dir):
        label_dir = os.path.join(data_dir, label)
        # Robustness fix: os.listdir can return stray files (archives,
        # readmes) next to the class directories — skip them instead of
        # crashing on the nested listdir below.
        if not os.path.isdir(label_dir):
            continue
        print(label_dir)
        for sub_dir in os.listdir(label_dir):
            # Each UCF11 class ships an 'Annotation' folder with no videos.
            if sub_dir == 'Annotation':
                continue
            video_dir = os.path.join(label_dir, sub_dir)
            if not os.path.isdir(video_dir):
                continue
            for video_file in os.listdir(video_dir):
                video_path = os.path.join(video_dir, video_file)
                cap = cv2.VideoCapture(video_path)
                frame_count = 0
                frames = []
                # Grab the first num_frames frames, converted BGR -> RGB.
                while True:
                    ret, frame = cap.read()
                    if not ret:
                        break
                    frame_count += 1
                    frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                    if frame_count == num_frames:
                        break
                cap.release()
                # Keep only videos long enough for a full clip.
                if len(frames) == num_frames:
                    samples.append((frames, label))
    return samples
42
+
43
def read_UCF50(data_dir, num_frames):
    """
    Load clips from the UCF50 directory layout (class dir / video files).

    Returns a list of (frames, class_name) pairs where frames holds the
    first `num_frames` RGB frames; videos shorter than num_frames are
    skipped so every sample has uniform length.
    """
    samples = []  # List to store the frame clips and labels
    for label in os.listdir(data_dir):
        label_dir = os.path.join(data_dir, label)
        # Robustness fix: skip stray non-directory entries instead of
        # crashing on os.listdir(label_dir).
        if not os.path.isdir(label_dir):
            continue
        print(label_dir)
        for video_file in os.listdir(label_dir):
            video_path = os.path.join(label_dir, video_file)
            cap = cv2.VideoCapture(video_path)
            frame_count = 0
            frames = []
            # Grab the first num_frames frames, converted BGR -> RGB.
            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                frame_count += 1
                frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                if frame_count == num_frames:
                    break
            cap.release()
            # Keep only videos long enough for a full clip.
            if len(frames) == num_frames:
                samples.append((frames, label))
    return samples
72
+
73
+
74
def read_UCF101(data_dir, num_frames):
    """
    Load clips from the UCF101 directory layout (class dir / video files).
    (Docstring fix: the original said "UCF50" — a copy-paste error.)

    Returns a list of (frames, class_name) pairs where frames holds the
    first `num_frames` RGB frames; videos shorter than num_frames are
    skipped so every sample has uniform length.
    """
    samples = []  # List to store the frame clips and labels
    for label in os.listdir(data_dir):
        label_dir = os.path.join(data_dir, label)
        # Robustness fix: skip stray non-directory entries instead of
        # crashing on os.listdir(label_dir).
        if not os.path.isdir(label_dir):
            continue
        print(label_dir)

        for video_file in os.listdir(label_dir):
            video_path = os.path.join(label_dir, video_file)
            cap = cv2.VideoCapture(video_path)
            frame_count = 0
            frames = []
            # Grab the first num_frames frames, converted BGR -> RGB.
            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                frame_count += 1
                frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                if frame_count == num_frames:
                    break
            cap.release()

            # Keep only videos long enough for a full clip.
            if len(frames) == num_frames:
                samples.append((frames, label))
    return samples
lstm.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+
4
+
5
class LSTMClassifier(nn.Module):
    """Single-layer unidirectional LSTM over per-frame feature sequences.

    Input: (batch, seq_len, input_size).
    Output: (batch, num_classes) raw logits from the last time step.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super(LSTMClassifier, self).__init__()
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        # Zero initial hidden/cell state: (num_layers, batch, hidden).
        h0 = torch.zeros(1, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(1, x.size(0), self.hidden_size).to(x.device)
        # Forward propagate LSTM
        out, _ = self.lstm(x, (h0, c0))
        # Decode the hidden state of the last time step
        out = self.fc(out[:, -1, :])
        # Fix: return raw logits.  The original applied softmax here, but
        # training.py feeds the output to nn.CrossEntropyLoss, which
        # expects logits and applies log-softmax internally — softmax
        # twice flattens gradients.  The sibling MultiLayerBiLSTMClassifier
        # already returns logits.
        return out
21
+
22
class MultiLayerBiLSTMClassifier(nn.Module):
    """Stacked bidirectional LSTM classifier over frame-feature sequences.

    Input: (batch, seq_len, input_size).
    Output: (batch, num_classes) raw logits (CrossEntropyLoss-ready).
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # Inter-layer dropout of 0.2 inside the LSTM stack.
        self.lstm = nn.LSTM(
            input_size,
            hidden_size,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=True,
            dropout=0.2,
        )
        # Forward and backward directions are concatenated, hence 2x width.
        self.fc = nn.Linear(hidden_size * 2, num_classes)
        self.dropout = nn.Dropout(0.2)

    def forward(self, x):
        # Two state tensors per layer (one per direction), zero-initialised.
        num_directions = 2
        state_shape = (num_directions * self.num_layers, x.size(0), self.hidden_size)
        initial_hidden = torch.zeros(state_shape).to(x.device)
        initial_cell = torch.zeros(state_shape).to(x.device)
        sequence_out, _ = self.lstm(x, (initial_hidden, initial_cell))
        # Regularise the final time step's representation, then classify.
        final_step = self.dropout(sequence_out[:, -1, :])
        return self.fc(final_step)
main.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from load_dataset import read_UCF11, read_UCF50, read_UCF101
2
+ from preprocessing import preprocessingData
3
+ from feature_extraction import extract_features, splittingData
4
+ from training import loadFeatures, trainModel
5
+ from testing import testModel
6
+ from evaluation import modelEvaluation
7
+ import numpy as np
8
+ import argparse
9
+
10
+
11
def main(dataset, data_dir):
    """
    End-to-end pipeline: load videos, extract CNN features, split, train
    the BiLSTM, test it, and save evaluation plots.

    Parameters
    ----------
    dataset : one of 'ucf11', 'ucf50', 'ucf101' (case-insensitive).
    data_dir : root directory of the raw video dataset.

    Raises
    ------
    ValueError for an unsupported dataset name.
    """
    # Number of frames sampled per clip for feature extraction.
    num_frames = 16

    name = dataset.lower()
    if name == 'ucf11':
        samples = read_UCF11(data_dir, num_frames)
    elif name == 'ucf50':
        samples = read_UCF50(data_dir, num_frames)
    elif name == 'ucf101':
        samples = read_UCF101(data_dir, num_frames)
    else:
        # Fix: the original fell through silently and crashed later with
        # NameError on 'samples'; fail fast with a clear message instead.
        raise ValueError(f"Unsupported dataset '{dataset}'; expected ucf11, ucf50 or ucf101")

    # Frame preprocessing transform (resize + ImageNet normalisation).
    processed_data = preprocessingData()

    # Extract features using ResNet50
    processed_samples = extract_features(samples, processed_data, name)

    # Split into train/val/test; also persists arrays and the label map.
    le = splittingData(processed_samples, name)

    # Reload the persisted features as tensors.
    train_features, train_labels, val_features, val_labels, test_features, test_labels = loadFeatures(name)

    # Train the model
    model = trainModel(train_features, train_labels, val_features, val_labels, name)

    # Test the model
    y_pred, y_pred_proba = testModel(model, test_features, test_labels, name, num_frames)

    # Evaluate: class indices 0..C-1 double as confusion-matrix tick labels.
    labels = np.arange(0, len(np.unique(test_labels)), 1)
    acc_score, pre_score, rec_score, f1, auc = modelEvaluation(y_pred, y_pred_proba, test_labels, labels, name)
46
+
47
+
48
if __name__ == "__main__":
    # Fix: help text now mentions UCF101, which main() supports.
    parser = argparse.ArgumentParser(description='Train and evaluate a model on UCF11, UCF50 or UCF101 dataset.')
    parser.add_argument('dataset', type=str, help='Dataset to use (UCF11, UCF50 or UCF101)')
    parser.add_argument('data_dir', type=str, help='Directory containing the dataset')
    args = parser.parse_args()

    main(args.dataset, args.data_dir)
models/ucf11_lstm_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:849bb832172d8fbdd0bc5fca049c8247c002701358377d8b6db6e04504333575
3
+ size 25224403
preprocessing.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ import torchvision.transforms as transforms
2
+
3
def preprocessingData():
    """Return the torchvision transform pipeline that prepares a raw RGB
    frame (numpy array) for ResNet50: PIL conversion, 224x224 resize,
    tensor conversion, ImageNet normalisation."""
    # ImageNet statistics ResNet50 was trained with.
    imagenet_mean = [0.485, 0.456, 0.406]
    imagenet_std = [0.229, 0.224, 0.225]
    return transforms.Compose([
        transforms.ToPILImage(),        # numpy frame -> PIL image
        transforms.Resize((224, 224)),  # ResNet50's expected input size
        transforms.ToTensor(),          # PIL -> float tensor in [0, 1]
        transforms.Normalize(mean=imagenet_mean, std=imagenet_std),
    ])
testing.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.optim as optim
4
+ from torch.utils.data import DataLoader, TensorDataset
5
+ import numpy as np
6
+
7
+
8
+ def testModel(model, test_features, test_labels, dataset, num_frames=32):
9
+ """
10
+ "Test the LSTM model on the test set."
11
+ """
12
+ model.eval()
13
+ test_loss = 0.0
14
+ test_total = 0
15
+ test_correct = 0
16
+
17
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
18
+ model.to(device)
19
+ criterion = nn.CrossEntropyLoss()
20
+
21
+ test_dataset = TensorDataset(test_features, test_labels)
22
+ test_loader = DataLoader(test_dataset, batch_size=num_frames, shuffle=False)
23
+
24
+ predicted_labels = []
25
+ all_outputs = []
26
+
27
+ with torch.no_grad():
28
+ for batch_x, batch_y in test_loader:
29
+ batch_x, batch_y = batch_x.to(device), batch_y.to(device)
30
+ outputs = model(batch_x)
31
+
32
+ loss = criterion(outputs, batch_y.long())
33
+ test_loss += loss.item()
34
+
35
+ _, predicted = torch.max(outputs, 1)
36
+ predicted_labels.extend(predicted.cpu().numpy())
37
+
38
+ # Store softmax probabilities
39
+ softmax_outputs = torch.nn.functional.softmax(outputs, dim=1)
40
+ all_outputs.append(softmax_outputs.cpu().numpy())
41
+
42
+ test_correct += (predicted == batch_y).sum().item()
43
+ test_total += batch_y.size(0)
44
+
45
+ y_pred = predicted_labels
46
+ y_pred_proba = np.vstack(all_outputs) # Convert list of arrays to a single numpy array
47
+
48
+ test_loss /= len(test_loader)
49
+ test_accuracy = 100 * test_correct / test_total
50
+
51
+ # Print final testing results
52
+ print(f'Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.2f}%')
53
+
54
+ # Save the test results to file .txt
55
+ with open(f'./benchmarks/{dataset}/benchmark.txt', 'a') as f:
56
+ f.write(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.2f}%\n")
57
+
58
+ return y_pred, y_pred_proba
59
+
training.py ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.optim as optim
4
+ import numpy as np
5
+ from torch.optim.lr_scheduler import StepLR
6
+ from torch.utils.data import DataLoader, TensorDataset
7
+ import matplotlib.pyplot as plt
8
+ import time
9
+ import os
10
+ from lstm import LSTMClassifier, MultiLayerBiLSTMClassifier
11
+
12
def loadFeatures(dataset):
    """Load the persisted train/val/test splits for *dataset* from
    ./features/{dataset}/ as torch tensors.  The training split gets a
    fresh random permutation on every call.

    Returns (train_features, train_labels, val_features, val_labels,
    test_features, test_labels).
    """
    base = f'./features/{dataset}'

    def load_split(split):
        # Features are cast to float32 for the model; labels keep their dtype.
        feats = torch.from_numpy(np.load(f'{base}/{split}_features.npy')).float()
        labels = torch.from_numpy(np.load(f'{base}/{split}_labels.npy'))
        return feats, labels

    train_features, train_labels = load_split('train')
    # Re-shuffle the training split so epoch order never mirrors disk order.
    perm = np.random.permutation(len(train_features))
    train_features, train_labels = train_features[perm], train_labels[perm]

    val_features, val_labels = load_split('val')
    test_features, test_labels = load_split('test')

    return train_features, train_labels, val_features, val_labels, test_features, test_labels
26
+
27
+
28
def trainModel(train_features, train_labels, val_features, val_labels, dataset, num_epochs=100, num_frames=32, hidden_size=256, learning_rate=0.0001):
    """
    Train a 2-layer bidirectional LSTM classifier with early stopping.

    Parameters
    ----------
    train_features, train_labels, val_features, val_labels : torch tensors.
    dataset : dataset name; plots/timings go to ./benchmarks/{dataset}/
        and weights to ./models/{dataset}_lstm_model.pt.
    num_epochs : maximum number of epochs (early stopping may end sooner).
    num_frames : used as the DataLoader batch size (name kept for
        interface compatibility).
    hidden_size, learning_rate : model width and Adam learning rate.

    Returns
    -------
    The trained model (state after the last epoch executed).
    """
    input_size = train_features.shape[-1]
    num_classes = len(np.unique(train_labels))

    train_losses = []
    train_accuracies = []
    val_losses = []
    val_accuracies = []

    patience = 10  # epochs without val-loss improvement before stopping
    best_val_loss = float("inf")
    counter = 0

    # Instantiate the BiLSTM classifier.
    # Fix: removed the dead `model = LSTMClassifier(...).cuda()` line that
    # preceded this — it crashed on CPU-only machines and its result was
    # immediately overwritten anyway.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = MultiLayerBiLSTMClassifier(input_size, hidden_size, 2, num_classes).to(device)

    # Loss, optimizer, and a step decay (halve the LR every 20 epochs).
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    scheduler = StepLR(optimizer, step_size=20, gamma=0.5)

    # Prepare DataLoaders (batch size is `num_frames`, see docstring).
    train_dataset = TensorDataset(train_features, train_labels)
    val_dataset = TensorDataset(val_features, val_labels)
    train_loader = DataLoader(train_dataset, batch_size=num_frames, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=num_frames, shuffle=False)

    # Fix: create the output directory BEFORE the benchmark file write
    # below; the original called makedirs only after the write, so a run
    # without the feature-extraction stage crashed here.
    os.makedirs(f'./benchmarks/{dataset}', exist_ok=True)

    # Training loop
    start_time = time.time()
    for epoch in range(num_epochs):
        # Training
        model.train()
        train_loss, train_total, train_correct = 0.0, 0, 0

        for batch_features, batch_labels in train_loader:
            batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)

            optimizer.zero_grad()
            outputs = model(batch_features)
            loss = criterion(outputs, batch_labels.long())
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            train_correct += (predicted == batch_labels).sum().item()
            train_total += batch_labels.size(0)

        train_loss = train_loss / len(train_loader)
        train_accuracy = 100 * train_correct / train_total
        train_losses.append(train_loss)
        train_accuracies.append(train_accuracy)

        # Validation step (no gradients).
        model.eval()
        total_val_loss, correct_val, total_val = 0.0, 0, 0
        with torch.no_grad():
            for batch_features, batch_labels in val_loader:
                batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)
                outputs = model(batch_features)
                loss = criterion(outputs, batch_labels.long())

                total_val_loss += loss.item()
                _, predicted = torch.max(outputs, 1)
                correct_val += (predicted == batch_labels).sum().item()
                total_val += batch_labels.size(0)

        val_loss = total_val_loss / len(val_loader)
        val_accuracy = 100 * correct_val / total_val
        val_losses.append(val_loss)
        val_accuracies.append(val_accuracy)

        scheduler.step()

        # Early stopping on validation loss.
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            counter = 0  # Reset counter if validation loss improves
        else:
            counter += 1  # Increment counter if validation loss does not improve
            if counter >= patience:
                print("Early stopping triggered")
                break

        # Fix: report the configured num_epochs instead of a hard-coded 100.
        print(f'Epoch: [{epoch+1}/{num_epochs}] Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.2f}% Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.2f}%')

    end_time = time.time()
    print(f'Training completed in {end_time - start_time:.2f} seconds')

    # Append the training time to the benchmark file.
    with open(f'./benchmarks/{dataset}/benchmark.txt', 'a') as f:
        f.write(f"Training time: {end_time - start_time:.2f} seconds\n")

    # Loss curves.
    plt.figure(figsize=(10, 5))
    plt.plot(train_losses, label='Training Loss')
    plt.plot(val_losses, label='Validation Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.savefig(f'./benchmarks/{dataset}/loss_plot.png')
    plt.close()

    # Accuracy curves.
    plt.figure(figsize=(10, 5))
    plt.plot(train_accuracies, label='Training Accuracy')
    plt.plot(val_accuracies, label='Validation Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.savefig(f'./benchmarks/{dataset}/accuracy_plot.png')
    plt.close()

    # Persist weights for inference.py.
    os.makedirs('./models', exist_ok=True)
    torch.save(model.state_dict(), f'./models/{dataset}_lstm_model.pt')
    print(f'Model saved to ./models/{dataset}_lstm_model.pt')

    return model