Upload 36 files
Browse files- __init__.py +0 -0
- __pycache__/evaluation.cpython-311.pyc +0 -0
- __pycache__/feature_extraction.cpython-311.pyc +0 -0
- __pycache__/load_dataset.cpython-311.pyc +0 -0
- __pycache__/lstm.cpython-311.pyc +0 -0
- __pycache__/preprocessing.cpython-311.pyc +0 -0
- __pycache__/testing.cpython-311.pyc +0 -0
- __pycache__/training.cpython-311.pyc +0 -0
- benchmarks/ucf101/accuracy_plot.png +0 -0
- benchmarks/ucf101/benchmark.txt +3 -0
- benchmarks/ucf101/confusion_matrix.png +0 -0
- benchmarks/ucf101/loss_plot.png +0 -0
- benchmarks/ucf101/model_performance.png +0 -0
- benchmarks/ucf11/accuracy_plot.png +0 -0
- benchmarks/ucf11/benchmark.txt +3 -0
- benchmarks/ucf11/confusion_matrix.png +0 -0
- benchmarks/ucf11/loss_plot.png +0 -0
- benchmarks/ucf11/model_performance.png +0 -0
- benchmarks/ucf50/accuracy_plot.png +0 -0
- benchmarks/ucf50/benchmark.txt +3 -0
- benchmarks/ucf50/confusion_matrix.png +0 -0
- benchmarks/ucf50/loss_plot.png +0 -0
- benchmarks/ucf50/model_performance.png +0 -0
- evaluation.py +52 -0
- feature_extraction.py +93 -0
- inference.py +97 -0
- label_map_idx2label_ucf101.json +103 -0
- label_map_idx2label_ucf11.json +13 -0
- label_map_idx2label_ucf50.json +53 -0
- load_dataset.py +104 -0
- lstm.py +41 -0
- main.py +54 -0
- models/ucf11_lstm_model.pt +3 -0
- preprocessing.py +10 -0
- testing.py +59 -0
- training.py +153 -0
__init__.py
ADDED
|
File without changes
|
__pycache__/evaluation.cpython-311.pyc
ADDED
|
Binary file (3 kB). View file
|
|
|
__pycache__/feature_extraction.cpython-311.pyc
ADDED
|
Binary file (6.61 kB). View file
|
|
|
__pycache__/load_dataset.cpython-311.pyc
ADDED
|
Binary file (4.34 kB). View file
|
|
|
__pycache__/lstm.cpython-311.pyc
ADDED
|
Binary file (3.79 kB). View file
|
|
|
__pycache__/preprocessing.cpython-311.pyc
ADDED
|
Binary file (829 Bytes). View file
|
|
|
__pycache__/testing.cpython-311.pyc
ADDED
|
Binary file (3.55 kB). View file
|
|
|
__pycache__/training.cpython-311.pyc
ADDED
|
Binary file (9.4 kB). View file
|
|
|
benchmarks/ucf101/accuracy_plot.png
ADDED
|
benchmarks/ucf101/benchmark.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Feature extraction time: 1773.93 seconds
|
| 2 |
+
Training time: 106.37 seconds
|
| 3 |
+
Test Loss: 0.2128, Test Accuracy: 94.37%
|
benchmarks/ucf101/confusion_matrix.png
ADDED
|
benchmarks/ucf101/loss_plot.png
ADDED
|
benchmarks/ucf101/model_performance.png
ADDED
|
benchmarks/ucf11/accuracy_plot.png
ADDED
|
benchmarks/ucf11/benchmark.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Feature extraction time: 88.29 seconds
|
| 2 |
+
Training time: 8.87 seconds
|
| 3 |
+
Test Loss: 0.1348, Test Accuracy: 95.31%
|
benchmarks/ucf11/confusion_matrix.png
ADDED
|
benchmarks/ucf11/loss_plot.png
ADDED
|
benchmarks/ucf11/model_performance.png
ADDED
|
benchmarks/ucf50/accuracy_plot.png
ADDED
|
benchmarks/ucf50/benchmark.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Feature extraction time: 557.77 seconds
|
| 2 |
+
Training time: 106.57 seconds
|
| 3 |
+
Test Loss: 0.1782, Test Accuracy: 94.76%
|
benchmarks/ucf50/confusion_matrix.png
ADDED
|
benchmarks/ucf50/loss_plot.png
ADDED
|
benchmarks/ucf50/model_performance.png
ADDED
|
evaluation.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix
|
| 2 |
+
import matplotlib.pyplot as plt
|
| 3 |
+
import seaborn as sns
|
| 4 |
+
|
| 5 |
+
def modelEvaluation(y_pred, y_pred_proba, y_test, labels, dataset):
    """Evaluate classifier predictions and save benchmark plots.

    Computes accuracy, macro precision/recall/F1 and one-vs-rest AUC,
    then writes a bar chart of the metrics and a confusion-matrix heatmap
    under ./benchmarks/<dataset>/.

    Args:
        y_pred: predicted class indices.
        y_pred_proba: per-class probability matrix (needed for AUC).
        y_test: ground-truth class indices.
        labels: tick labels for the confusion-matrix axes.
        dataset: dataset name, used as the output sub-directory.

    Returns:
        Tuple (accuracy, precision, recall, f1, auc).
    """
    cm = confusion_matrix(y_test, y_pred)

    # Scalar metrics; macro averaging weights every class equally.
    acc_score = accuracy_score(y_test, y_pred)
    pre_score = precision_score(y_test, y_pred, average='macro')
    rec_score = recall_score(y_test, y_pred, average='macro')
    f1 = f1_score(y_test, y_pred, average='macro')
    # One-vs-rest AUC requires probabilities, not hard predictions.
    auc = roc_auc_score(y_test, y_pred_proba, multi_class='ovr')

    # Bar chart of the five summary metrics.
    metrics_labels = ['Accuracy', 'Precision', 'Recall', 'F1-Score', 'AUC']
    metrics_values = [acc_score, pre_score, rec_score, f1, auc]
    plt.figure(figsize=(10, 6))
    ax = sns.barplot(
        x=metrics_labels,
        y=metrics_values,
        hue=metrics_labels,  # hue mirrors x so each bar gets its own color
        dodge=False,
        palette=["#FF6F61", "#92A8D1", "#88B04B", "#F7CAC9", "#61ffbd"],
        legend=False
    )
    for i, v in enumerate(metrics_values):
        ax.text(i, v - 0.04, f"{v:.4f}", ha='center', va='bottom', fontsize=10)
    plt.title("Model Performance Metrics")
    plt.ylim(0, 1)
    plt.ylabel("Score")
    plt.xlabel("Metrics")
    plt.savefig(f"./benchmarks/{dataset}/model_performance.png")
    plt.close()

    # Confusion-matrix heatmap.
    plt.figure(figsize=(12, 10))
    sns.heatmap(cm, annot=False, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)
    plt.title("Confusion Matrix", pad=20)
    plt.xlabel("Predicted", labelpad=15)
    plt.ylabel("Actual", labelpad=15)
    plt.savefig(f"./benchmarks/{dataset}/confusion_matrix.png", bbox_inches='tight')
    # Fix: the confusion-matrix figure was previously never closed,
    # leaking an open matplotlib figure per call.
    plt.close()

    return acc_score, pre_score, rec_score, f1, auc
feature_extraction.py
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
from sklearn.preprocessing import LabelEncoder
|
| 3 |
+
import torch
|
| 4 |
+
import os
|
| 5 |
+
from torchvision import models
|
| 6 |
+
import time
|
| 7 |
+
import json
|
| 8 |
+
|
| 9 |
+
def extract_features(samples, transform, dataset):
    """Push every sampled clip through a headless ResNet50 and collect features.

    Each (frames, label) pair is transformed, stacked into a (T, C, H, W)
    batch and run through ResNet50 with its classification head removed,
    yielding one (T, 2048) feature matrix per clip.  Wall-clock time is
    printed and written to ./benchmarks/<dataset>/benchmark.txt.

    Args:
        samples: iterable of (frames, label) pairs; frames are raw images.
        transform: per-frame preprocessing callable returning a tensor.
        dataset: dataset name used for the benchmark output directory.

    Returns:
        List of (features, label) tuples with features as numpy arrays.
    """
    print("Extracting features using ResNet50...")
    start_time = time.time()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Pretrained backbone; dropping the final FC layer leaves the pooled
    # 2048-d feature vector as the network output.
    resnet = models.resnet50(weights=models.ResNet50_Weights.DEFAULT).to(device)
    resnet.eval()
    resnet_feat = torch.nn.Sequential(*list(resnet.children())[:-1]).to(device)
    resnet_feat.eval()

    processed_samples = []
    for frames, label in samples:
        batch = torch.stack([transform(frame).to(device) for frame in frames], dim=0).to(device)
        with torch.no_grad():
            feat_maps = resnet_feat(batch)  # (T, 2048, 1, 1)
            flat = torch.flatten(feat_maps, start_dim=1).cpu().numpy()
        processed_samples.append((flat, label))

    elapsed = time.time() - start_time
    print(f"Feature extraction completed in {elapsed:.2f} seconds")

    os.makedirs(f'./benchmarks/{dataset}', exist_ok=True)

    # 'w' mode: every run starts a fresh benchmark file for this dataset.
    with open(f'./benchmarks/{dataset}/benchmark.txt', 'w') as f:
        f.write(f"Feature extraction time: {elapsed:.2f} seconds\n")

    return processed_samples
| 41 |
+
|
| 42 |
+
def splittingData(samples, dataset):
    """Shuffle the samples and persist train/val/test feature splits.

    The data is split 80/20 into a training pool and a test set, then the
    pool is split 80/20 again into train/validation (64%/16%/20% overall).
    String labels are encoded to integers with a LabelEncoder; all arrays
    and the index->label map are written under ./features/<dataset>/.

    Args:
        samples: list of (features, label) pairs.
        dataset: dataset name used for the output directory.

    Returns:
        The fitted LabelEncoder.
    """
    np.random.shuffle(samples)

    # First split: hold out the last 20% as the test set.
    split_idx = int(0.8 * len(samples))
    train_pool = samples[:split_idx]
    test_samples = samples[split_idx:]

    # Second split: carve a validation set out of the training pool.
    val_split = int(0.8 * len(train_pool))
    train_samples, val_samples = train_pool[:val_split], train_pool[val_split:]

    train_features, train_labels = zip(*train_samples)
    val_features, val_labels = zip(*val_samples)
    test_features, test_labels = zip(*test_samples)

    # Fit the encoder on the training labels, then reuse it everywhere.
    le = LabelEncoder()
    train_labels = le.fit_transform(train_labels)
    val_labels = le.transform(val_labels)
    test_labels = le.transform(test_labels)

    train_features = np.array(train_features)
    val_features = np.array(val_features)
    test_features = np.array(test_features)

    print("Train Features shape:", train_features.shape)
    print("Train Labels shape:", train_labels.shape)
    print("Validation Features shape:", val_features.shape)
    print("Validation Labels shape:", val_labels.shape)
    print("Test Features shape:", test_features.shape)
    print("Test Labels shape:", test_labels.shape)

    os.makedirs(f'./features/{dataset}', exist_ok=True)

    # Persist each split as a standalone .npy file.
    np.save(f'./features/{dataset}/train_features.npy', train_features)
    np.save(f'./features/{dataset}/train_labels.npy', train_labels)
    np.save(f'./features/{dataset}/val_features.npy', val_features)
    np.save(f'./features/{dataset}/val_labels.npy', val_labels)
    np.save(f'./features/{dataset}/test_features.npy', test_features)
    np.save(f'./features/{dataset}/test_labels.npy', test_labels)

    # Human-readable index -> class-name map for inference-time decoding.
    idx2label = {i: label for i, label in enumerate(le.classes_)}
    with open(f'./features/{dataset}/label_map_idx2label.json', 'w') as f:
        json.dump(idx2label, f, indent=4)

    return le
|
inference.py
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import cv2
|
| 3 |
+
import numpy as np
|
| 4 |
+
from torchvision import models, transforms
|
| 5 |
+
from torchvision.models import resnet50, ResNet50_Weights
|
| 6 |
+
from lstm import MultiLayerBiLSTMClassifier
|
| 7 |
+
from preprocessing import preprocessingData
|
| 8 |
+
import argparse
|
| 9 |
+
import os
|
| 10 |
+
import json
|
| 11 |
+
|
| 12 |
+
def load_label_map(dataset):
    """Return the idx -> label mapping shipped for *dataset*.

    Args:
        dataset: dataset name (e.g. 'ucf11', 'ucf50').

    Raises:
        FileNotFoundError: when no label-map JSON exists for *dataset*.
    """
    label_path = f"src/label_map_idx2label_{dataset}.json"
    if os.path.exists(label_path):
        with open(label_path, "r", encoding="utf-8") as f:
            return json.load(f)
    raise FileNotFoundError(f"Label map not found: {label_path}")
| 18 |
+
|
| 19 |
+
def read_video_frames(video_path, num_frames=16):
    """Sample *num_frames* frames evenly spaced across a video.

    Frames are returned as RGB numpy arrays.  Short reads (duplicate
    sample indices on very short videos, or decode failures) are padded
    by repeating the last decoded frame.

    Args:
        video_path: path to a video readable by OpenCV.
        num_frames: number of frames to sample (default 16).

    Returns:
        List of exactly *num_frames* RGB frames.

    Raises:
        RuntimeError: if the video cannot be opened or yields no frames.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise RuntimeError(f"Cannot open video file: {video_path}")
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    if total_frames == 0:
        raise RuntimeError(f"Video contains no frames: {video_path}")

    # Evenly spaced sample positions.  A set gives O(1) membership tests;
    # the original `idx in ndarray` scanned the array once per decoded
    # frame (O(num_frames) each, for every frame of the video).
    frame_indices = set(np.linspace(0, total_frames - 1, num_frames).astype(int).tolist())
    frames = []
    for idx in range(total_frames):
        ret, frame = cap.read()
        if not ret:
            break
        if idx in frame_indices:
            # OpenCV decodes BGR; convert for downstream models.
            frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    cap.release()

    if len(frames) == 0:
        raise RuntimeError("No frames extracted from video.")
    # Pad short reads so callers always get a fixed-length clip.
    while len(frames) < num_frames:
        frames.append(frames[-1])

    return frames[:num_frames]
|
| 44 |
+
|
| 45 |
+
def load_model(model_path, input_size, hidden_size, num_layers, num_classes):
    """Instantiate the BiLSTM classifier and restore its trained weights.

    The model is placed on GPU when available and switched to eval mode
    so dropout is disabled during inference.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    classifier = MultiLayerBiLSTMClassifier(input_size, hidden_size, num_layers, num_classes)
    classifier = classifier.to(device)
    state = torch.load(model_path, map_location=device)
    classifier.load_state_dict(state)
    classifier.eval()
    return classifier
| 51 |
+
|
| 52 |
+
def inference(dataset, video_path, model_path):
    """Classify a single video end-to-end with the trained HAR model.

    Pipeline: sample frames -> preprocess -> ResNet50 features ->
    BiLSTM classifier -> printed class index and label.
    """
    # NOTE(review): inference samples 32 frames while main.py trains on
    # 16-frame clips — confirm this mismatch is intended.
    num_frames = 32
    hidden_size = 256
    num_layers = 2

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # The label map determines the classifier's output dimension.
    label_map = load_label_map(dataset)
    num_classes = len(label_map)

    # Step 1: decode and preprocess the sampled video frames.
    raw_frames = read_video_frames(video_path, num_frames)
    transform = preprocessingData()
    frames_tensor = torch.stack([transform(frame) for frame in raw_frames], dim=0).to(device)

    # Step 2: per-frame ResNet50 features (classification head removed).
    resnet = models.resnet50(weights=ResNet50_Weights.DEFAULT).to(device)
    resnet_feat = torch.nn.Sequential(*list(resnet.children())[:-1])
    resnet.eval()  # eval() propagates to the child modules shared by resnet_feat
    with torch.no_grad():
        features = torch.flatten(resnet_feat(frames_tensor), start_dim=1).cpu().numpy()

    # Step 3: restore the trained classifier (input size comes from the features).
    input_size = features.shape[1]
    model = load_model(model_path, input_size, hidden_size, num_layers, num_classes)

    # Step 4: predict the action class for the whole clip.
    with torch.no_grad():
        input_seq = torch.from_numpy(features).unsqueeze(0).float().to(device)
        outputs = model(input_seq)
        predicted_class = torch.argmax(outputs, dim=1).item()
    # JSON keys are strings, so index the map with str().
    predicted_label = label_map[str(predicted_class)]

    print(f"Predicted class index: {predicted_class} ({predicted_label})")
| 89 |
+
|
| 90 |
+
if __name__ == "__main__":
    # CLI entry point: dataset name, input video path, model checkpoint path.
    parser = argparse.ArgumentParser(description="Inference on a single video using trained HAR model")
    parser.add_argument("dataset", type=str, help="Dataset used to train model (ucf11 or ucf50)")
    parser.add_argument("video_path", type=str, help="Path to input video file")
    parser.add_argument("model_path", type=str, help="Path to trained model (.pt)")
    args = parser.parse_args()

    # Dataset name is lower-cased to match the label-map file naming.
    inference(args.dataset.lower(), args.video_path, args.model_path)
|
label_map_idx2label_ucf101.json
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"0": "ApplyEyeMakeup",
|
| 3 |
+
"1": "ApplyLipstick",
|
| 4 |
+
"2": "Archery",
|
| 5 |
+
"3": "BabyCrawling",
|
| 6 |
+
"4": "BalanceBeam",
|
| 7 |
+
"5": "BandMarching",
|
| 8 |
+
"6": "BaseballPitch",
|
| 9 |
+
"7": "Basketball",
|
| 10 |
+
"8": "BasketballDunk",
|
| 11 |
+
"9": "BenchPress",
|
| 12 |
+
"10": "Biking",
|
| 13 |
+
"11": "Billiards",
|
| 14 |
+
"12": "BlowDryHair",
|
| 15 |
+
"13": "BlowingCandles",
|
| 16 |
+
"14": "BodyWeightSquats",
|
| 17 |
+
"15": "Bowling",
|
| 18 |
+
"16": "BoxingPunchingBag",
|
| 19 |
+
"17": "BoxingSpeedBag",
|
| 20 |
+
"18": "BreastStroke",
|
| 21 |
+
"19": "BrushingTeeth",
|
| 22 |
+
"20": "CleanAndJerk",
|
| 23 |
+
"21": "CliffDiving",
|
| 24 |
+
"22": "CricketBowling",
|
| 25 |
+
"23": "CricketShot",
|
| 26 |
+
"24": "CuttingInKitchen",
|
| 27 |
+
"25": "Diving",
|
| 28 |
+
"26": "Drumming",
|
| 29 |
+
"27": "Fencing",
|
| 30 |
+
"28": "FieldHockeyPenalty",
|
| 31 |
+
"29": "FloorGymnastics",
|
| 32 |
+
"30": "FrisbeeCatch",
|
| 33 |
+
"31": "FrontCrawl",
|
| 34 |
+
"32": "GolfSwing",
|
| 35 |
+
"33": "Haircut",
|
| 36 |
+
"34": "HammerThrow",
|
| 37 |
+
"35": "HandstandPushups",
|
| 38 |
+
"36": "HandstandWalking",
|
| 39 |
+
"37": "HeadMassage",
|
| 40 |
+
"38": "HighJump",
|
| 41 |
+
"39": "HorseRace",
|
| 42 |
+
"40": "HorseRiding",
|
| 43 |
+
"41": "HulaHoop",
|
| 44 |
+
"42": "IceDancing",
|
| 45 |
+
"43": "JavelinThrow",
|
| 46 |
+
"44": "JugglingBalls",
|
| 47 |
+
"45": "JumpRope",
|
| 48 |
+
"46": "JumpingJack",
|
| 49 |
+
"47": "Kayaking",
|
| 50 |
+
"48": "Knitting",
|
| 51 |
+
"49": "LongJump",
|
| 52 |
+
"50": "Lunges",
|
| 53 |
+
"51": "MilitaryParade",
|
| 54 |
+
"52": "Mixing",
|
| 55 |
+
"53": "MoppingFloor",
|
| 56 |
+
"54": "Nunchucks",
|
| 57 |
+
"55": "ParallelBars",
|
| 58 |
+
"56": "PizzaTossing",
|
| 59 |
+
"57": "PlayingCello",
|
| 60 |
+
"58": "PlayingDaf",
|
| 61 |
+
"59": "PlayingDhol",
|
| 62 |
+
"60": "PlayingFlute",
|
| 63 |
+
"61": "PlayingGuitar",
|
| 64 |
+
"62": "PlayingPiano",
|
| 65 |
+
"63": "PlayingSitar",
|
| 66 |
+
"64": "PlayingTabla",
|
| 67 |
+
"65": "PlayingViolin",
|
| 68 |
+
"66": "PoleVault",
|
| 69 |
+
"67": "PommelHorse",
|
| 70 |
+
"68": "PullUps",
|
| 71 |
+
"69": "Punch",
|
| 72 |
+
"70": "PushUps",
|
| 73 |
+
"71": "Rafting",
|
| 74 |
+
"72": "RockClimbingIndoor",
|
| 75 |
+
"73": "RopeClimbing",
|
| 76 |
+
"74": "Rowing",
|
| 77 |
+
"75": "SalsaSpin",
|
| 78 |
+
"76": "ShavingBeard",
|
| 79 |
+
"77": "Shotput",
|
| 80 |
+
"78": "SkateBoarding",
|
| 81 |
+
"79": "Skiing",
|
| 82 |
+
"80": "Skijet",
|
| 83 |
+
"81": "SkyDiving",
|
| 84 |
+
"82": "SoccerJuggling",
|
| 85 |
+
"83": "SoccerPenalty",
|
| 86 |
+
"84": "StillRings",
|
| 87 |
+
"85": "SumoWrestling",
|
| 88 |
+
"86": "Surfing",
|
| 89 |
+
"87": "Swing",
|
| 90 |
+
"88": "TableTennisShot",
|
| 91 |
+
"89": "TaiChi",
|
| 92 |
+
"90": "TennisSwing",
|
| 93 |
+
"91": "ThrowDiscus",
|
| 94 |
+
"92": "TrampolineJumping",
|
| 95 |
+
"93": "Typing",
|
| 96 |
+
"94": "UnevenBars",
|
| 97 |
+
"95": "VolleyballSpiking",
|
| 98 |
+
"96": "WalkingWithDog",
|
| 99 |
+
"97": "WallPushups",
|
| 100 |
+
"98": "WritingOnBoard",
|
| 101 |
+
"99": "YoYo",
|
| 102 |
+
"100": "PushUpsClap"
|
| 103 |
+
}
|
label_map_idx2label_ucf11.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"0": "basketball_shooting",
|
| 3 |
+
"1": "biking",
|
| 4 |
+
"2": "diving",
|
| 5 |
+
"3": "golf_swing",
|
| 6 |
+
"4": "horse_riding",
|
| 7 |
+
"5": "soccer_juggling",
|
| 8 |
+
"6": "swing",
|
| 9 |
+
"7": "tennis_swing",
|
| 10 |
+
"8": "trampoline_jumping",
|
| 11 |
+
"9": "volleyball_spiking",
|
| 12 |
+
"10": "walking"
|
| 13 |
+
}
|
label_map_idx2label_ucf50.json
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"0": "Baseball Pitch",
|
| 3 |
+
"1": "Basketball Shooting",
|
| 4 |
+
"2": "Bench Press",
|
| 5 |
+
"3": "Biking",
|
| 6 |
+
"4": "Billiards",
|
| 7 |
+
"5": "BreastStroke",
|
| 8 |
+
"6": "Clean and Jerk",
|
| 9 |
+
"7": "Cricket Bowling",
|
| 10 |
+
"8": "Cricket Shot",
|
| 11 |
+
"9": "Diving",
|
| 12 |
+
"10": "Drumming",
|
| 13 |
+
"11": "Fencing",
|
| 14 |
+
"12": "Floor Gymnastics",
|
| 15 |
+
"13": "Golf Swing",
|
| 16 |
+
"14": "Hammer Throw",
|
| 17 |
+
"15": "High Jump",
|
| 18 |
+
"16": "Horse Race",
|
| 19 |
+
"17": "Horse Riding",
|
| 20 |
+
"18": "Hula Hoop",
|
| 21 |
+
"19": "Javelin Throw",
|
| 22 |
+
"20": "Juggling Balls",
|
| 23 |
+
"21": "Jump Rope",
|
| 24 |
+
"22": "Jumping Jack",
|
| 25 |
+
"23": "Kayaking",
|
| 26 |
+
"24": "Lunges",
|
| 27 |
+
"25": "Military Parade",
|
| 28 |
+
"26": "Mixing Batter",
|
| 29 |
+
"27": "Nun Chucks",
|
| 30 |
+
"28": "Parallel Bars",
|
| 31 |
+
"29": "Pizza Tossing",
|
| 32 |
+
"30": "Playing Cello",
|
| 33 |
+
"31": "Playing Daf",
|
| 34 |
+
"32": "Playing Dhol",
|
| 35 |
+
"33": "Playing Flute",
|
| 36 |
+
"34": "Playing Guitar",
|
| 37 |
+
"35": "Playing Piano",
|
| 38 |
+
"36": "Playing Tabla",
|
| 39 |
+
"37": "Playing Violin",
|
| 40 |
+
"38": "Pole Vault",
|
| 41 |
+
"39": "Pommel Horse",
|
| 42 |
+
"40": "Pull Ups",
|
| 43 |
+
"41": "Punch",
|
| 44 |
+
"42": "Push Ups",
|
| 45 |
+
"43": "Rock Climbing Indoor",
|
| 46 |
+
"44": "Rope Climbing",
|
| 47 |
+
"45": "Rowing",
|
| 48 |
+
"46": "Salsa Spin",
|
| 49 |
+
"47": "Skate Boarding",
|
| 50 |
+
"48": "Skiing",
|
| 51 |
+
"49": "Skijet",
|
| 52 |
+
"50": "Soccer Juggling"
|
| 53 |
+
}
|
load_dataset.py
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import cv2
|
| 3 |
+
import rarfile
|
| 4 |
+
import patoolib
|
| 5 |
+
import tempfile
|
| 6 |
+
import shutil
|
| 7 |
+
import random
|
| 8 |
+
|
| 9 |
+
def read_UCF11(data_dir, num_frames):
    """
    Read up to ``num_frames`` RGB frames from every clip in a UCF11 tree.

    Layout assumed: <data_dir>/<label>/<group>/<video>, with per-label
    'Annotation' sub-directories skipped.  Clips that yield fewer than
    ``num_frames`` frames are silently dropped.  (Note: this function only
    reads frames — transformation and ResNet50 feature extraction happen
    later in the pipeline.)

    Returns:
        list of (frames, label) tuples; frames are RGB numpy arrays.
    """
    samples= [] # List to store (frames, label) pairs
    # Loop over the videos in the dataset folder
    for label in os.listdir(data_dir):
        label_dir = os.path.join(data_dir, label)
        print(label_dir)
        for sub_dir in os.listdir(label_dir):
            if sub_dir == 'Annotation':
                continue
            video_dir = os.path.join(label_dir, sub_dir)
            for video_file in os.listdir(video_dir):
                video_path = os.path.join(video_dir, video_file)
                cap = cv2.VideoCapture(video_path)
                frame_count = 0
                frames = []
                while True:
                    ret, frame = cap.read()
                    if ret:
                        frame_count += 1
                        # OpenCV decodes BGR; convert for downstream models.
                        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                        frames.append(frame)
                        if frame_count == num_frames:
                            break
                    else:
                        break
                cap.release()
                # Keep only clips long enough to fill the frame window.
                if len(frames) == num_frames:
                    samples.append((frames, label))
    return samples
| 42 |
+
|
| 43 |
+
def read_UCF50(data_dir, num_frames):
    """
    Read up to ``num_frames`` RGB frames from every clip in a UCF50 tree.

    Layout assumed: <data_dir>/<label>/<video>.  Clips that yield fewer
    than ``num_frames`` frames are silently dropped.  (Note: this function
    only reads frames — transformation and ResNet50 feature extraction
    happen later in the pipeline.)

    Returns:
        list of (frames, label) tuples; frames are RGB numpy arrays.
    """
    samples= [] # List to store (frames, label) pairs
    # Loop over the videos in the dataset folder
    for label in os.listdir(data_dir):
        label_dir = os.path.join(data_dir, label)
        print(label_dir)
        for video_file in os.listdir(label_dir):
            video_path = os.path.join(label_dir, video_file)
            cap = cv2.VideoCapture(video_path)
            frame_count = 0
            frames = []
            while True:
                ret, frame = cap.read()
                if ret:
                    frame_count += 1
                    # OpenCV decodes BGR; convert for downstream models.
                    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    frames.append(frame)
                    if frame_count == num_frames:
                        break
                else:
                    break
            cap.release()
            # Keep only clips long enough to fill the frame window.
            if len(frames) == num_frames:
                samples.append((frames, label))
    return samples
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
def read_UCF101(data_dir, num_frames):
    """
    Read up to ``num_frames`` RGB frames from every clip in a UCF101 tree.

    (Docstring fixed: this previously claimed to be for the UCF50 dataset
    and to extract ResNet50 features — it does neither.)  Layout assumed:
    <data_dir>/<label>/<video>.  Clips that yield fewer than
    ``num_frames`` frames are silently dropped.

    Returns:
        list of (frames, label) tuples; frames are RGB numpy arrays.
    """
    samples= [] # List to store (frames, label) pairs
    # Loop over the videos in the dataset folder
    for label in os.listdir(data_dir):
        label_dir = os.path.join(data_dir, label)
        print(label_dir)

        for video_file in os.listdir(label_dir):
            video_path = os.path.join(label_dir, video_file)
            cap = cv2.VideoCapture(video_path)
            frame_count = 0
            frames = []
            while True:
                ret, frame = cap.read()
                if ret:
                    frame_count += 1
                    # OpenCV decodes BGR; convert for downstream models.
                    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    frames.append(frame)
                    if frame_count == num_frames:
                        break
                else:
                    break
            cap.release()

            # Keep only clips long enough to fill the frame window.
            if len(frames) == num_frames:
                samples.append((frames, label))
    return samples
lstm.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
class LSTMClassifier(nn.Module):
    """Single-layer LSTM video classifier.

    Consumes a (batch, time, input_size) sequence, keeps the last time
    step's hidden output and maps it to per-class softmax probabilities.

    NOTE(review): the softmax output is unsuitable as direct input to
    nn.CrossEntropyLoss, which expects raw logits — verify how callers
    train this class before using it.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return (batch, num_classes) class probabilities."""
        # Explicit zero initial states (same as nn.LSTM's default).
        state = torch.zeros(1, x.size(0), self.hidden_size, device=x.device)
        seq_out, _ = self.lstm(x, (state, state))
        # Classify from the final time step only.
        logits = self.fc(seq_out[:, -1, :])
        return torch.softmax(logits, dim=1)
| 21 |
+
|
| 22 |
+
class MultiLayerBiLSTMClassifier(nn.Module):
    """Stacked bidirectional LSTM classifier emitting raw logits.

    The final time step's output (forward and backward directions
    concatenated, hence hidden_size * 2) is passed through dropout and a
    linear head.  No softmax is applied, so the output can feed
    nn.CrossEntropyLoss directly.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # bidirectional=True doubles the effective feature width;
        # inter-layer dropout of 0.2 applies between stacked layers.
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers=num_layers,
                            batch_first=True, bidirectional=True, dropout=0.2)
        self.fc = nn.Linear(hidden_size * 2, num_classes)
        self.dropout = nn.Dropout(0.2)

    def forward(self, x):
        """Return (batch, num_classes) logits for a (batch, T, F) input."""
        # One zero state per direction per layer (matches the LSTM default).
        state_rows = 2 * self.num_layers
        init = torch.zeros(state_rows, x.size(0), self.hidden_size, device=x.device)
        seq_out, _ = self.lstm(x, (init, init))
        # Dropout on the last time step's features, then the linear head.
        last_step = self.dropout(seq_out[:, -1, :])
        return self.fc(last_step)
|
main.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from load_dataset import read_UCF11, read_UCF50, read_UCF101
|
| 2 |
+
from preprocessing import preprocessingData
|
| 3 |
+
from feature_extraction import extract_features, splittingData
|
| 4 |
+
from training import loadFeatures, trainModel
|
| 5 |
+
from testing import testModel
|
| 6 |
+
from evaluation import modelEvaluation
|
| 7 |
+
import numpy as np
|
| 8 |
+
import argparse
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def main(dataset, data_dir):
    """Run the full pipeline: load -> preprocess -> features -> train -> test -> evaluate.

    Args:
        dataset: dataset name ('ucf11', 'ucf50' or 'ucf101', case-insensitive).
        data_dir: root directory of the raw video dataset.

    Raises:
        ValueError: for an unrecognized dataset name.  (Previously an
        unknown name fell through the if/elif chain and crashed later
        with an unbound-`samples` NameError.)
    """
    # Number of frames sampled per clip for feature extraction.
    num_frames = 16
    # Normalize once instead of calling dataset.lower() at every step.
    name = dataset.lower()

    # Load the raw (frames, label) samples for the requested dataset.
    if name == 'ucf11':
        samples = read_UCF11(data_dir, num_frames)
    elif name == 'ucf50':
        samples = read_UCF50(data_dir, num_frames)
    elif name == 'ucf101':
        samples = read_UCF101(data_dir, num_frames)
    else:
        raise ValueError(f"Unsupported dataset '{dataset}'; expected ucf11, ucf50 or ucf101")

    # Build the frame preprocessing transform (resize/normalize for ResNet50).
    transform = preprocessingData()

    # Extract per-frame ResNet50 features for every clip.
    processed_samples = extract_features(samples, transform, name)

    # Split into train/val/test sets and persist arrays + label map.
    le = splittingData(processed_samples, name)

    # Reload the persisted feature splits.
    train_features, train_labels, val_features, val_labels, test_features, test_labels = loadFeatures(name)

    # Train the classifier.
    model = trainModel(train_features, train_labels, val_features, val_labels, name)

    # Test on the held-out split.
    y_pred, y_pred_proba = testModel(model, test_features, test_labels, name, num_frames)

    # Evaluate and save benchmark plots.
    labels = np.arange(0, len(np.unique(test_labels)), 1)
    acc_score, pre_score, rec_score, f1, auc = modelEvaluation(y_pred, y_pred_proba, test_labels, labels, name)
| 46 |
+
|
| 47 |
+
|
| 48 |
+
if __name__ == "__main__":
    # CLI entry point: positional dataset name and dataset root directory.
    parser = argparse.ArgumentParser(description='Train and evaluate a model on UCF11 or UCF50 dataset.')
    parser.add_argument('dataset', type=str, help='Dataset to use (UCF11 or UCF50)')
    parser.add_argument('data_dir', type=str, help='Directory containing the dataset')
    args = parser.parse_args()

    main(args.dataset, args.data_dir)
|
models/ucf11_lstm_model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:849bb832172d8fbdd0bc5fca049c8247c002701358377d8b6db6e04504333575
|
| 3 |
+
size 25224403
|
preprocessing.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torchvision.transforms as transforms
|
| 2 |
+
|
| 3 |
+
def preprocessingData():
    """Build the frame-preprocessing pipeline expected by ResNet50.

    Returns a torchvision transform that takes a NumPy frame and yields a
    normalized 3x224x224 float tensor.
    """
    # NumPy frame -> PIL image -> 224x224 (ResNet50 input size) -> tensor
    # scaled to [0, 1] -> normalized with the ImageNet statistics that
    # ResNet50 was pretrained on.
    return transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
|
testing.py
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
import torch.optim as optim
|
| 4 |
+
from torch.utils.data import DataLoader, TensorDataset
|
| 5 |
+
import numpy as np
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def testModel(model, test_features, test_labels, dataset, num_frames=32):
|
| 9 |
+
"""
|
| 10 |
+
"Test the LSTM model on the test set."
|
| 11 |
+
"""
|
| 12 |
+
model.eval()
|
| 13 |
+
test_loss = 0.0
|
| 14 |
+
test_total = 0
|
| 15 |
+
test_correct = 0
|
| 16 |
+
|
| 17 |
+
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 18 |
+
model.to(device)
|
| 19 |
+
criterion = nn.CrossEntropyLoss()
|
| 20 |
+
|
| 21 |
+
test_dataset = TensorDataset(test_features, test_labels)
|
| 22 |
+
test_loader = DataLoader(test_dataset, batch_size=num_frames, shuffle=False)
|
| 23 |
+
|
| 24 |
+
predicted_labels = []
|
| 25 |
+
all_outputs = []
|
| 26 |
+
|
| 27 |
+
with torch.no_grad():
|
| 28 |
+
for batch_x, batch_y in test_loader:
|
| 29 |
+
batch_x, batch_y = batch_x.to(device), batch_y.to(device)
|
| 30 |
+
outputs = model(batch_x)
|
| 31 |
+
|
| 32 |
+
loss = criterion(outputs, batch_y.long())
|
| 33 |
+
test_loss += loss.item()
|
| 34 |
+
|
| 35 |
+
_, predicted = torch.max(outputs, 1)
|
| 36 |
+
predicted_labels.extend(predicted.cpu().numpy())
|
| 37 |
+
|
| 38 |
+
# Store softmax probabilities
|
| 39 |
+
softmax_outputs = torch.nn.functional.softmax(outputs, dim=1)
|
| 40 |
+
all_outputs.append(softmax_outputs.cpu().numpy())
|
| 41 |
+
|
| 42 |
+
test_correct += (predicted == batch_y).sum().item()
|
| 43 |
+
test_total += batch_y.size(0)
|
| 44 |
+
|
| 45 |
+
y_pred = predicted_labels
|
| 46 |
+
y_pred_proba = np.vstack(all_outputs) # Convert list of arrays to a single numpy array
|
| 47 |
+
|
| 48 |
+
test_loss /= len(test_loader)
|
| 49 |
+
test_accuracy = 100 * test_correct / test_total
|
| 50 |
+
|
| 51 |
+
# Print final testing results
|
| 52 |
+
print(f'Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.2f}%')
|
| 53 |
+
|
| 54 |
+
# Save the test results to file .txt
|
| 55 |
+
with open(f'./benchmarks/{dataset}/benchmark.txt', 'a') as f:
|
| 56 |
+
f.write(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.2f}%\n")
|
| 57 |
+
|
| 58 |
+
return y_pred, y_pred_proba
|
| 59 |
+
|
training.py
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
import torch.optim as optim
|
| 4 |
+
import numpy as np
|
| 5 |
+
from torch.optim.lr_scheduler import StepLR
|
| 6 |
+
from torch.utils.data import DataLoader, TensorDataset
|
| 7 |
+
import matplotlib.pyplot as plt
|
| 8 |
+
import time
|
| 9 |
+
import os
|
| 10 |
+
from lstm import LSTMClassifier, MultiLayerBiLSTMClassifier
|
| 11 |
+
|
| 12 |
+
def loadFeatures(dataset):
    """Load the cached train/val/test features and labels for *dataset*.

    The training split is shuffled in place (features and labels with one
    shared permutation); the validation and test splits keep stored order.

    Returns:
        (train_features, train_labels, val_features, val_labels,
         test_features, test_labels) as torch tensors; features are float32.
    """
    def _load(split):
        # Both arrays live under ./features/<dataset>/ as numpy dumps.
        feats = torch.from_numpy(np.load(f'./features/{dataset}/{split}_features.npy')).float()
        labels = torch.from_numpy(np.load(f'./features/{dataset}/{split}_labels.npy'))
        return feats, labels

    train_features, train_labels = _load('train')
    # One random permutation applied to both tensors keeps pairs aligned.
    shuffle = np.random.permutation(len(train_features))
    train_features = train_features[shuffle]
    train_labels = train_labels[shuffle]

    val_features, val_labels = _load('val')
    test_features, test_labels = _load('test')

    return train_features, train_labels, val_features, val_labels, test_features, test_labels
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def trainModel(train_features, train_labels, val_features, val_labels, dataset, num_epochs=100, num_frames=32, hidden_size=256, learning_rate=0.0001):
    """
    Train the LSTM model with validation and early stopping.

    Args:
        train_features: tensor of training features; last dim is input size.
        train_labels: tensor of integer class labels for the training set.
        val_features, val_labels: validation-split tensors.
        dataset: dataset name, used for benchmark and model output paths.
        num_epochs: maximum number of epochs (early stopping may end sooner).
        num_frames: batch size for the DataLoaders.
        hidden_size: LSTM hidden state size.
        learning_rate: Adam learning rate.

    Returns:
        The trained model (its weights are also saved under ./models/).
    """
    input_size = train_features.shape[-1]
    num_classes = len(np.unique(train_labels))

    train_losses = []
    train_accuracies = []
    val_losses = []
    val_accuracies = []

    patience = 10  # Early stopping patience
    best_val_loss = float("inf")
    counter = 0

    # Instantiate the LSTM model.
    # Fix: the original also built an unused LSTMClassifier(...).cuda(),
    # which crashed on CPU-only machines and was immediately overwritten.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = MultiLayerBiLSTMClassifier(input_size, hidden_size, 2, num_classes).to(device)

    # Define the loss function, optimizer, and LR schedule (halve every 20 epochs)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    scheduler = StepLR(optimizer, step_size=20, gamma=0.5)

    # Prepare DataLoaders
    train_dataset = TensorDataset(train_features, train_labels)
    val_dataset = TensorDataset(val_features, val_labels)
    train_loader = DataLoader(train_dataset, batch_size=num_frames, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=num_frames, shuffle=False)

    # Fix: create the benchmark directory *before* any file is written there
    # (previously makedirs ran after the benchmark.txt append, which failed
    # on a fresh checkout).
    os.makedirs(f'./benchmarks/{dataset}', exist_ok=True)

    # Training loop
    start_time = time.time()
    for epoch in range(num_epochs):
        # ---- Training ----
        model.train()
        train_loss, train_total, train_correct = 0.0, 0, 0

        for batch_features, batch_labels in train_loader:
            batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)

            optimizer.zero_grad()
            outputs = model(batch_features)
            loss = criterion(outputs, batch_labels.long())
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            train_correct += (predicted == batch_labels).sum().item()
            train_total += batch_labels.size(0)

        train_loss = train_loss / len(train_loader)
        train_accuracy = 100 * train_correct / train_total
        train_losses.append(train_loss)
        train_accuracies.append(train_accuracy)

        # ---- Validation ----
        model.eval()
        total_val_loss, correct_val, total_val = 0.0, 0, 0
        with torch.no_grad():
            for batch_features, batch_labels in val_loader:
                batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)
                outputs = model(batch_features)
                loss = criterion(outputs, batch_labels.long())

                total_val_loss += loss.item()
                _, predicted = torch.max(outputs, 1)
                correct_val += (predicted == batch_labels).sum().item()
                total_val += batch_labels.size(0)

        val_loss = total_val_loss / len(val_loader)
        val_accuracy = 100 * correct_val / total_val
        val_losses.append(val_loss)
        val_accuracies.append(val_accuracy)

        scheduler.step()

        # Print training and validation results.
        # Fix: report num_epochs (was hard-coded "/100") and print before the
        # early-stop break so the final epoch is not silently dropped.
        print(f'Epoch: [{epoch+1}/{num_epochs}] Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.2f}% Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.2f}%')

        # Early stopping
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            counter = 0  # Reset counter if validation loss improves
        else:
            counter += 1  # Increment counter if validation loss does not improve
            if counter >= patience:
                print("Early stopping triggered")
                break

    end_time = time.time()
    print(f'Training completed in {end_time - start_time:.2f} seconds')

    # Save the training time in benchmark.txt
    with open(f'./benchmarks/{dataset}/benchmark.txt', 'a') as f:
        f.write(f"Training time: {end_time - start_time:.2f} seconds\n")

    # Plot training and validation losses
    plt.figure(figsize=(10, 5))
    plt.plot(train_losses, label='Training Loss')
    plt.plot(val_losses, label='Validation Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.savefig(f'./benchmarks/{dataset}/loss_plot.png')
    plt.close()

    # Plot training and validation accuracies
    plt.figure(figsize=(10, 5))
    plt.plot(train_accuracies, label='Training Accuracy')
    plt.plot(val_accuracies, label='Validation Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.savefig(f'./benchmarks/{dataset}/accuracy_plot.png')
    plt.close()

    # Save model weights
    os.makedirs('./models', exist_ok=True)
    torch.save(model.state_dict(), f'./models/{dataset}_lstm_model.pt')
    print(f'Model saved to ./models/{dataset}_lstm_model.pt')

    return model
|