Spaces:

foreversheikh
/

ano_dect

Running

App Files Files Community

foreversheikh committed on Oct 23, 2025

Commit

1c4c77a

verified ·

1 Parent(s): 17ee76b

Upload 12 files

Browse files

Files changed (12) hide show

network/MFNET.py +278 -0
network/TorchUtils.py +284 -0
network/__init__.py +0 -0
network/__pycache__/MFNET.cpython-311.pyc +0 -0
network/__pycache__/TorchUtils.cpython-311.pyc +0 -0
network/__pycache__/__init__.cpython-311.pyc +0 -0
network/__pycache__/anomaly_detector_model.cpython-311.pyc +0 -0
network/__pycache__/c3d.cpython-311.pyc +0 -0
network/__pycache__/resnet.cpython-311.pyc +0 -0
network/anomaly_detector_model.py +142 -0
network/c3d.py +129 -0
network/resnet.py +232 -0

network/MFNET.py ADDED Viewed

	@@ -0,0 +1,278 @@

+"""Author: Yunpeng Chen."""
+import logging
+from collections import OrderedDict
+import torch
+from torch import nn
class BN_AC_CONV3D(nn.Module):
    """Pre-activation 3D convolution: BatchNorm3d, then ReLU, then Conv3d.

    Args:
        num_in: number of input channels (also the BatchNorm width).
        num_filter: number of output channels of the convolution.
        kernel: convolution kernel size.
        pad: convolution padding.
        stride: convolution stride.
        g: number of convolution groups.
        bias: whether the convolution has a bias term.
    """

    def __init__(
        self,
        num_in,
        num_filter,
        kernel=(1, 1, 1),
        pad=(0, 0, 0),
        stride=(1, 1, 1),
        g=1,
        bias=False,
    ):
        super().__init__()
        conv_options = {
            "kernel_size": kernel,
            "padding": pad,
            "stride": stride,
            "groups": g,
            "bias": bias,
        }
        # Sub-modules are registered in the order bn, relu, conv.
        self.bn = nn.BatchNorm3d(num_in)
        self.relu = nn.ReLU(inplace=True)
        self.conv = nn.Conv3d(num_in, num_filter, **conv_options)

    def forward(self, x):
        """Normalise, activate and convolve ``x``."""
        return self.conv(self.relu(self.bn(x)))
class MF_UNIT(nn.Module):
    """Residual unit built from five (four when not first) BN_AC_CONV3D layers.

    Args:
        num_in: channels of the incoming tensor.
        num_mid: channels produced by the first main convolution.
        num_out: channels produced by the unit.
        g: group count for the main convolutions.
        stride: stride of the first main convolution (and of the adapter).
        first_block: when true, the second main convolution is point-wise and an
            adapter convolution projects the shortcut to ``num_out`` channels.
        use_3d: when true the first main convolution spans 3 frames, else 1.
    """

    def __init__(
        self,
        num_in,
        num_mid,
        num_out,
        g=1,
        stride=(1, 1, 1),
        first_block=False,
        use_3d=True,
    ):
        super().__init__()
        num_ix = int(num_mid / 4)
        if use_3d:
            kt, pt = 3, 1
        else:
            kt, pt = 1, 0
        pointwise = {"kernel": (1, 1, 1), "pad": (0, 0, 0)}

        # Input preparation: squeeze to num_ix channels and expand back to num_in.
        self.conv_i1 = BN_AC_CONV3D(num_in=num_in, num_filter=num_ix, **pointwise)
        self.conv_i2 = BN_AC_CONV3D(num_in=num_ix, num_filter=num_in, **pointwise)

        # Main path.
        self.conv_m1 = BN_AC_CONV3D(
            num_in=num_in,
            num_filter=num_mid,
            kernel=(kt, 3, 3),
            pad=(pt, 1, 1),
            stride=stride,
            g=g,
        )
        if first_block:
            self.conv_m2 = BN_AC_CONV3D(
                num_in=num_mid, num_filter=num_out, **pointwise
            )
            # Adapter so the shortcut matches the main path's channels and stride.
            self.conv_w1 = BN_AC_CONV3D(
                num_in=num_in, num_filter=num_out, stride=stride, **pointwise
            )
        else:
            self.conv_m2 = BN_AC_CONV3D(
                num_in=num_mid,
                num_filter=num_out,
                kernel=(1, 3, 3),
                pad=(0, 1, 1),
                g=g,
            )

    def forward(self, x):
        """Return main-path output plus the (optionally adapted) shortcut."""
        mixed = x + self.conv_i2(self.conv_i1(x))
        main = self.conv_m2(self.conv_m1(mixed))
        # The adapter only exists on the first block of a stage.
        shortcut = self.conv_w1(x) if hasattr(self, "conv_w1") else x
        return main + shortcut
class MFNET_3D(nn.Module):
    """Multi-fiber 3D network with the classifier head disabled.

    The forward pass returns the flattened output of the global pooling stage.
    Original code: https://github.com/cypw/PyTorch-MFNet.
    """

    def __init__(
        self,
        **_kwargs,
    ):
        """Build the stem, four MF_UNIT stages, tail and pooling.

        Keyword arguments are accepted and ignored.
        """
        super().__init__()
        groups = 16
        # Number of MF_UNIT blocks in stages conv2..conv5.
        k_sec = {2: 3, 3: 4, 4: 6, 5: 3}

        # conv1 (stem): spatial stride 2.
        conv1_num_out = 16
        self.conv1 = nn.Sequential(
            OrderedDict(
                [
                    (
                        "conv",
                        nn.Conv3d(
                            3,
                            conv1_num_out,
                            kernel_size=(3, 5, 5),
                            padding=(1, 2, 2),
                            stride=(1, 2, 2),
                            bias=False,
                        ),
                    ),
                    ("bn", nn.BatchNorm3d(conv1_num_out)),
                    ("relu", nn.ReLU(inplace=True)),
                ]
            )
        )
        self.maxpool = nn.MaxPool3d(
            kernel_size=(1, 3, 3), stride=(1, 2, 2), padding=(0, 1, 1)
        )

        # conv2: the only stage whose first block strides over time.
        num_mid = 96
        conv2_num_out = 96
        self.conv2 = self._make_stage(
            conv1_num_out, num_mid, conv2_num_out, k_sec[2], (2, 1, 1), groups
        )

        # conv3..conv5: widths double and the first block halves H and W.
        num_mid *= 2
        conv3_num_out = 2 * conv2_num_out
        self.conv3 = self._make_stage(
            conv2_num_out, num_mid, conv3_num_out, k_sec[3], (1, 2, 2), groups
        )

        num_mid *= 2
        conv4_num_out = 2 * conv3_num_out
        self.conv4 = self._make_stage(
            conv3_num_out, num_mid, conv4_num_out, k_sec[4], (1, 2, 2), groups
        )

        num_mid *= 2
        conv5_num_out = 2 * conv4_num_out
        self.conv5 = self._make_stage(
            conv4_num_out, num_mid, conv5_num_out, k_sec[5], (1, 2, 2), groups
        )

        # Final BN + ReLU (the units are pre-activation, so one is needed here).
        self.tail = nn.Sequential(
            OrderedDict(
                [("bn", nn.BatchNorm3d(conv5_num_out)), ("relu", nn.ReLU(inplace=True))]
            )
        )
        self.globalpool = nn.Sequential(
            OrderedDict(
                [
                    ("avg", nn.AvgPool3d(kernel_size=(1, 7, 7), stride=(1, 1, 1))),
                    ("dropout", nn.Dropout(p=0.5)),  # only for fine-tuning
                ]
            )
        )

    @staticmethod
    def _make_stage(num_in, num_mid, num_out, num_blocks, first_stride, groups):
        """Build one stage: ``num_blocks`` MF_UNITs named ``B01``, ``B02``, ...

        Only the first block consumes ``num_in`` channels, applies
        ``first_stride`` and is flagged as ``first_block``.
        """
        blocks = OrderedDict()
        for i in range(1, num_blocks + 1):
            is_first = i == 1
            blocks["B%02d" % i] = MF_UNIT(
                num_in=num_in if is_first else num_out,
                num_mid=num_mid,
                num_out=num_out,
                stride=first_stride if is_first else (1, 1, 1),
                g=groups,
                first_block=is_first,
            )
        return nn.Sequential(blocks)

    def forward(self, x):
        """Run the backbone and return features flattened to ``(batch, -1)``."""
        # Spatial sizes in the comments assume a 224x224 input - TODO confirm.
        h = self.conv1(x)  # x224 -> x112
        h = self.maxpool(h)  # x112 ->  x56
        h = self.conv2(h)  # x56 ->  x56
        h = self.conv3(h)  # x56 ->  x28
        h = self.conv4(h)  # x28 ->  x14
        h = self.conv5(h)  # x14 ->   x7
        h = self.tail(h)
        h = self.globalpool(h)
        return h.view(h.shape[0], -1)

    def load_state(self, state_dict):
        """Partially load weights from a checkpoint file.

        Args:
            state_dict: despite the name, something ``torch.load`` can open (e.g.
                a path) that yields a mapping with a ``"state_dict"`` entry.

        Entries are copied only when, after dropping ``"module."`` from the key,
        the name exists in this network and the shapes match. Parameters that
        received no value are reported with a warning.

        Returns:
            ``self``, to allow chaining.
        """
        # NOTE(review): torch.load unpickles the file - only use trusted checkpoints.
        checkpoint = torch.load(state_dict, map_location=torch.device("cpu"))
        loaded_params = checkpoint["state_dict"]

        # state_dict() builds a fresh mapping on every call, so take it once. Its
        # tensors share storage with the live parameters and buffers, which is why
        # copy_ on them updates the network.
        own_state = self.state_dict()
        pending = set(own_state)
        for name, param in loaded_params.items():
            name = name.replace("module.", "")
            target = own_state.get(name)
            if target is not None and param.shape == target.shape:
                target.copy_(param.view(target.shape))
                # discard: two checkpoint keys may collapse to the same name.
                pending.discard(name)

        # Report the misses in the network's own key order.
        net_state_keys = [key for key in own_state if key in pending]
        if net_state_keys:
            logging.warning(f">> Failed to load: {net_state_keys}")
        return self

network/TorchUtils.py ADDED Viewed

	@@ -0,0 +1,284 @@

+"""Written by Eitan Kosman."""
+import logging
+import os
+import time
+from typing import List, Optional, Union
+import torch
+from torch import Tensor, nn
+from torch.optim import Optimizer
+from torch.utils.data import DataLoader
+from utils.callbacks import Callback
+from utils.types import Device
+import torch
+from network.anomaly_detector_model import AnomalyDetector
+# Use safe_globals context
def get_torch_device() -> Device:
    """Pick the device to run torch models on.

    Returns: the ``cuda`` device when CUDA is available, otherwise ``cpu``.
    """
    if torch.cuda.is_available():
        return torch.device("cuda")
    return torch.device("cpu")
def load_model(model_path: str) -> nn.Module:
    """Unpickle a whole PyTorch model onto the CPU (targets PyTorch >= 2.6).

    Args:
        model_path: path of the pickled model file.
    Returns: the object stored in the file.
    """
    logging.info(f"Load the model from: {model_path}")
    from network.anomaly_detector_model import AnomalyDetector

    # NOTE(review): weights_only=False performs a full unpickle, which can run
    # arbitrary code - only load files from a trusted source.
    allowed_classes = [AnomalyDetector]
    with torch.serialization.safe_globals(allowed_classes):
        loaded = torch.load(model_path, weights_only=False, map_location="cpu")
    logging.info(loaded)
    return loaded
class TorchModel(nn.Module):
    """Wrapper around a torch model that adds training and evaluation loops,
    callbacks and save/load helpers."""

    def __init__(self, model: nn.Module) -> None:
        """
        Args:
            model: the network to wrap
        """
        super().__init__()
        self.device = get_torch_device()
        # Count of training batches processed over the wrapper's lifetime.
        self.iteration = 0
        self.model = model
        self.is_data_parallel = False
        self.callbacks = []

    def register_callback(self, callback_fn: "Callback") -> None:
        """
        Register a callback object; its hooks are invoked by name through
        ``notify_callbacks`` during training and evaluation
        Args:
            callback_fn: the callback to add
        """
        self.callbacks.append(callback_fn)

    def data_parallel(self):
        """Transfers the model to data parallel mode (devices 0 and 1).

        Returns: self
        """
        self.is_data_parallel = True
        if not isinstance(self.model, torch.nn.DataParallel):
            self.model = torch.nn.DataParallel(self.model, device_ids=[0, 1])
        return self

    @classmethod
    def load_model(cls, model_path: str):
        """
        Loads a pickled model
        Args:
            model_path: path to the pickled model
        Returns: TorchModel class instance wrapping the provided model
        """
        return cls(load_model(model_path))

    def notify_callbacks(self, notification, *args, **kwargs) -> None:
        """Calls the hook named ``notification`` on every registered callback.

        An AttributeError or TypeError from a callback is logged, not raised.
        Args:
            notification: name of the callback method to call.
        """
        for callback in self.callbacks:
            try:
                method = getattr(callback, notification)
                method(*args, **kwargs)
            except (AttributeError, TypeError) as e:
                logging.error(
                    f"callback {callback.__class__.__name__} doesn't fully implement the required interface {e}"  # pylint: disable=line-too-long
                )

    def fit(
        self,
        train_iter: DataLoader,
        criterion: nn.Module,
        optimizer: Optimizer,
        eval_iter: Optional[DataLoader] = None,
        epochs: int = 10,
        network_model_path_base: Optional[str] = None,
        save_every: Optional[int] = None,
        evaluate_every: Optional[int] = None,
    ) -> None:
        """
        Trains the model, optionally checkpointing and evaluating on the way
        Args:
            train_iter: iterator for training
            criterion: loss function
            optimizer: optimizer for the algorithm
            eval_iter: iterator for evaluation
            epochs: amount of epochs
            network_model_path_base: where to save the models
            save_every: saving model checkpoints every specified amount of epochs
            evaluate_every: perform evaluation every specified amount of epochs.
                            If the evaluation is expensive, you probably want to
                            choose a high value for this
        """
        criterion = criterion.to(self.device)
        self.notify_callbacks("on_training_start", epochs)
        for epoch in range(epochs):
            train_loss = self.do_epoch(
                criterion=criterion,
                optimizer=optimizer,
                data_iter=train_iter,
                epoch=epoch,
            )
            if save_every and network_model_path_base and epoch % save_every == 0:
                logging.info(f"Save the model after epoch {epoch}")
                self.save(os.path.join(network_model_path_base, f"epoch_{epoch}.pt"))
            val_loss = None
            if eval_iter and evaluate_every and epoch % evaluate_every == 0:
                logging.info(f"Evaluating after epoch {epoch}")
                val_loss = self.evaluate(
                    criterion=criterion,
                    data_iter=eval_iter,
                )
            self.notify_callbacks("on_training_iteration_end", train_loss, val_loss)
        self.notify_callbacks("on_training_end", self.model)
        # Save the last model anyway. `epochs` equals "last epoch index + 1" and,
        # unlike the loop variable, is still defined when the loop never ran.
        if network_model_path_base:
            self.save(os.path.join(network_model_path_base, f"epoch_{epochs}.pt"))

    def evaluate(self, criterion: nn.Module, data_iter: DataLoader) -> float:
        """
        Evaluates the model without tracking gradients
        Args:
            criterion: Loss function for calculating the evaluation
            data_iter: torch data iterator
        Returns: loss averaged over the batches of data_iter
        """
        self.eval()
        self.notify_callbacks("on_evaluation_start", len(data_iter))
        total_loss = 0
        with torch.no_grad():
            for iteration, (batch, targets) in enumerate(data_iter):
                batch = self.data_to_device(batch, self.device)
                targets = self.data_to_device(targets, self.device)
                outputs = self.model(batch)
                loss = criterion(outputs, targets)
                self.notify_callbacks(
                    "on_evaluation_step",
                    iteration,
                    outputs.detach().cpu(),
                    targets.detach().cpu(),
                    loss.item(),
                )
                total_loss += loss.item()
        loss = total_loss / len(data_iter)
        self.notify_callbacks("on_evaluation_end")
        return loss

    def do_epoch(
        self,
        criterion: nn.Module,
        optimizer: Optimizer,
        data_iter: DataLoader,
        epoch: int,
    ) -> float:
        """Perform a whole epoch.
        Args:
            criterion (nn.Module): Loss function to be used.
            optimizer (Optimizer): Optimizer to use for minimizing the loss function.
            data_iter (DataLoader): Loader for data samples used for training the model.
            epoch (int): The epoch number.
        Returns:
            float: Average training loss calculated during the epoch.
        """
        total_loss = 0
        self.train()
        self.notify_callbacks("on_epoch_start", epoch, len(data_iter))
        for iteration, (batch, targets) in enumerate(data_iter):
            # Exactly one increment per batch, so the counter matches the number
            # of optimisation steps taken so far.
            self.iteration += 1
            batch = self.data_to_device(batch, self.device)
            targets = self.data_to_device(targets, self.device)
            outputs = self.model(batch)
            loss = criterion(outputs, targets)
            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            self.notify_callbacks(
                "on_epoch_step",
                self.iteration,
                iteration,
                loss.item(),
            )
        loss = total_loss / len(data_iter)
        self.notify_callbacks("on_epoch_end", loss)
        return loss

    def data_to_device(
        self, data: Union[Tensor, List[Tensor]], device: "Device"
    ) -> Union[Tensor, List[Tensor]]:
        """
        Transfers tensor data to a device; a list or tuple is moved element by
        element and comes back as the same container type
        Args:
            data: torch tensor, or list/tuple of tensors
            device: target device
        """
        if isinstance(data, list):
            return [d.to(device) for d in data]
        if isinstance(data, tuple):
            return tuple(d.to(device) for d in data)
        return data.to(device)

    def save(self, model_path: str) -> None:
        """Saves the model to the given path.
        If currently using data parallel, the method
        will save the original model and not the data parallel instance of it
        Args:
            model_path: target path to save the model to
        """
        torch.save(self.get_model(), model_path)

    def get_model(self) -> nn.Module:
        """Returns the wrapped model, unwrapped from DataParallel if needed."""
        if self.is_data_parallel:
            return self.model.module
        return self.model

    def forward(self, *args, **kwargs):
        """Delegates to the wrapped model."""
        return self.model(*args, **kwargs)

network/__init__.py ADDED Viewed

File without changes

network/__pycache__/MFNET.cpython-311.pyc ADDED Viewed

Binary file (10.3 kB). View file

network/__pycache__/TorchUtils.cpython-311.pyc ADDED Viewed

Binary file (14.3 kB). View file

network/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (176 Bytes). View file

network/__pycache__/anomaly_detector_model.cpython-311.pyc ADDED Viewed

Binary file (9.39 kB). View file

network/__pycache__/c3d.cpython-311.pyc ADDED Viewed

Binary file (6.81 kB). View file

network/__pycache__/resnet.cpython-311.pyc ADDED Viewed

Binary file (11.9 kB). View file

network/anomaly_detector_model.py ADDED Viewed

	@@ -0,0 +1,142 @@

+"""This module contains an implementation of anomaly detector for videos."""
+from typing import Callable
+import torch
+from torch import Tensor, nn
class AnomalyDetector(nn.Module):
    """Three-layer fully connected scorer (input_dim -> 512 -> 32 -> 1, sigmoid)."""

    def __init__(self, input_dim=4096) -> None:
        super().__init__()
        self.fc1 = nn.Linear(input_dim, 512)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(0.6)
        self.fc2 = nn.Linear(512, 32)
        self.dropout2 = nn.Dropout(0.6)
        self.fc3 = nn.Linear(32, 1)
        self.sig = nn.Sigmoid()
        # The original keras code uses "glorot_normal"; the author's understanding
        # is that xavier normal is its Pytorch counterpart.
        for linear in (self.fc1, self.fc2, self.fc3):
            nn.init.xavier_normal_(linear.weight)

    @property
    def input_dim(self) -> int:
        """Number of input features, read from the first layer's weight."""
        _, in_features = self.fc1.weight.shape
        return in_features

    def forward(self, x: Tensor) -> Tensor:  # pylint: disable=arguments-differ
        """Map features to a sigmoid score; the trailing dimension becomes 1."""
        hidden = self.fc1(x)
        hidden = self.relu1(hidden)
        hidden = self.dropout1(hidden)
        # No activation between fc2 and its dropout.
        hidden = self.dropout2(self.fc2(hidden))
        return self.sig(self.fc3(hidden))
def custom_objective(y_pred: Tensor, y_true: Tensor) -> Tensor:
    """Ranking loss with smoothness and sparsity terms for anomaly detection.

    Videos labelled 0 are treated as normal and videos labelled 1 as anomalous.
    The per-video hinge term ``max(0, 1 - max(anomalous) + max(normal))`` is
    combined with two terms computed on the anomalous scores only, each weighted
    by 8e-5: the sum of squared differences between consecutive segments and the
    sum of the scores.

    Args:
        y_pred (Tensor): Model predictions; the comments in the original code
            give the shape as (batch_size, 32, 1).
        y_true (Tensor): Ground truth labels, one per video.
    Returns:
        Tensor: The mean of the combined per-video loss.
    """
    lambdas = 8e-5

    # Separate the videos by label and drop the trailing singleton dimension.
    normal_scores = y_pred[torch.where(y_true == 0)].squeeze(-1)
    anomalous_scores = y_pred[torch.where(y_true == 1)].squeeze(-1)

    # Highest segment score of every video.
    top_normal, _ = normal_scores.max(dim=-1)
    top_anomalous, _ = anomalous_scores.max(dim=-1)

    margin = 1 - top_anomalous + top_normal
    ranking_term = torch.max(margin, torch.zeros_like(margin))

    # Smoothness of anomalous video
    neighbour_diff = anomalous_scores[:, 1:] - anomalous_scores[:, :-1]
    smoothness_term = neighbour_diff.pow(2).sum(dim=-1)

    # Sparsity of anomalous video
    sparsity_term = anomalous_scores.sum(dim=-1)

    per_video = ranking_term + lambdas * smoothness_term + lambdas * sparsity_term
    return per_video.mean()
class RegularizedLoss(torch.nn.Module):
    """Adds a weight-norm penalty for fc1, fc2 and fc3 to a base objective."""

    def __init__(
        self,
        model: AnomalyDetector,
        original_objective: Callable,
        lambdas: float = 0.001,
    ) -> None:
        """
        Args:
            model: network whose fc1/fc2/fc3 parameters are penalised.
            original_objective: callable taking (y_pred, y_true).
            lambdas: weight applied to each layer's norm.
        """
        super().__init__()
        self.lambdas = lambdas
        self.model = model
        self.objective = original_objective

    def forward(self, y_pred: Tensor, y_true: Tensor):  # pylint: disable=arguments-differ
        """Return the base objective plus ``lambdas`` times each layer's 2-norm."""
        # The loss is defined with respect to l2 regularization, as used in the
        # original keras code. Each layer's weight and bias are flattened into a
        # single vector before its norm is taken.
        penalties = []
        for layer in (self.model.fc1, self.model.fc2, self.model.fc3):
            flat_params = torch.cat([p.view(-1) for p in layer.parameters()])
            penalties.append(self.lambdas * torch.norm(flat_params, p=2))

        total = self.objective(y_pred, y_true)
        for penalty in penalties:
            total = total + penalty
        return total
+# ----------------------------------------------------------------------------------------------------------------------
class AnomalyClassifier(nn.Module):
    """
    Multi-class anomaly classifier (input_dim -> 256 -> 64 -> num_classes)
    The default of 13 classes stands for Normal + 12 anomaly classes
    """

    def __init__(self, input_dim=512, num_classes=13):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, 256)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(0.5)
        self.fc2 = nn.Linear(256, 64)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(0.5)
        # One raw score (logit) per class.
        self.fc3 = nn.Linear(64, num_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        x: (B, input_dim) feature vectors
        returns: (B, num_classes) logits
        """
        hidden = self.fc1(x)
        hidden = self.dropout1(self.relu1(hidden))
        hidden = self.fc2(hidden)
        hidden = self.dropout2(self.relu2(hidden))
        return self.fc3(hidden)

network/c3d.py ADDED Viewed

	@@ -0,0 +1,129 @@

+""" "This module contains an implementation of C3D model for video
+processing."""
+import itertools
+import torch
+from torch import Tensor, nn
class C3D(nn.Module):
    """The C3D network, ending at fc6 so the output is a 4096-d feature vector."""

    def __init__(self, pretrained=None):
        """
        Args:
            pretrained: something ``torch.load`` can open (e.g. a path) holding a
                state dict of pretrained weights; any falsy value skips loading.
        """
        super().__init__()
        self.pretrained = pretrained
        self.conv1 = nn.Conv3d(3, 64, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.pool1 = nn.MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2))
        self.conv2 = nn.Conv3d(64, 128, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.pool2 = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2))
        self.conv3a = nn.Conv3d(128, 256, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.conv3b = nn.Conv3d(256, 256, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.pool3 = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2))
        self.conv4a = nn.Conv3d(256, 512, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.conv4b = nn.Conv3d(512, 512, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.pool4 = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2))
        self.conv5a = nn.Conv3d(512, 512, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.conv5b = nn.Conv3d(512, 512, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.pool5 = nn.MaxPool3d(
            kernel_size=(2, 2, 2), stride=(2, 2, 2), padding=(0, 1, 1)
        )
        self.fc6 = nn.Linear(8192, 4096)
        self.relu = nn.ReLU()
        self.__init_weight()
        if pretrained:
            self.__load_pretrained_weights()

    def forward(self, x: Tensor):
        """Return ``relu(fc6(...))`` features, one row per batch element."""
        x = self.relu(self.conv1(x))
        x = self.pool1(x)
        x = self.relu(self.conv2(x))
        x = self.pool2(x)
        x = self.relu(self.conv3a(x))
        x = self.relu(self.conv3b(x))
        x = self.pool3(x)
        x = self.relu(self.conv4a(x))
        x = self.relu(self.conv4b(x))
        x = self.pool4(x)
        x = self.relu(self.conv5a(x))
        x = self.relu(self.conv5b(x))
        x = self.pool5(x)
        # Flatten per sample; fc6 expects 8192 features per row.
        x = x.view(x.size(0), -1)
        x = self.relu(self.fc6(x))
        return x

    def __load_pretrained_weights(self):
        """Copy the pretrained conv1..conv5b and fc6 weights into the network.

        Checkpoint entries for fc7/fc8 are skipped silently; any other unknown
        entry is reported on stdout and skipped.
        """
        layer_names = [
            "conv1",
            "conv2",
            "conv3a",
            "conv3b",
            "conv4a",
            "conv4b",
            "conv5a",
            "conv5b",
            "fc6",
        ]
        corresp_name = [
            f"{layer}.{type_}"
            for layer in layer_names
            for type_ in ("weight", "bias")
        ]
        ignored_weights = [
            f"{layer}.{type_}"
            for layer, type_ in itertools.product(["fc7", "fc8"], ["bias", "weight"])
        ]
        # Map storages to the CPU so a checkpoint saved on a GPU still loads on a
        # CPU-only host; load_state_dict below copies the values into the
        # existing parameters whatever device those live on.
        p_dict = torch.load(self.pretrained, map_location="cpu")
        s_dict = self.state_dict()
        for name in p_dict:
            if name not in corresp_name:
                if name in ignored_weights:
                    continue
                print("no corresponding::", name)
                continue
            s_dict[name] = p_dict[name]
        self.load_state_dict(s_dict)

    def __init_weight(self):
        """Initialize weights of the model."""
        for m in self.modules():
            if isinstance(m, nn.Conv3d):
                torch.nn.init.kaiming_normal_(m.weight)
            elif isinstance(m, nn.BatchNorm3d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
if __name__ == "__main__":
    # Smoke test: push one all-ones clip through a randomly initialised network
    # and print the size of the result.
    net = C3D(pretrained=False)
    inputs = torch.ones((1, 3, 16, 112, 112))
    outputs = net.forward(inputs)
    print(outputs.size())

network/resnet.py ADDED Viewed

	@@ -0,0 +1,232 @@

+""" "This module contains an implementation of ResNet model for video
+processing."""
+from functools import partial
+import torch
+import torch.nn.functional as F
+from torch import nn
def get_inplanes():
    """Return the four stage widths: 64, doubled for each following stage."""
    return [64 * 2**stage for stage in range(4)]
def conv3x3x3(in_planes, out_planes, stride=1):
    """Build a bias-free 3x3x3 Conv3d with padding 1."""
    options = {"kernel_size": 3, "stride": stride, "padding": 1, "bias": False}
    return nn.Conv3d(in_planes, out_planes, **options)
def conv1x1x1(in_planes, out_planes, stride=1):
    """Build a bias-free point-wise (1x1x1) Conv3d."""
    options = {"kernel_size": 1, "stride": stride, "bias": False}
    return nn.Conv3d(in_planes, out_planes, **options)
class BasicBlock(nn.Module):
    """Residual block made of two 3x3x3 convolutions, each followed by BatchNorm."""

    # Ratio between the block's output channels and ``planes``.
    expansion = 1

    def __init__(self, in_planes, planes, stride=1, downsample=None):
        """
        Args:
            in_planes: channels of the incoming tensor.
            planes: channels produced by both convolutions.
            stride: stride of the first convolution.
            downsample: optional callable applied to the input to form the
                shortcut; the input itself is used when it is None.
        """
        super().__init__()
        self.conv1 = conv3x3x3(in_planes, planes, stride)
        self.bn1 = nn.BatchNorm3d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3x3(planes, planes)
        self.bn2 = nn.BatchNorm3d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Return ``relu(main(x) + shortcut(x))``."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        shortcut = x if self.downsample is None else self.downsample(x)
        out += shortcut
        return self.relu(out)
class Bottleneck(nn.Module):
    """Residual block: 1x1x1 reduce, 3x3x3, 1x1x1 expand, each with BatchNorm."""

    # The last convolution outputs ``planes * expansion`` channels.
    expansion = 4

    def __init__(self, in_planes, planes, stride=1, downsample=None):
        """
        Args:
            in_planes: channels of the incoming tensor.
            planes: channels of the two inner convolutions.
            stride: stride of the 3x3x3 convolution.
            downsample: optional callable applied to the input to form the
                shortcut; the input itself is used when it is None.
        """
        super().__init__()
        expanded = planes * self.expansion
        self.conv1 = conv1x1x1(in_planes, planes)
        self.bn1 = nn.BatchNorm3d(planes)
        self.conv2 = conv3x3x3(planes, planes, stride)
        self.bn2 = nn.BatchNorm3d(planes)
        self.conv3 = conv1x1x1(planes, expanded)
        self.bn3 = nn.BatchNorm3d(expanded)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Return ``relu(main(x) + shortcut(x))``."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        shortcut = x if self.downsample is None else self.downsample(x)
        out += shortcut
        return self.relu(out)
class ResNet(nn.Module):
    """3D ResNet backbone whose forward pass returns pooled, flattened features
    (the classification layer is disabled)."""

    def __init__(
        self,
        block,
        layers,
        block_inplanes,
        n_input_channels=3,
        conv1_t_size=7,
        conv1_t_stride=1,
        no_max_pool=False,
        shortcut_type="B",
        widen_factor=1.0,
        n_classes=1039,
    ):
        """
        Args:
            block: residual block class; must expose an ``expansion`` attribute.
            layers: number of blocks in each of the four stages.
            block_inplanes: base channel width of each of the four stages.
            n_input_channels: channels of the input clip.
            conv1_t_size: temporal kernel size of the first convolution.
            conv1_t_stride: temporal stride of the first convolution.
            no_max_pool: skip the max-pool after the first convolution.
            shortcut_type: "A" pads the shortcut with zero channels; any other
                value uses a 1x1x1 convolution followed by BatchNorm.
            widen_factor: multiplier applied to every entry of block_inplanes.
            n_classes: unused while the classification layer is disabled.
        """
        super().__init__()
        block_inplanes = [int(x * widen_factor) for x in block_inplanes]
        self.in_planes = block_inplanes[0]
        self.no_max_pool = no_max_pool
        self.conv1 = nn.Conv3d(
            n_input_channels,
            self.in_planes,
            kernel_size=(conv1_t_size, 7, 7),
            stride=(conv1_t_stride, 2, 2),
            padding=(conv1_t_size // 2, 3, 3),
            bias=False,
        )
        self.bn1 = nn.BatchNorm3d(self.in_planes)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool3d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(
            block, block_inplanes[0], layers[0], shortcut_type
        )
        self.layer2 = self._make_layer(
            block, block_inplanes[1], layers[1], shortcut_type, stride=2
        )
        self.layer3 = self._make_layer(
            block, block_inplanes[2], layers[2], shortcut_type, stride=2
        )
        self.layer4 = self._make_layer(
            block, block_inplanes[3], layers[3], shortcut_type, stride=2
        )
        self.avgpool = nn.AdaptiveAvgPool3d((1, 1, 1))
        for m in self.modules():
            if isinstance(m, nn.Conv3d):
                nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
            elif isinstance(m, nn.BatchNorm3d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def _downsample_basic_block(self, x, planes, stride):
        """Type-A shortcut: subsample ``x`` and zero-pad channels up to ``planes``."""
        out = F.avg_pool3d(x, kernel_size=1, stride=stride)
        # new_zeros inherits dtype and device from `out`, so the padding matches
        # on any device and precision.
        zero_pads = out.new_zeros(
            (out.size(0), planes - out.size(1), out.size(2), out.size(3), out.size(4))
        )
        # Concatenate `out` itself rather than `out.data` so that gradients keep
        # flowing through the shortcut.
        return torch.cat([out, zero_pads], dim=1)

    def _make_layer(self, block, planes, blocks, shortcut_type, stride=1):
        """Build one stage of ``blocks`` residual blocks and update ``in_planes``."""
        out_planes = planes * block.expansion
        downsample = None
        # A shortcut transform is needed whenever the first block changes the
        # resolution or the channel count.
        if stride != 1 or self.in_planes != out_planes:
            if shortcut_type == "A":
                downsample = partial(
                    self._downsample_basic_block,
                    planes=out_planes,
                    stride=stride,
                )
            else:
                downsample = nn.Sequential(
                    conv1x1x1(self.in_planes, out_planes, stride),
                    nn.BatchNorm3d(out_planes),
                )
        layers = [
            block(
                in_planes=self.in_planes,
                planes=planes,
                stride=stride,
                downsample=downsample,
            )
        ]
        self.in_planes = out_planes
        for _ in range(1, blocks):
            layers.append(block(self.in_planes, planes))
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return the globally average-pooled features flattened to ``(batch, -1)``."""
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        if not self.no_max_pool:
            x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.avgpool(x)
        return x.view(x.size(0), -1)
def generate_model(model_depth, **kwargs):
    """Build a ResNet of the requested depth.

    Args:
        model_depth: one of 10, 18, 34, 50, 101, 152 or 200.
        **kwargs: forwarded unchanged to the ResNet constructor.
    Returns: the constructed ResNet.
    """
    assert model_depth in [10, 18, 34, 50, 101, 152, 200]
    # (depth, block class, blocks per stage); depths up to 34 use BasicBlock.
    architectures = (
        (10, BasicBlock, [1, 1, 1, 1]),
        (18, BasicBlock, [2, 2, 2, 2]),
        (34, BasicBlock, [3, 4, 6, 3]),
        (50, Bottleneck, [3, 4, 6, 3]),
        (101, Bottleneck, [3, 4, 23, 3]),
        (152, Bottleneck, [3, 8, 36, 3]),
        (200, Bottleneck, [3, 24, 36, 3]),
    )
    for depth, block, layers in architectures:
        if model_depth == depth:
            model = ResNet(block, layers, get_inplanes(), **kwargs)
            break
    return model