Chiquitin committed on
Commit
482fd8d
·
1 Parent(s): 3cca845

upload source code and train configurations

Browse files
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ numpy==2.3.5
2
+ torch==2.5.1+cu121
3
+ torchaudio==2.5.1+cu121
4
+ torchvision==0.20.1+cu121
5
+ tensorboard==2.20.0
6
+ matplotlib==3.10.7
7
+ datasets==4.4.1
8
+ psutil==7.1.3
9
+ spacy==3.8.11
10
+ tqdm==4.67.1
src/dataset/__init__.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
2
+ # #
3
+ # This file was created by: Alberto Palomo Alonso #
4
+ # Universidad de Alcalá - Escuela Politécnica Superior #
5
+ # #
6
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
7
+ from .tokenizer import SegmentationTokenizer, SentenceSegmenter
8
+ from .dataset import SegmentationDataset
9
+ from .tokenized_dataset import TokenizedSegmentationDataset
10
+ from .config import DatasetConfig
11
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
12
+ # END OF FILE #
13
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
src/dataset/config.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
2
+ # #
3
+ # This file was created by: Alberto Palomo Alonso #
4
+ # Universidad de Alcalá - Escuela Politécnica Superior #
5
+ # #
6
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
7
+ # Import statements:
8
+ from dataclasses import dataclass
9
+
10
+
11
+ @dataclass
12
+ class DatasetConfig:
13
+ # Paths:
14
+ train_data_path: str = None
15
+ val_data_path: str = None
16
+ test_data_path: str = None
17
+ # Percentages:
18
+ train_percentage: float = 1.0
19
+ val_percentage: float = 1.0
20
+ test_percentage: float = 1.0
21
+ # Other parameters:
22
+ num_workers: int = 0
23
+ shuffle_train: bool = True
24
+ shuffle_val: bool = True
25
+
26
+
27
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
28
+ # END OF FILE #
29
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
src/dataset/dataset.py ADDED
@@ -0,0 +1,199 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
2
+ # #
3
+ # This file was created by: Alberto Palomo Alonso #
4
+ # Universidad de Alcalá - Escuela Politécnica Superior #
5
+ # #
6
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
7
+ # Import statements:
8
+ import logging
9
+ from torch.utils.data import Dataset, DataLoader
10
+ from datasets import Dataset as HfDataset
11
+ from datasets import load_from_disk
12
+ from .tokenizer import SegmentationTokenizer, SentenceSegmenter
13
+
14
+
15
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
16
+ # #
17
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
18
class SegmentationDataset(Dataset):
    def __init__(
        self,
        huggingface_dataset: str | HfDataset,
        tokenizer: SegmentationTokenizer,
        segmenter: SentenceSegmenter,
        logger: logging.Logger = None,
        percentage: float = 1.0,
        return_type: type = dict
    ):
        """
        A segmentation dataset takes a huggingface dataset or a path to a dataset on disk with the
        wikipedia-segmentation format. It loads the dataset and prepares it for training.

        Wikipedia-segmentation format:
        - The dataset is expected to be a huggingface dataset or a path to a dataset on disk.
        - The dataset should contain the following fields:
        >>> sample = {
        >>>     'text': ['Article 1', 'Article 2', ...],
        >>>     'titles': ['Title 1', 'Title 2', ...],
        >>>     'id': str,
        >>>     'words': int,
        >>>     'paragraphs': int,
        >>>     'sentences': int
        >>> }
        - The dataset should be a list of dictionaries, where each dictionary contains the fields above.

        Parameters
        ----------
        huggingface_dataset : str | HfDataset
            A huggingface dataset or a path to a dataset on disk with the wikipedia-segmentation format.

        tokenizer : SegmentationTokenizer
            A callable tokenizer that maps a list of sentences to input ids and attention masks.

        segmenter : SentenceSegmenter
            Sentence segmenter used to split each article into sentences and boundary labels.

        logger : logging.Logger, optional
            Logger instance. If not provided, a null logger will be used.

        percentage : float
            Percentage of the dataset to use. Default is 1.0 (100%).

        return_type : type
            The return type of __getitem__, either dict or tuple. Default is dict.

        Raises
        ------
        ValueError
            If huggingface_dataset is not a string or a HfDataset, if tokenizer is not
            callable, if segmenter is not a SentenceSegmenter instance, if percentage
            is outside (0.0, 1.0], or if return_type is not dict or tuple.
        """
        # Fall back to a no-op logger so subsequent logging calls are always safe:
        if not isinstance(logger, logging.Logger):
            self.logger = logging.getLogger("null")
            self.logger.addHandler(logging.NullHandler())
        else:
            self.logger = logger

        # Load the dataset (in-memory object or a path on disk):
        if isinstance(huggingface_dataset, HfDataset):
            self.huggingface_dataset = huggingface_dataset
        elif isinstance(huggingface_dataset, str):
            self.huggingface_dataset = load_from_disk(huggingface_dataset)
        else:
            self.logger.error('[SegmentationDataset] huggingface_dataset must be either a string or a HfDataset.')
            raise ValueError('[SegmentationDataset] huggingface_dataset must be either a string or a HfDataset.')
        self.logger.info(f'[SegmentationDataset] Loaded dataset: {self.huggingface_dataset}')
        self.logger.info(f'[SegmentationDataset] Loaded dataset length: {self.huggingface_dataset.num_rows}')

        # Tokenizer (any callable is accepted):
        if callable(tokenizer):
            self.tokenizer = tokenizer
        else:
            self.logger.error('[SegmentationDataset] Tokenizer must be a callable function.')
            raise ValueError('[SegmentationDataset] Tokenizer must be a callable function.')

        # Segmenter:
        if not isinstance(segmenter, SentenceSegmenter):
            self.logger.error('[SegmentationDataset] Segmenter must be a SentenceSegmenter instance.')
            raise ValueError('[SegmentationDataset] Segmenter must be a SentenceSegmenter instance.')
        self.segmenter = segmenter

        # Fraction of the dataset exposed through __len__:
        if not (0.0 < percentage <= 1.0):
            self.logger.error('[SegmentationDataset] Percentage must be between 0.0 and 1.0.')
            raise ValueError('[SegmentationDataset] Percentage must be between 0.0 and 1.0.')
        self.percentage = percentage

        # Return type of __getitem__:
        if not isinstance(return_type, type):
            self.logger.error('[SegmentationDataset] return_type must be a type.')
            raise ValueError('[SegmentationDataset] return_type must be a type.')
        if return_type not in (dict, tuple):
            self.logger.error('[SegmentationDataset] return_type must be either dict or tuple.')
            raise ValueError('[SegmentationDataset] return_type must be either dict or tuple.')
        self.return_type = return_type

    def get_loader(self, batch_size=8, shuffle=True, num_workers=0, **kwargs) -> DataLoader:
        """
        Returns a PyTorch DataLoader for this dataset.

        Parameters
        ----------
        batch_size : int
            Number of samples per batch.
        shuffle : bool
            Whether to shuffle the dataset.
        num_workers : int
            Number of worker processes.
        **kwargs
            Additional arguments for DataLoader.

        Returns
        -------
        torch.utils.data.DataLoader
            Configured DataLoader.
        """
        # pin_memory speeds up host-to-GPU transfers when training on CUDA:
        return DataLoader(self, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers,
                          pin_memory=True, **kwargs)

    def __len__(self) -> int:
        """
        Returns the number of usable samples (total rows scaled by `percentage`).

        Returns
        -------
        int
            Number of samples exposed to the DataLoader.
        """
        return int(self.huggingface_dataset.num_rows * self.percentage)

    def __getitem__(self, idx) -> dict | tuple:
        """
        Retrieves a single article, segments it into sentences and tokenizes them.

        Parameters
        ----------
        idx : int
            Index of the sample.

        Returns
        -------
        dict | tuple
            Tokenized inputs, boundary labels and masks. The tuple order is
            (input_ids, boundaries, input_mask, sentence_mask, candidates);
            the dict uses the keys 'input', 'input_mask', 'labels',
            'output_mask', 'candidate_mask'. Controlled by `return_type`.
        """
        sample = self.huggingface_dataset[idx]['text']
        sentences = self.segmenter(sample)
        tokenized = self.tokenizer(sentences['sentences'])

        if self.return_type == tuple:
            return (
                tokenized['input_ids'],  # x
                sentences['sentence_boundaries'],  # y
                tokenized['attention_mask'],  # x_mask
                sentences['sentence_mask'],  # y_mask
                sentences['sentence_candidates'],  # y_prime_mask
            )
        elif self.return_type == dict:
            return {
                'input': tokenized['input_ids'],
                'input_mask': tokenized['attention_mask'],
                'labels': sentences['sentence_boundaries'],
                'output_mask': sentences['sentence_mask'],
                'candidate_mask': sentences['sentence_candidates']
            }
        # Unreachable when constructed through __init__ (return_type validated there):
        raise ValueError('[SegmentationDataset] return_type must be either dict or tuple.')
195
+
196
+
197
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
198
+ # END OF FILE #
199
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
src/dataset/tokenized_dataset.py ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
2
+ # #
3
+ # This file was created by: Alberto Palomo Alonso #
4
+ # Universidad de Alcalá - Escuela Politécnica Superior #
5
+ # #
6
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
7
+ # Import statements:
8
+ import logging
9
+ import json
10
+ import os
11
+ import numpy as np
12
+ from torch.utils.data import Dataset, DataLoader
13
+
14
+
15
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
16
+ # #
17
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
18
+ class TokenizedSegmentationDataset(Dataset):
19
+ def __init__(
20
+ self,
21
+ tokenized_dataset: str,
22
+ logger: logging.Logger = None,
23
+ percentage: float = 1.0,
24
+ return_type: type = dict
25
+ ):
26
+ """
27
+ A tokoenized segmentation dataset takes a huggingface dataset or a path to a dataset on disk with the
28
+ wikipedia-segmentation format. It loads the dataset and prepares it for training.
29
+
30
+ Wikipedia-segmentation format:
31
+ - The dataset is expected to be a huggingface dataset or a path to a dataset on disk.
32
+ - The dataset should contain the following fields:
33
+ >>> sample = {
34
+ >>> 'text': ['Article 1', 'Article 2', ...],
35
+ >>> 'titles': ['Title 1', 'Title 2', ...],
36
+ >>> 'id': str,
37
+ >>> 'words': int
38
+ >>> 'paragraphs': int
39
+ >>> 'sentences': int
40
+ >>> }
41
+ - The dataset should be a list of dictionaries, where each dictionary contains the fields above.
42
+
43
+ Parameters
44
+ ----------
45
+ tokenized_dataset : str
46
+ A path to a tokenized dataset on disk with the wikipedia-segmentation format.
47
+
48
+ logger : logging.Logger, optional
49
+ Logger instance. If not provided, a null logger will be used.
50
+
51
+ percentage : float
52
+ Percentage of the dataset to use. Default is 1.0 (100%).
53
+
54
+ return_type : type
55
+ The return type of __getitem__, either dict or tuple. Default is dict.
56
+
57
+ Raises
58
+ ------
59
+ ValueError
60
+ If the huggingface_dataset is not a string or a HfDataset.
61
+ ValueError
62
+ If the tokenizer is not a callable function or class.
63
+ ValueError
64
+ If the sentence_tokenizer is not a callable function or class.
65
+ ValueError
66
+ If the dtype is not a type.
67
+
68
+ """
69
+ # Null logging:
70
+ if not isinstance(logger, logging.Logger):
71
+ self.logger = logging.getLogger("null")
72
+ self.logger.addHandler(logging.NullHandler())
73
+ else:
74
+ self.logger = logger
75
+
76
+ # Loading:
77
+ if isinstance(tokenized_dataset, str):
78
+ self.metadata_path = os.path.join(tokenized_dataset, 'info.json')
79
+ if not os.path.exists(self.metadata_path):
80
+ self.logger.error(f'[SegmentationDataset] Dataset metadata file not found at {self.metadata_path}.')
81
+ raise FileNotFoundError(f'[SegmentationDataset] Dataset metadata file not found at {self.metadata_path}.')
82
+ else:
83
+ with open(self.metadata_path, 'r', encoding='utf-8') as f:
84
+ self.metadata = json.load(f)
85
+ if 'fingerprint' not in self.metadata or not self.metadata['fingerprint']:
86
+ raise ValueError(f'[SegmentationDataset] Dataset metadata file is missing fingerprint information.')
87
+ else:
88
+ self.logger.error(f'[SegmentationDataset] huggingface_dataset must be either a string or a HfDataset.')
89
+ raise ValueError(f'[SegmentationDataset] huggingface_dataset must be either a string or a HfDataset.')
90
+ self.logger.info(f'[SegmentationDataset] Loaded dataset: {tokenized_dataset}')
91
+ self.logger.info(f'[SegmentationDataset] Loaded dataset length: {self.metadata["samples"]}')
92
+
93
+ # Percentage:
94
+ if not (0.0 < percentage <= 1.0):
95
+ self.logger.error(f'[SegmentationDataset] Percentage must be between 0.0 and 1.0.')
96
+ raise ValueError(f'[SegmentationDataset] Percentage must be between 0.0 and 1.0.')
97
+ else:
98
+ self.percentage = percentage
99
+
100
+ # Return type:
101
+ if not isinstance(return_type, type):
102
+ self.logger.error(f'[SegmentationDataset] return_type must be a type.')
103
+ raise ValueError(f'[SegmentationDataset] return_type must be a type.')
104
+ elif return_type not in [dict, tuple]:
105
+ self.logger.error(f'[SegmentationDataset] return_type must be either dict or tuple.')
106
+ raise ValueError(f'[SegmentationDataset] return_type must be either dict or tuple.')
107
+ else:
108
+ self.return_type = return_type
109
+
110
+ self.metadata['max_sentences'] = self.metadata['x']['element_shape'][0]
111
+ self.metadata['max_tokens'] = self.metadata['x']['element_shape'][1]
112
+
113
+ # Build maps:
114
+ read_mode = 'r'
115
+ self.x_map = np.memmap(
116
+ os.path.join(tokenized_dataset, self.metadata['x']['name'] + self.metadata['x']['extension']),
117
+ dtype=self.metadata['x']['dtype'],
118
+ mode=read_mode,
119
+ shape=(self.metadata['x']['samples'], *self.metadata['x']['element_shape'])
120
+ )
121
+ self.y_map = np.memmap(
122
+ os.path.join(tokenized_dataset, self.metadata['y']['name'] + self.metadata['y']['extension']),
123
+ dtype=self.metadata['y']['dtype'],
124
+ mode=read_mode,
125
+ shape=(self.metadata['y']['samples'], *self.metadata['y']['element_shape'])
126
+ )
127
+ self.x_mask_map = np.memmap(
128
+ os.path.join(tokenized_dataset, self.metadata['x_mask']['name'] + self.metadata['x_mask']['extension']),
129
+ dtype=self.metadata['x_mask']['dtype'],
130
+ mode=read_mode,
131
+ shape=(self.metadata['x_mask']['samples'], *self.metadata['x_mask']['element_shape'])
132
+ )
133
+ self.y_mask_map = np.memmap(
134
+ os.path.join(tokenized_dataset, self.metadata['y_mask']['name'] + self.metadata['y_mask']['extension']),
135
+ dtype=self.metadata['y_mask']['dtype'],
136
+ mode=read_mode,
137
+ shape=(self.metadata['y_mask']['samples'], *self.metadata['y_mask']['element_shape'])
138
+ )
139
+ self.y_cand_map = np.memmap(
140
+ os.path.join(tokenized_dataset, self.metadata['y_cand']['name'] + self.metadata['y_cand']['extension']),
141
+ dtype=self.metadata['y_cand']['dtype'],
142
+ mode=read_mode,
143
+ shape=(self.metadata['y_cand']['samples'], *self.metadata['y_cand']['element_shape'])
144
+ )
145
+
146
+ def get_loader(self, batch_size=8, shuffle=True, num_workers=0, **kwargs) -> DataLoader:
147
+ """
148
+ Returns a PyTorch DataLoader for this dataset.
149
+
150
+ Parameters
151
+ ----------
152
+ batch_size : int
153
+ Number of samples per batch.
154
+ shuffle : bool
155
+ Whether to shuffle the dataset.
156
+ num_workers : int
157
+ Number of worker processes.
158
+ **kwargs
159
+ Additional arguments for DataLoader.
160
+
161
+ Returns
162
+ -------
163
+ [torch.utils.data.DataLoader
164
+ Configured DataLoader.
165
+ """
166
+ # Size handling:
167
+ return DataLoader(self, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers, pin_memory=True,
168
+ **kwargs)
169
+
170
+ def __len__(self) -> int:
171
+ """
172
+ Returns the number of samples in the dataset.
173
+
174
+ Returns
175
+ -------
176
+ int
177
+ Total number of samples.
178
+ """
179
+ return int(self.metadata['samples'] * self.percentage)
180
+
181
+ def __getitem__(self, idx) -> dict | tuple:
182
+ """
183
+ Retrieves a single sample and generates segmentation labels.
184
+
185
+ Parameters
186
+ ----------
187
+ idx : int
188
+ Index of the sample.
189
+
190
+ Returns
191
+ -------
192
+ tuple
193
+ A tuple or dict (x_i, y_i, mask_x) with noisy input and corresponding target.
194
+ """
195
+ if self.return_type == tuple:
196
+ return (
197
+ np.array(self.x_map[idx]), # ← copia
198
+ np.array(self.y_map[idx]),
199
+ np.array(self.x_mask_map[idx]),
200
+ np.array(self.y_mask_map[idx]),
201
+ np.array(self.y_cand_map[idx]),
202
+ )
203
+ elif self.return_type == dict:
204
+ return {
205
+ 'input': np.array(self.x_map[idx]),
206
+ 'input_mask': np.array(self.x_mask_map[idx]),
207
+ 'labels': np.array(self.y_map[idx]),
208
+ 'output_mask': np.array(self.y_mask_map[idx]),
209
+ 'candidate_mask': np.array(self.y_cand_map[idx]),
210
+ }
211
+ else:
212
+ raise ValueError(f'[SegmentationDataset] return_type must be either dict or tuple.')
213
+
214
+
215
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
216
+ # END OF FILE #
217
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
src/dataset/tokenizer.py ADDED
@@ -0,0 +1,240 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
2
+ # #
3
+ # This file was created by: Alberto Palomo Alonso #
4
+ # Universidad de Alcalá - Escuela Politécnica Superior #
5
+ # #
6
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
7
+ # Import statements:
8
+ import tokenizers
9
+ import sys
10
+ import subprocess
11
+ import logging
12
+ import spacy
13
+ import numpy as np
14
+ from tokenizers.models import BPE
15
+ from tokenizers.trainers import BpeTrainer
16
+ from tokenizers.pre_tokenizers import Whitespace
17
+ from tokenizers.normalizers import NFKC
18
+ from transformers import PreTrainedTokenizerFast
19
+
20
+ # - # - # - # - # - # - # - # - # - # - # - # - # - # - # - #
21
+
22
+
23
class SegmentationTokenizer:
    """
    BPE tokenizer for the segmentation task.

    Wraps a raw `tokenizers` BPE model (used for training) and a
    `PreTrainedTokenizerFast` (used for encoding). Either train with
    `train_from_iterator` and `save`, or `load` a previously saved
    tokenizer file, before calling the instance on text.
    """

    def __init__(
        self,
        vocab_size=32_768,
        min_frequency=2,
        max_length=1024
    ):
        """
        Builds the raw (trainable) BPE tokenizer and its trainer.

        Parameters
        ----------
        vocab_size : int
            Target vocabulary size for BPE training.
        min_frequency : int
            Minimum pair frequency for a merge to be learned.
        max_length : int
            Padding/truncation length applied in __call__.
        """
        self.max_length = max_length

        # Raw tokenizer (training)
        self.raw_tokenizer = tokenizers.Tokenizer(
            BPE(unk_token="[UNK]")
        )
        # Unicode NFKC normalization, then whitespace pre-tokenization:
        self.raw_tokenizer.normalizer = NFKC()
        self.raw_tokenizer.pre_tokenizer = Whitespace()

        self.trainer = BpeTrainer(
            vocab_size=vocab_size,
            min_frequency=min_frequency,
            special_tokens=["[PAD]", "[UNK]", "[CLS]", "[SEP]", "[MASK]"]
        )

        self._hf_tokenizer = None  # created after training

    # ---------- TRAINING ----------
    def build_iterator(self, dataset, batch_size=1024):
        """
        Yields batches of article strings for tokenizer training.

        Each item's 'text' list is joined with newlines (double newlines
        collapsed to single ones) into one training string.
        """
        batch = []
        for item in dataset:
            batch.append("\n".join(item["text"]).replace("\n\n", "\n"))
            if len(batch) == batch_size:
                yield batch
                batch = []
        # Flush the final, possibly partial batch:
        if batch:
            yield batch

    def train_from_iterator(self, iterator):
        """Trains the raw BPE tokenizer from an iterator of text batches."""
        self.raw_tokenizer.train_from_iterator(
            iterator, trainer=self.trainer
        )

    # ---------- IO ----------
    def save(self, path):
        """Saves the raw (trained) tokenizer to a tokenizer file at `path`."""
        self.raw_tokenizer.save(path)

    def load(self, tokenizer_path):
        """
        Loads a saved tokenizer file into a fast Hugging Face tokenizer.

        Returns self so the call can be chained (e.g. `tok = SegmentationTokenizer().load(p)`).
        """
        self._hf_tokenizer = PreTrainedTokenizerFast(
            tokenizer_file=tokenizer_path,
            unk_token="[UNK]",
            pad_token="[PAD]",
            cls_token="[CLS]",
            sep_token="[SEP]",
            mask_token="[MASK]"
        )
        return self

    # ---------- TOKENIZATION ----------
    def compute_unk_rate(self, corpus):
        """
        Computes the fraction of [UNK] tokens produced over a corpus of strings.

        Returns 0.0 for an empty corpus (no tokens at all).
        """
        unk_id = self._hf_tokenizer.convert_tokens_to_ids("[UNK]")

        total_tokens = 0
        unk_tokens = 0

        for text in corpus:
            # Encode without special tokens so only real content is counted:
            enc = self._hf_tokenizer(
                text,
                add_special_tokens=False
            )["input_ids"]

            total_tokens += len(enc)
            unk_tokens += sum(1 for t in enc if t == unk_id)

        return unk_tokens / total_tokens if total_tokens > 0 else 0.0

    def __call__(
        self,
        text,
        return_tensors="pt",
        padding=True,
        truncation=True
    ):
        """
        Encodes text with fixed-size padding/truncation to `max_length`.

        text: str or List[str]
        returns: dict with input_ids and attention_mask (torch.long)

        Raises RuntimeError if no tokenizer has been loaded yet.
        """
        if self._hf_tokenizer is None:
            raise RuntimeError("Tokenizer not loaded. Call .load() first.")

        # padding=True pads every sequence to exactly max_length (not to the
        # longest in the batch), so batch shapes are constant:
        enc = self._hf_tokenizer(
            text,
            padding="max_length" if padding else False,
            truncation=truncation,
            max_length=self.max_length,
            return_tensors=return_tensors
        )

        return {
            "input_ids": enc["input_ids"],  # torch.LongTensor
            "attention_mask": enc["attention_mask"]  # torch.LongTensor
        }

    @property
    def vocab_size(self):
        # Size of the loaded tokenizer's vocabulary; requires .load() first.
        if self._hf_tokenizer is None:
            raise RuntimeError("Tokenizer not loaded.")
        return self._hf_tokenizer.vocab_size

    def __repr__(self):
        # Reports the *configured* (trainer) vocab size, which may differ from
        # the loaded tokenizer's actual vocab size.
        return f"<SegmentationTokenizer vocab_size={self.trainer.vocab_size}>"
131
+
132
+
133
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
134
+ # SENTENCE SEG #
135
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
136
class SentenceSegmenter:
    """
    Splits articles into sentences with spaCy and emits fixed-size label arrays.

    Each call returns exactly `max_sentences` entries per output (padded with
    empty sentences and zero labels), so downstream batching gets constant shapes.
    """

    def __init__(
        self,
        max_sentences: int,
        spacy_model: str = "es_core_news_sm",
        logger: logging.Logger | None = None
    ):
        # Hard cap on sentences per sample; outputs are padded/truncated to this length.
        self.max_sentences = max_sentences
        self.logger = self._get_logger(logger)
        # spaCy pipeline with the newline-based sentence-start component injected.
        self.nlp = self.__build_model__(spacy_model, logger=self.logger)

    @staticmethod
    def __build_model__(sentence_tokenizer_model: str, logger: logging.Logger) -> spacy.language.Language:
        """
        Download the pre-trained sentence tokenizer model.
        Loads the spaCy model, downloading it via `python -m spacy download`
        on first use, and injects the newline segmenter before the parser.
        :param sentence_tokenizer_model: The sentence tokenizer model to download.
        :param logger: Logger used to report download/validation progress.
        :return: The spacy language model.
        :raises RuntimeError: If the download fails or the model has no parser.
        """
        try:
            spacy_model = spacy.load(sentence_tokenizer_model)
        except OSError:
            # Model not installed locally; download it with the current interpreter.
            result = subprocess.run(
                [sys.executable, "-m", "spacy", "download", sentence_tokenizer_model],
                capture_output=True,
                text=True
            )

            if result.returncode != 0:
                logger.error(f'[BEAST-Tokenizer]: Loading {sentence_tokenizer_model} failed.')
                raise RuntimeError(f"[BEAST-Tokenizer]: Error while downloading '{sentence_tokenizer_model}'")

            spacy_model = spacy.load(sentence_tokenizer_model)
            logger.info('[BEAST-Tokenizer]: Successfully downloaded the pre-trained sentence tokenizer model.')

        # Sentence boundaries come from the dependency parser, so it must be present:
        if 'parser' not in spacy_model.pipe_names:
            logger.error(f'[BEAST-Tokenizer]: The SpaCy model needs a parser installed.')
            raise RuntimeError(f'[BEAST-Tokenizer]: The SpaCy model needs a parser installed.')
        else:
            # Run the newline segmenter before the parser so newline-forced
            # sentence starts are respected by downstream components:
            spacy_model.add_pipe("newline_segmenter_keep_exact", before="parser")

        return spacy_model

    @staticmethod
    def _get_logger(logger):
        # Return the given logger, or a module-level logger with a NullHandler
        # so logging calls are no-ops when no logger was supplied.
        if logger is None:
            logger = logging.getLogger(__name__)
            logger.addHandler(logging.NullHandler())
        return logger

    def __call__(self, texts: list[str]) -> dict:
        """
        Segments a list of articles into a flat, fixed-length sentence sequence.

        Returns a dict with:
        - 'sentences': list[str] of length max_sentences (padded with "").
        - 'sentence_candidates': int8 array; 1 where a boundary is possible
          (article openers and sentences ending with a newline).
        - 'sentence_boundaries': int8 array; 1 only at article openers.
        - 'sentence_mask': int8 array; 1 for real sentences, 0 for padding.
        """
        sentences = list()
        sentence_candidates = list()
        sentence_boundaries = list()
        sentence_masking = list()

        for article in texts:
            doc = self.nlp(article)
            for idx, sent in enumerate(doc.sents):

                if idx == 0:
                    # Article opener
                    sentence_candidates.append(1)
                    sentence_boundaries.append(1)
                elif sent.text.endswith("\n"):
                    # Paragraph break candidate
                    sentence_candidates.append(1)
                    sentence_boundaries.append(0)
                else:
                    sentence_candidates.append(0)
                    sentence_boundaries.append(0)

                # Store the sentence text without newlines or surrounding whitespace:
                sentences.append(sent.text.replace('\n', '').strip())
                sentence_masking.append(1)

                if len(sentences) >= self.max_sentences:
                    self.logger.warning(f"Maximum number of sentences reached: {self.max_sentences}")
                    break

            # Also stop consuming further articles once the cap is hit:
            if len(sentences) >= self.max_sentences:
                break

        # Pad with zeros:
        while len(sentences) < self.max_sentences:
            sentences.append("")
            sentence_candidates.append(0)
            sentence_boundaries.append(0)
            sentence_masking.append(0)

        return {
            "sentences": sentences,
            "sentence_candidates": np.array(sentence_candidates, dtype=np.int8),
            "sentence_boundaries": np.array(sentence_boundaries, dtype=np.int8),
            "sentence_mask": np.array(sentence_masking, dtype=np.int8)
        }
230
+
231
+
232
@spacy.Language.component("newline_segmenter_keep_exact")
def newline_segmenter_keep_exact(doc):
    """
    Custom spaCy pipeline component: mark the token after every "\\n" token
    as a sentence start, so newlines force sentence boundaries while the
    text itself is left untouched.
    """
    # Skip the last token: it has no successor to mark.
    for token in doc[:-1]:
        if token.text == "\n":
            doc[token.i + 1].is_sent_start = True
    return doc
238
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
239
+ # END OF FILE #
240
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
src/dlutils/__init__.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
2
+ # #
3
+ # This file was created by: Alberto Palomo Alonso #
4
+ # Universidad de Alcalá - Escuela Politécnica Superior #
5
+ # #
6
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
7
+ from .setup import Setup
8
+ from .steps import train_step, validation_step
9
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
10
+ # END OF FILE #
11
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
src/dlutils/setup/__init__.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
2
+ # #
3
+ # This file was created by: Alberto Palomo Alonso #
4
+ # Universidad de Alcalá - Escuela Politécnica Superior #
5
+ # #
6
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
7
+ # Import statements:
8
+ from .full_setup import Setup
9
+ from .hooks import HookMonitor
10
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
11
+ # END OF FILE #
12
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
src/dlutils/setup/clear.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
2
+ # #
3
+ # This file was created by: Alberto Palomo Alonso #
4
+ # Universidad de Alcalá - Escuela Politécnica Superior #
5
+ # #
6
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
7
+ # Import statements:
8
+ import os
9
+ import shutil
10
+ import logging
11
+
12
+
13
+ # - # - # - # - # - # - # - # - # - # - # - # - # - # - # - #
14
def clear_logs(log_path: str):
    """
    Deletes the log directory at `log_path` and everything inside it.

    Root-logger handlers are detached first so no open file handles keep
    the directory locked while it is being removed.

    Args:
        log_path (str): Directory whose contents should be wiped.

    Raises:
        ValueError: If `log_path` does not exist.
    """
    # Detach every handler from the root logger before touching the files:
    logging.getLogger().handlers.clear()
    # Guard clause: refuse to "clear" a path that was never there.
    if not os.path.exists(log_path):
        raise ValueError(f'Path {log_path} does not exist.')
    shutil.rmtree(log_path)
31
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
32
+ # END OF FILE #
33
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
src/dlutils/setup/device.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
2
+ # #
3
+ # This file was created by: Alberto Palomo Alonso #
4
+ # Universidad de Alcalá - Escuela Politécnica Superior #
5
+ # #
6
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
7
+ # Import statements:
8
+ import torch
9
+ import logging
10
+
11
+
12
def get_device(number: int, logger: logging.Logger = None):
    """
    Configures PyTorch to use a specified GPU by its index number,
    or falls back to CPU if CUDA is not available.

    Args:
        number (int): The index number of the GPU to use.
        logger (logging.Logger, optional): Logger for logging GPU info.

    Returns:
        torch.device: The selected torch device (GPU or CPU).

    Raises:
        ValueError: If `number` is not a valid GPU index.
    """
    # Fallback to CPU if CUDA is not available
    if not torch.cuda.is_available():
        if logger:
            logger.warning("CUDA is not available. Falling back to CPU.")
        return torch.device('cpu')

    # Check if the specified GPU number is valid
    if number >= torch.cuda.device_count() or number < 0:
        raise ValueError(
            f"GPU number {number} is not valid. Available GPU indices range from 0 to {torch.cuda.device_count() - 1}.")

    # Clean up cached memory and reset the memory statistics counters
    torch.cuda.empty_cache()
    torch.cuda.reset_peak_memory_stats()
    torch.cuda.reset_accumulated_memory_stats()

    # Set the selected device as current
    torch.cuda.set_device(number)

    # BUG FIX: the memory-stats logging below was previously unconditional and
    # crashed with AttributeError when logger was None; it is now guarded.
    if logger:
        device_name = torch.cuda.get_device_name(number)
        logger.info(f"PyTorch is now configured to use GPU {number}: {device_name}")

        total_mem = torch.cuda.get_device_properties(number).total_memory / 1024 ** 2
        mem_allocated = torch.cuda.memory_allocated(number) / 1024 ** 2
        mem_reserved = torch.cuda.memory_reserved(number) / 1024 ** 2
        max_allocated = torch.cuda.max_memory_allocated(number) / 1024 ** 2
        max_reserved = torch.cuda.max_memory_reserved(number) / 1024 ** 2

        logger.info(f"[GPU {number} - {device_name}] Memory Stats:")
        logger.info(f"    Total Memory        : {total_mem:.2f} MB")
        logger.info(f"    Currently Allocated : {mem_allocated:.2f} MB")
        logger.info(f"    Currently Reserved  : {mem_reserved:.2f} MB")
        logger.info(f"    Max Allocated       : {max_allocated:.2f} MB")
        logger.info(f"    Max Reserved        : {max_reserved:.2f} MB")

    return torch.device(f'cuda:{number}')
60
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
61
+ # END OF FILE #
62
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
src/dlutils/setup/full_setup.py ADDED
@@ -0,0 +1,352 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
2
+ # #
3
+ # This file was created by: Alberto Palomo Alonso #
4
+ # Universidad de Alcalá - Escuela Politécnica Superior #
5
+ # #
6
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
7
+ # Import statements:
8
+ import logging
9
+ import torch
10
+ import os
11
+ import glob
12
+ import json
13
+ import matplotlib.pyplot as plt
14
+ from .logger import get_logger
15
+ from .tensorboard import get_writer
16
+ from .seeds import get_seed
17
+ from .device import get_device
18
+ from .clear import clear_logs
19
+ from .marker import register_replay, register
20
+ from .watchers import DEFAULT_WATCHER, S_WATCHER, A_WATCHER, B_WATCHER, C_WATCHER, CNN_WATCHER, AEN_WATCHER, TRA_WATCHER
21
+ from dataclasses import asdict
22
+
23
+ # - # - # - # - # - # - # - # - # - # - # - # - # - # - # - #
24
+ # #
25
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
26
class Setup:
    def __init__(
            self,
            path: str,
            device: int = 0,
            seed: int = None,
            save_each: int = 1,
            reload_state: bool = False,
            tensorboard: int | bool = 6006,
            autoscaler: bool = True,
            replay_element: tuple = (-1, None)
    ):
        """
        This class is used to set up the environment for an AI experiment. It saves
        the model checkpoints, logs, and tensorboard files. It also sets the device
        and seed for reproducibility.

        Usage:

        >>> from dlutils.setup import Setup
        >>> setup = Setup(path='logs', device=0, seed=42, save_each=10)

        Inside the train loop:

        >>> model: torch.Model
        >>> loss_value: torch.Tensor
        >>> y: torch.Tensor
        >>> y_hat: torch.Tensor

        >>> setup.check(model)
        >>> setup.register('loss', loss_value)
        >>> setup.register_replay(y, y_hat)

        In case you want to reload latest checkpoint:

        >>> setup.reload(model)


        :param path: The path to the logs.
        :param device: The device to use.
        :param seed: The seed to use.
        :param save_each: The number of epochs to save the model.
        :param reload_state: Whether to reload the latest checkpoint.
        :param tensorboard: Whether to use tensorboard (port number or False).
        :param autoscaler: Whether to use an AMP gradient autoscaler for training.
        :param replay_element: The element to replay as (index, element).
        """
        self.path = path
        self.save_each = save_each
        self.tensorboard_required = tensorboard
        self.replay_id = replay_element
        self.__epoch_count = 0

        # Start from a clean log directory unless the caller asked for a resume.
        if not reload_state:
            self.clear(path)

        self.logger = self.set_logger(path)
        # When tensorboard is disabled there is no writer; the checkpoint
        # directory then defaults to <path>/checkpoints.
        self.writer, self.ch_path = self.set_writer(path, tensorboard) if tensorboard else (None, os.path.join(path, 'checkpoints'))
        self.seed = self.set_seed(seed)
        self.device = self.set_device(device)
        self.log_setup_info()

        # Per-instance copy of the default watcher flags: assigning the shared
        # module-level DEFAULT_WATCHER directly would let set_watcher() leak
        # flag changes across Setup instances.
        self.watcher = {group: dict(flags) for group, flags in DEFAULT_WATCHER.items()}
        self.autoscaler = torch.amp.GradScaler(enabled=self.device.type == 'cuda') if autoscaler else None

    def log_setup_info(self):
        """
        Log the setup information.
        """
        self.logger.info("Setup information:")
        self.logger.info(f"- Setup path: {self.path}")
        self.logger.info(f"- Setup checkpoints path: {self.ch_path}")
        self.logger.info(f"- Setup device: {self.device}")
        self.logger.info(f"- Setup seed: {self.seed}")
        self.logger.info(f"- Setup logger: {self.logger}")
        self.logger.info(f"- Setup writer: {self.writer}")
        self.logger.info(f"- Setup save each: {self.save_each}")

    def check(
            self,
            model: torch.nn.Module,
            optimizer: torch.optim.Optimizer | None = None,
            learning_rate: torch.optim.lr_scheduler.LRScheduler | None = None
    ) -> bool:
        """
        Check the model and save it if the epoch count is a multiple of save_each.
        :param model: The model to checkpoint and save.
        :param optimizer: The optimizer to save.
        :param learning_rate: The learning rate scheduler to save.
        :return: If the model is checkpointed.
        """
        self.__epoch_count += 1
        if self.save_each is not None and self.__epoch_count % self.save_each == 0:
            self.logger.info(f"Checkpointing model at epoch {self.__epoch_count}")
            self.save_model(
                model=model,
                optimizer=optimizer,
                learning_rate=learning_rate
            )
            self.logger.info(f"Model checkpointed at epoch {self.__epoch_count}")
            return True
        return False

    def save_model(
            self,
            model: torch.nn.Module,
            optimizer: torch.optim.Optimizer | None = None,
            learning_rate: torch.optim.lr_scheduler.LRScheduler | None = None
    ):
        """
        Saves the model.
        :param model: The model to save.
        :param optimizer: The optimizer to save.
        :param learning_rate: The learning rate scheduler to save.
        :return: Nothing.
        """
        torch_state = {
            'epoch': self.__epoch_count,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict() if optimizer else None,
            'scheduler_state_dict': learning_rate.state_dict() if learning_rate else None,
            'seed': self.seed
        }
        # The checkpoint directory is only created by the tensorboard writer;
        # ensure it exists so saving also works with tensorboard disabled.
        os.makedirs(self.ch_path, exist_ok=True)
        torch.save(torch_state, os.path.join(self.ch_path, f'model_epoch_{self.__epoch_count}.pt'))

    def reload(
            self,
            model: torch.nn.Module,
            optimizer: torch.optim.Optimizer | None = None,
            learning_rate: torch.optim.lr_scheduler.LRScheduler | None = None
    ) -> None:
        """
        Reloads the latest checkpoint into the given model.

        :param model: The PyTorch model to reload the state into.
        :param optimizer: The optimizer to reload the state into.
        :param learning_rate: The learning rate scheduler to reload the state into.
        """
        # Find all matching checkpoints
        checkpoints = glob.glob(os.path.join(self.ch_path, 'model_epoch_*.pt'))
        if not checkpoints:
            self.logger.warning("No checkpoint files found.")
        else:
            # Sort by modification time and get the latest
            checkpoints.sort(key=os.path.getmtime)
            latest_checkpoint = checkpoints[-1]

            try:
                # NOTE(review): relies on torch.load's default (full pickle)
                # behavior; under torch >= 2.6 this default flips to
                # weights_only=True — confirm before upgrading.
                state_dict = torch.load(latest_checkpoint, map_location=self.device)
                # Load model and info:
                model.load_state_dict(state_dict['model_state_dict'])
                model.to(self.device)
                self.__epoch_count = state_dict['epoch']
                self.seed = state_dict['seed']
                self.logger.info(f"Model reloaded from {latest_checkpoint} at epoch {self.__epoch_count} and "
                                 f"seed {self.seed}")

                # Load optimizer and learning rate scheduler if provided
                if optimizer and state_dict['optimizer_state_dict'] is not None:
                    optimizer.load_state_dict(state_dict['optimizer_state_dict'])
                    self.logger.info(f"Optimizer state_dict loaded from {latest_checkpoint}")
                if learning_rate and state_dict['scheduler_state_dict'] is not None:
                    learning_rate.load_state_dict(state_dict['scheduler_state_dict'])
                    self.logger.info(f"Scheduler state_dict loaded from {latest_checkpoint}")

            except Exception as e:
                self.logger.error(f"Failed to reload model from {latest_checkpoint}: {e}")
                raise RuntimeError(f"Failed to reload model from {latest_checkpoint}: {e}")

    def set_watcher(self, flag_names: str | list[tuple], deactivate: bool = False) -> None:
        """
        Sets up the parameter watcher to the tensorboard.
        :param flag_names: The names of the flags to watch as a list of (group, flag)
                           tuples, or a preset name ('S', 'A', 'B', 'C', 'cnn',
                           'transformer', 'ae').
        :param deactivate: Whether to deactivate the watcher.
        :return: Nothing
        """
        if isinstance(flag_names, str):
            if flag_names == 'S':
                flag_names = S_WATCHER
            elif flag_names == 'A':
                flag_names = A_WATCHER + S_WATCHER
            elif flag_names == 'B':
                flag_names = S_WATCHER + A_WATCHER + B_WATCHER
            elif flag_names == 'C':
                flag_names = S_WATCHER + A_WATCHER + B_WATCHER + C_WATCHER
            elif flag_names == 'cnn':
                flag_names = CNN_WATCHER
            elif flag_names == 'transformer':
                flag_names = TRA_WATCHER
            elif flag_names == 'ae':
                flag_names = AEN_WATCHER
            else:
                self.logger.error(f"[WATCHER] Unknown flag name '{flag_names}'")
                raise ValueError(f"[WATCHER] Unknown flag tier '{flag_names}'")

        for top_name, low_name in flag_names:
            if top_name not in self.watcher:
                self.logger.error(f"Watcher {top_name} not found in watcher.")
                raise ValueError(f"Watcher {top_name} not found in watcher.")
            elif low_name not in self.watcher[top_name]:
                self.logger.error(f"Watcher {low_name} not found in {top_name}.")
                raise ValueError(f"Watcher {low_name} not found in {top_name}.")
            else:
                self.watcher[top_name][low_name] = not deactivate

    def register_replay(self, predicted: torch.Tensor, target: torch.Tensor, mask: torch.Tensor = None) -> plt.Figure:
        """
        Visualizes predicted vs. target outputs with an optional mask.
        Only positions where mask == True are shown. Each cell displays its value with two decimal places.

        :param predicted: Tensor of shape (S) or (S, Y) representing the model's output.
        :param target: Tensor of same shape as predicted.
        :param mask: Optional boolean tensor of same shape. False positions are ignored (valid mask).
        """
        return register_replay(
            predicted=predicted,
            target=target,
            valid_mask=mask,
            element=self.replay_id[1],
            epoch=self.__epoch_count,
            writer=self.writer,
            logger=self.logger,
            tensorboard_required=self.tensorboard_required,
        )

    def register(self, name: str, parameter: float | torch.Tensor, mask: torch.Tensor = Ellipsis) -> None:
        """
        Registers a named parameter into the tensorboard.
        :param name: The name of the parameter.
        :param parameter: The parameter to register.
        :param mask: The optional boolean tensor of same shape as parameter.
        :return: Nothing.
        """
        # Tensors without an explicit mask get an all-True mask; plain floats
        # carry no mask at all (Ellipsis sentinel).
        if isinstance(parameter, torch.Tensor) and mask is Ellipsis:
            mask = torch.ones_like(parameter).bool()
        elif isinstance(parameter, float):
            mask = Ellipsis

        register(
            flags=self.watcher,
            tensor=parameter,
            valid_mask=mask,
            epoch=self.__epoch_count,
            writer=self.writer,
            logger=self.logger,
            tensorboard_required=self.tensorboard_required,
            parameter_name=name
        )

    def save_config(self, configuration):
        """
        Saves the configuration to a file.
        :param configuration: A dataclasses configuration object.
        :return: Nothing.
        """
        config_path = os.path.join(self.path, "config.json")
        with open(config_path, "w") as f:
            json.dump(asdict(configuration), f, indent=4)

    # - # - # - # - # - # - # - # - # - # - # - # - # - # - # - #
    #                                                           #
    # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
    @staticmethod
    def clear(path: str) -> None:
        """
        Clear the logs.
        :param path: The path to the logs.
        """
        clear_logs(path)

    @staticmethod
    def set_logger(path: str) -> logging.Logger:
        """
        Set the logger.
        :param path: The path to the logs.
        :return: The logger.
        """
        return get_logger(path)

    def set_writer(self, path: str, tensorboard_port: int | bool) -> tuple:
        """
        Get the writer.
        :param path: The path to the logs.
        :param tensorboard_port: The port to use for tensorboard.
        :return: The writer and checkpoint path.
        """
        return get_writer(path, tensorboard_port, self.logger)

    def set_device(self, device: int) -> torch.device:
        """
        Get the device.
        :param device: The device to use.
        :return: The device.
        """
        return get_device(device, self.logger)

    def set_seed(self, seed: int) -> int:
        """
        Get the seed.
        :param seed: The seed to use.
        :return: The seed.
        """
        return get_seed(seed, self.logger)

    @property
    def epoch(self):
        """
        Get the current epoch.
        :return: The current epoch.
        """
        return self.__epoch_count

    def __enter__(self):
        return self

    def __exit__(self, *exc):
        if self.writer:
            self.writer.close()

        # Do not kill Tensor boards - We usually want the process up to analyze the train variables:
        # for proc in psutil.process_iter(['pid', 'name']):
        #     if 'tensorboard' in proc.info['name'].lower():
        #         proc.terminate()
350
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
351
+ # END OF FILE #
352
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
src/dlutils/setup/functions.py ADDED
@@ -0,0 +1,227 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
2
+ # #
3
+ # This file was created by: Alberto Palomo Alonso #
4
+ # Universidad de Alcalá - Escuela Politécnica Superior #
5
+ # #
6
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
7
+ # Import statements:
8
+ import torch
9
+ EPS = 1e-12
10
+
11
+ # - # - # - # - # - # - # - # - # - # - # - # - # - # - # - #
12
+ # REGISTER #
13
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
14
def watch_max(
        tensor: torch.Tensor,
        mask: torch.Tensor,
        grad: bool = False,
) -> float:
    """Return the largest absolute value among the masked entries."""
    # Select the backing values: gradient, detached .data view, or the tensor itself.
    if grad:
        source = tensor.grad
    elif hasattr(tensor, 'data'):
        source = tensor.data
    else:
        source = tensor
    return float(source[mask].abs().max())
25
+
26
def watch_min(
        tensor: torch.Tensor,
        mask: torch.Tensor,
        grad: bool = False,
) -> float:
    """Return the smallest absolute value among the masked entries."""
    # Select the backing values: gradient, detached .data view, or the tensor itself.
    if grad:
        source = tensor.grad
    elif hasattr(tensor, 'data'):
        source = tensor.data
    else:
        source = tensor
    return float(source[mask].abs().min())
37
+
38
def watch_mean(
        tensor: torch.Tensor,
        mask: torch.Tensor,
        grad: bool = False,
) -> float:
    """Return the (signed) mean of the masked entries."""
    # Select the backing values: gradient, detached .data view, or the tensor itself.
    if grad:
        source = tensor.grad
    elif hasattr(tensor, 'data'):
        source = tensor.data
    else:
        source = tensor
    return float(source[mask].mean())
49
+
50
def watch_var(
        tensor: torch.Tensor,
        mask: torch.Tensor,
        grad: bool = False,
) -> float:
    """Return the (unbiased, torch default) variance of the masked entries."""
    # Select the backing values: gradient, detached .data view, or the tensor itself.
    if grad:
        source = tensor.grad
    elif hasattr(tensor, 'data'):
        source = tensor.data
    else:
        source = tensor
    return float(source[mask].var())
61
+
62
def watch_std(
        tensor: torch.Tensor,
        mask: torch.Tensor,
        grad: bool = False,
) -> float:
    """Return the (unbiased, torch default) standard deviation of the masked entries."""
    # Select the backing values: gradient, detached .data view, or the tensor itself.
    if grad:
        source = tensor.grad
    elif hasattr(tensor, 'data'):
        source = tensor.data
    else:
        source = tensor
    return float(source[mask].std())
73
+
74
def watch_sparsity(
        tensor: torch.Tensor,
        mask: torch.Tensor,
        grad: bool = False,
        sparsity_threshold: float = 1e-6,
) -> float:
    """Return the fraction of masked entries whose magnitude is at most `sparsity_threshold`."""
    # Select the backing values: gradient, detached .data view, or the tensor itself.
    if grad:
        source = tensor.grad
    elif hasattr(tensor, 'data'):
        source = tensor.data
    else:
        source = tensor
    near_zero = source[mask].abs() <= sparsity_threshold
    return float(near_zero.float().mean())
86
+
87
def watch_l1(
        tensor: torch.Tensor,
        mask: torch.Tensor,
        grad: bool = False,
) -> float:
    """Return the L1 norm (sum of magnitudes) of the masked entries."""
    # Select the backing values: gradient, detached .data view, or the tensor itself.
    if grad:
        source = tensor.grad
    elif hasattr(tensor, 'data'):
        source = tensor.data
    else:
        source = tensor
    return float(source[mask].norm(p=1))
98
+
99
def watch_l2(
        tensor: torch.Tensor,
        mask: torch.Tensor,
        grad: bool = False,
) -> float:
    """Return the L2 (Euclidean) norm of the masked entries."""
    # Select the backing values: gradient, detached .data view, or the tensor itself.
    if grad:
        source = tensor.grad
    elif hasattr(tensor, 'data'):
        source = tensor.data
    else:
        source = tensor
    return float(source[mask].norm(p=2))
110
+
111
def watch_snr(
        tensor: torch.Tensor,
        mask: torch.Tensor,
        grad: bool = False,
) -> None | float:
    """
    Signal-to-noise ratio in dB over the masked entries: 20 * log10(|mean| / std).
    Returns None when the std is non-positive or the ratio degenerates to -inf
    (masked mean exactly zero).
    """
    deviation = watch_std(tensor, mask, grad=grad)
    if deviation <= 0:
        return None
    # Select the backing values: gradient, detached .data view, or the tensor itself.
    if grad:
        source = tensor.grad
    elif hasattr(tensor, 'data'):
        source = tensor.data
    else:
        source = tensor
    ratio = float(torch.log10((source[mask].mean()).abs() / (deviation + EPS)))
    # log10 of a zero mean yields -inf; report that degenerate case as None.
    return None if ratio == float("-inf") else 20 * ratio
126
+
127
def watch_hist(
        tensor: torch.Tensor,
        mask: torch.Tensor,
        grad: bool = False,
) -> torch.Tensor:
    """Return the masked entries themselves (raw values for a histogram)."""
    # Select the backing values: gradient, detached .data view, or the tensor itself.
    if grad:
        source = tensor.grad
    elif hasattr(tensor, 'data'):
        source = tensor.data
    else:
        source = tensor
    return source[mask]
138
+
139
def watch_rank(
        tensor: torch.Tensor,
        mask: torch.Tensor,
        grad: bool = False,
        threshold: float = 0.92,
) -> None | float | int:
    """
    Effective rank of a matrix-like tensor: the number of singular values
    needed to accumulate `threshold` of the total spectral energy.

    :param tensor: The tensor (or parameter) to analyze.
    :param mask: Boolean mask applied multiplicatively, or Ellipsis for "no mask".
    :param grad: If True, analyze `tensor.grad` instead of the values.
    :param threshold: Fraction of cumulative energy that defines the rank.
    :return: The effective rank, or None for tensors with fewer than 2 dims.
    """
    if grad:
        work_tensor = tensor.grad
    elif hasattr(tensor, 'data'):
        work_tensor = tensor.data
    else:
        work_tensor = tensor
    # Hooks call the watchers with mask=Ellipsis (meaning "all entries");
    # `Ellipsis.float()` would crash, so only apply a real mask.
    if mask is not Ellipsis:
        work_tensor = torch.multiply(work_tensor, mask.float())

    if work_tensor.ndim < 2:
        return None
    else:
        # Compute SVD and sort it (largest singular values first):
        work_tensor = torch.linalg.svdvals(work_tensor)
        work_tensor = torch.sort(work_tensor, descending=True).values
        # Cumulative energy (normalized, EPS avoids division by zero):
        work_tensor = torch.cumsum(work_tensor**2, dim=0) / (torch.sum(work_tensor**2) + EPS)
        # Effective rank: first index crossing the energy threshold.
        return float(torch.sum(work_tensor < threshold).item() + 1)
163
+
164
def watch_any(
        tensor: torch.Tensor,
        mask: torch.Tensor,
        grad: bool = False,
) -> float:
    """Return the masked value as a plain float (intended for scalars such as loss/lr)."""
    # Select the backing values: gradient, detached .data view, or the tensor itself.
    if grad:
        source = tensor.grad
    elif hasattr(tensor, 'data'):
        source = tensor.data
    else:
        source = tensor
    return float(source[mask])
175
+
176
def watch_power(
        tensor: torch.Tensor,
        mask: torch.Tensor,
        grad: bool = False,
) -> float:
    """Return the mean power of the masked entries in dB: 10 * log10(mean(x^2) + EPS)."""
    # Select the backing values: gradient, detached .data view, or the tensor itself.
    if grad:
        source = tensor.grad
    elif hasattr(tensor, 'data'):
        source = tensor.data
    else:
        source = tensor
    return float(10 * torch.log10((source[mask] ** 2).mean() + EPS))
187
+
188
+ # - # - # - # - # - # - # - # - # - # - # - # - # - # - # - #
189
+ # FUNC. MAP #
190
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
191
# Dispatch table mapping a watcher flag name to the callable computing that
# statistic. Every callable takes (tensor, mask); the 'grad_*' variants apply
# the same statistic to the tensor's .grad attribute instead of its values.
REG_FUNCTION_MAP = {
    # Function mapping (statistics on the tensor values):
    'max': watch_max,
    'min': watch_min,
    'mean': watch_mean,
    'std': watch_std,
    'var': watch_var,
    'l2': watch_l2,
    'l1': watch_l1,
    'sparsity': watch_sparsity,
    'snr': watch_snr,
    'hist': watch_hist,
    'rank': watch_rank,
    'power': watch_power,

    # Gradient mapping (same statistics, routed through grad=True):
    'grad_max': lambda x, y: watch_max(x, y, grad=True),
    'grad_min': lambda x, y: watch_min(x, y, grad=True),
    'grad_mean': lambda x, y: watch_mean(x, y, grad=True),
    'grad_std': lambda x, y: watch_std(x, y, grad=True),
    'grad_var': lambda x, y: watch_var(x, y, grad=True),
    'grad_l1': lambda x, y: watch_l1(x, y, grad=True),
    'grad_l2': lambda x, y: watch_l2(x, y, grad=True),
    'grad_sparsity': lambda x, y: watch_sparsity(x, y, grad=True),
    'grad_snr': lambda x, y: watch_snr(x, y, grad=True),
    'grad_hist': lambda x, y: watch_hist(x, y, grad=True),
    'grad_rank': lambda x, y: watch_rank(x, y, grad=True),
    'grad_power': lambda x, y: watch_power(x, y, grad=True),

    # Loss / scalar pass-throughs (watch_any just unwraps the value):
    'loss': watch_any,
    'val_loss': watch_any,
    'lr': watch_any
}
225
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
226
+ # END OF FILE #
227
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
src/dlutils/setup/hooks.py ADDED
@@ -0,0 +1,259 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
2
+ # START OF FILE #
3
+ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
4
+ import logging
5
+ import torch
6
+ from .functions import REG_FUNCTION_MAP
7
+
8
+
9
+ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
10
+ # #
11
+ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
12
class HookMonitor:
    """
    Monitors forward activations and backward gradients of a PyTorch model by
    registering hooks on all its submodules. Per-layer statistics defined in
    `REG_FUNCTION_MAP` are accumulated during forward and backward passes and
    returned normalized by `get_stats()`.

    Core behavior
    -------------
    - Forward pass: the forward hook receives (module, input, output); tensor
      outputs are detached, cast to float, and every non-'grad_' metric whose
      watcher flag is enabled is computed and accumulated. A gradient hook is
      then registered on the output tensor (if it requires grad).
    - Backward pass: the gradient hook applies every enabled 'grad_<metric>'
      to the activation's gradient and accumulates the result.
    - For each metric a parallel '<metric>/valid/' counter is kept so that
      `get_stats()` can return the mean over all accumulations.

    Parameters
    ----------
    model : torch.nn.Module
        Every submodule from `model.named_modules()` receives a forward hook.
    watcher : dict
        Flat mapping of metric name -> bool flag; keys must match
        `REG_FUNCTION_MAP` (e.g. {"mean": True, "grad_mean": True}).
        Metrics not enabled here are never computed.
    logger : logging.Logger
        Used for error/debug reporting.

    Usage
    -----
    >>> monitor = HookMonitor(model, watcher, logger)
    >>> monitor.attach()
    >>> output = model(x)
    >>> loss.backward()
    >>> stats = monitor.get_stats()
    >>> monitor.remove()

    Or as a context manager:

    >>> with HookMonitor(model, watcher, logger) as monitor:
    ...     output = model(x)
    ...     loss.backward()
    >>> stats = monitor.get_stats()

    Notes
    -----
    - The gradient hook is attached to the activation tensor (module output),
      not to model parameters.
    - Forward hooks run under @torch.no_grad(); the monitor only reads
      activations and gradients and does not interfere with training.
    - A metric missing its '/valid/' counter logs an error and is returned
      un-normalized.
    """
    def __init__(self, model: torch.nn.Module, watcher: dict, logger: logging.Logger):
        """
        Store the monitoring configuration. No hooks are installed here: the
        monitor becomes active only after `attach()` (or entering a `with`
        block), and is torn down by `remove()` (or leaving the block).

        :param model: The model whose submodules will be monitored.
        :param watcher: Metric-name -> bool flags (see class docstring).
        :param logger: Logger for errors and debug messages.
        """
        self.logger: logging.Logger = logger
        self.model: torch.nn.Module = model
        self.watcher: dict = watcher
        # Accumulated statistics per layer name; normalized in get_stats().
        self.stats: dict = dict()
        # Forward-hook handles, kept so remove() can detach them all.
        self.handles: list = list()

    def _build_hook(self, name):
        """
        Build the forward hook for the submodule registered under `name`.

        The returned hook accumulates the enabled non-gradient metrics on the
        module's output and, when the output requires grad, registers a
        per-tensor gradient hook that accumulates the enabled 'grad_*' metrics
        during backpropagation.
        """

        @torch.no_grad()
        def hook(*args):
            # Forward hooks are called as (module, input, output).
            _, _, act = args

            # Only tensor outputs are monitored (tuple/dict outputs are skipped).
            if torch.is_tensor(act):
                act_detached = act.detach().float()
                s = self.stats.setdefault(name, {})

                # Call functions:
                for function_name, compute_function in REG_FUNCTION_MAP.items():
                    if self.watcher.get(function_name, False) and not function_name.startswith('grad_'):
                        # Ellipsis mask = "all entries"; see the watch_* helpers.
                        value = compute_function(act_detached, ...)
                        if value is not None:
                            s[function_name] = s.get(function_name, 0.0) + value
                            # Track how many accumulations went in, for normalization.
                            s[function_name + '/valid/'] = s.get(function_name + '/valid/', 0.0) + 1

                # Grad hook:
                def grad_hook(grad):
                    gd = grad.detach().float()
                    # Call functions:
                    for gd_function_name, gd_compute_function in REG_FUNCTION_MAP.items():
                        # Iterate the base metrics but check their 'grad_' flag,
                        # so each base function is applied to the gradient tensor.
                        if self.watcher.get('grad_' + gd_function_name, False) and not gd_function_name.startswith('grad_'):
                            gd_function_name = 'grad_' + gd_function_name
                            gd_value = gd_compute_function(gd, ...)
                            if gd_value is not None:
                                s[gd_function_name] = s.get(gd_function_name, 0.0) + gd_value
                                s[gd_function_name + '/valid/'] = s.get(gd_function_name + '/valid/', 0.0) + 1

                if act.requires_grad:
                    act.register_hook(grad_hook)

        return hook

    def get_stats(self) -> dict:
        """
        Get the statistics of the hooks, normalized by their '/valid/' counters
        (i.e. the mean over all accumulated forward/backward passes).
        :return: A dictionary: {layer_name: {metric_name: value}}.
        """
        stats = dict()
        for layer_name, layer_stats in self.stats.items():
            sub_stats = dict()
            for key, item in layer_stats.items():
                if '/valid/' not in key:
                    if key + '/valid/' in layer_stats:
                        sub_stats[key] = item / layer_stats[key + '/valid/']
                    else:
                        # Should not happen: value and counter are written together.
                        self.logger.error(f"Key {key} has no valid count, skipping normalization.")
                        sub_stats[key] = item
            stats[layer_name] = sub_stats
        return stats

    def attach(self):
        """
        Registers all the hooks in the model (one forward hook per submodule).
        :return: The object.
        """
        for name, module in self.model.named_modules():
            h = module.register_forward_hook(self._build_hook(name))
            self.handles.append(h)
        return self

    def clear(self):
        """
        Clear stats' dictionary.
        :return: Nothing
        """
        self.stats.clear()

    def remove(self):
        """
        Remove all the hooks from the model.
        :return: Nothing.
        """
        for h in self.handles:
            h.remove()
        self.handles.clear()

    def __enter__(self):
        self.logger.debug("[Hooks] Attaching HookMonitor...")
        return self.attach()

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.logger.debug("[Hooks] Removing HookMonitor...")
        self.remove()
256
+
257
+ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
258
+ # END OF FILE #
259
+ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
src/dlutils/setup/logger.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
2
+ # #
3
+ # This file was created by: Alberto Palomo Alonso #
4
+ # Universidad de Alcalá - Escuela Politécnica Superior #
5
+ # #
6
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
7
+ # Import statements:
8
+ import logging
9
+ import os
10
+
11
+
12
def get_logger(log_path: str, level: int | str = logging.INFO) -> logging.Logger:
    """
    Build a logger that writes colored output to the console and plain output
    to '<log_path>/logfile.log', creating the directory when it is missing.

    Args:
        log_path (str): Directory where the log file 'logfile.log' is stored.
        level (int | str): Logging level, as a name ("info") or numeric value.

    Raises:
        ValueError: If log_path points to a non-directory, or level is invalid.
    """
    # Ensure the destination exists and is a directory.
    if not os.path.exists(log_path):
        os.makedirs(log_path, exist_ok=True)
    elif not os.path.isdir(log_path):
        raise ValueError(f"Provided path '{log_path}' is not a directory.")

    logfile = os.path.join(log_path, 'logfile.log')

    # Normalize the requested level to its numeric value.
    if isinstance(level, str):
        level = level.upper()
        if not hasattr(logging, level):
            raise ValueError(f'The provided level for the logger <<{level}>> is not a valid level for logging.')
        level = getattr(logging, level)
    elif not isinstance(level, int):
        raise ValueError(f'The provided level for the logger <<{level}>> is not a string or int, '
                         f'the given type is <<{type(level)}>>.')

    # One module-level logger, rebuilt from scratch on every call.
    logger = logging.getLogger(__name__)
    logger.handlers.clear()    # drop handlers left over from a previous call
    logger.setLevel(level)
    logger.propagate = False   # keep records out of the root logger

    # Plain formatting for the file, colored formatting for the console.
    file_handler = logging.FileHandler(logfile)
    file_handler.setLevel(level)
    file_handler.setFormatter(logging.Formatter('%(asctime)s: [%(levelname)s] %(message)s'))

    console_handler = logging.StreamHandler()
    console_handler.setLevel(level)
    console_handler.setFormatter(ColoredFormatter())

    logger.addHandler(file_handler)
    logger.addHandler(console_handler)

    logger.info(f'Logger initialized with writer handler at: {logfile}')

    return logger
66
+
67
+
68
class ColoredFormatter(logging.Formatter):
    """Console formatter that wraps each record in an ANSI color matching its level."""
    grey = "\x1b[38;20m"
    blue = "\x1b[34;20m"
    cyan = "\x1b[36;20m"
    orange = "\x1b[33;20m"
    red = "\x1b[31;20m"
    reset = "\x1b[0m"
    # Renamed from `format`: that name shadowed (and was then replaced by) the
    # format() method below, which made the template unreachable after class creation.
    _fmt_template = '%(asctime)s: [%(levelname)s] %(message)s'

    # Pre-built colored format string per standard level.
    FORMATS = {
        logging.DEBUG: blue + _fmt_template + reset,
        logging.INFO: cyan + _fmt_template + reset,
        logging.WARNING: orange + _fmt_template + reset,
        logging.ERROR: red + _fmt_template + reset,
        logging.CRITICAL: red + _fmt_template + reset
    }

    def format(self, record):
        """Format `record` with its level color; unknown/custom levels fall back to grey."""
        log_fmt = self.FORMATS.get(record.levelno, self.grey + self._fmt_template + self.reset)
        formatter = logging.Formatter(log_fmt, "%Y-%m-%d %H:%M:%S")
        return formatter.format(record)
89
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
90
+ # END OF FILE #
91
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
src/dlutils/setup/marker.py ADDED
@@ -0,0 +1,251 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
2
+ # #
3
+ # This file was created by: Alberto Palomo Alonso #
4
+ # Universidad de Alcalá - Escuela Politécnica Superior #
5
+ # #
6
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
7
+ # Import statements:
8
+ import torch
9
+ import logging
10
+ import numpy as np
11
+ import io
12
+ import math
13
+ import random
14
+ from PIL import Image
15
+ from matplotlib import pyplot as plt
16
+ from torch.utils.tensorboard import SummaryWriter
17
+ from torchvision import transforms
18
+ from .functions import REG_FUNCTION_MAP
19
+
20
+
21
+ # - # - # - # - # - # - # - # - # - # - # - # - # - # - # - #
22
+ # REGISTER #
23
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
24
@torch.no_grad()
def register(
        flags: dict,
        tensor: float | torch.Tensor,
        valid_mask: torch.Tensor,
        epoch: int,
        writer: SummaryWriter,
        logger: logging.Logger,
        tensorboard_required: bool,
        parameter_name: str = ''
):
    """
    Registers a parameter according to the register flags (DEFAULT_WATCHER style).

    :param flags: A specific watch flag dict ('train' / 'parameters' / 'activations' groups).
    :param tensor: The tensor to register (torch.nn.Parameter, torch.Tensor or float).
    :param valid_mask: The valid mask passed through to the statistic functions.
    :param epoch: The current epoch (used as the tensorboard global step).
    :param writer: The tensorboard writer.
    :param logger: The logger.
    :param tensorboard_required: Whether the tensorboard writer is required.
    :param parameter_name: The name of the parameter (used to build the tag).
    :return:
    """
    # 1. Detect tensor type — the registration group depends on what we got:
    if isinstance(tensor, torch.nn.Parameter):
        flag_type = 'parameters'
    elif isinstance(tensor, torch.Tensor):
        # Intermediate activation:
        flag_type = 'activations'
    elif isinstance(tensor, float):
        flag_type = 'train'
    else:
        raise ValueError(f"{type(tensor)} is not a torch.nn.Parameter or torch.Tensor.")

    # 2. Build the tensorboard tag names:
    safe_names = list()
    # Check if the group is active:
    if flag_type == 'parameters':
        # One tag per active parameter-statistic flag:
        for flag_key, flag_value in flags['parameters'].items():
            # Add if active:
            if flag_value:
                safe_names.append((f'{flag_type}/{flag_key}/{parameter_name}/', flag_key))
    else:
        # Activations / train scalars get a single tag with no statistic key:
        safe_names.append((f'{flag_type}/{parameter_name}/', ''))

    # 3. Write and compute each required variable:
    for name, flag_key in safe_names:
        # Compute the value:
        transformation = None
        if isinstance(tensor, torch.nn.Parameter):
            # NOTE(review): only 'grad_*' statistics are computed here; active
            # non-gradient parameter flags (e.g. 'max', 'mean') leave
            # ``transformation`` as None and are silently skipped — confirm
            # whether REG_FUNCTION_MAP should also run on the weight data.
            if tensor.grad is not None and 'grad' in flag_key:
                transformation = REG_FUNCTION_MAP[flag_key](tensor, valid_mask)
        else:
            # Assumes a scalar here (float or 0-d tensor) — float() would raise
            # on a multi-element tensor; TODO confirm against the callers.
            transformation = float(tensor) if tensor is not None else None
        # Write the value in tensorboard (skipped when nothing was computed):
        if transformation is not None:
            write_tensorboard(
                name=name,
                value=transformation,
                epoch=epoch,
                writer=writer,
                logger=logger,
                tensorboard_required=tensorboard_required,
            )
90
+
91
+ # - # - # - # - # - # - # - # - # - # - # - # - # - # - # - #
92
+ # REPLAY #
93
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
94
@torch.no_grad()
def register_replay(
        predicted: torch.Tensor,
        target: torch.Tensor,
        epoch: int,
        writer: SummaryWriter,
        logger: logging.Logger,
        valid_mask: torch.Tensor = Ellipsis,
        element: int = None,
        tensorboard_required: bool = True,
) -> plt.Figure:
    """
    Registers a replay (prediction vs. target heatmaps) as a tensorboard image.

    :param predicted: The predicted value (prediction), batch-first.
    :param target: The expected value (labels), batch-first.
    :param epoch: The current epoch (tensorboard global step).
    :param writer: The tensorboard writer.
    :param logger: The logger.
    :param valid_mask: Optional mask of the same shape; False positions are
        dropped before plotting. ``Ellipsis`` (the default) or ``None`` keep
        every position.
    :param element: The batch element to register; None chooses a random one.
    :param tensorboard_required: Whether the tensorboard writer is required.
    :return: A matplotlib figure (still open; the caller owns it).
    """
    # Choose the batch element to plot (random when not specified):
    if element is None:
        element = random.randint(0, len(predicted) - 1)
    else:
        # Clamp an explicit index into the valid batch range.
        element = min(len(predicted) - 1, max(0, element))

    # Convert the chosen element to numpy:
    predicted_np = predicted[element].detach().cpu().numpy()
    target_np = target[element].detach().cpu().numpy()

    # Expand a categorical (scalar) target into a one-hot vector:
    if not target_np.shape:
        target_np_aux = np.zeros_like(predicted_np)
        target_np_aux[target_np] = 1.
        target_np = target_np_aux
        del target_np_aux

    # Build the validity mask. BUGFIX: the ``Ellipsis`` default previously fell
    # into the subscripting branch and raised ``TypeError: 'ellipsis' object is
    # not subscriptable``; it now means "keep everything", like ``None``.
    if valid_mask is None or valid_mask is Ellipsis:
        mask_np = np.ones_like(predicted_np, dtype=bool)
    else:
        mask_np = valid_mask[element].detach().cpu().numpy().astype(bool)

    # Apply mask and flatten:
    predicted_flat = predicted_np[mask_np].flatten()
    target_flat = target_np[mask_np].flatten()

    # Compute the smallest square side B that holds every valid value:
    s = predicted_flat.shape[0]
    b = math.ceil(math.sqrt(s))
    total = b * b
    pad = total - s

    # Pad with zeros up to a full B x B square:
    predicted_padded = np.pad(predicted_flat, (0, pad), constant_values=0.0).reshape(b, b)
    target_padded = np.pad(target_flat, (0, pad), constant_values=0.0).reshape(b, b)

    # Build the side-by-side figure:
    fig, axs = plt.subplots(1, 2, figsize=(10, 5))
    plot_with_values(axs[0], predicted_padded, "Predicted (y_hat)")
    plot_with_values(axs[1], target_padded, "Target (y)")
    plt.tight_layout()
    write_tensorboard(
        'replay/',
        fig,
        epoch=epoch,
        writer=writer,
        logger=logger,
        tensorboard_required=tensorboard_required,
    )
    return fig
168
+
169
def plot_with_values(ax, data, title):
    """
    Render *data* as a heatmap on *ax*, overlaying each cell's numeric value.

    :param ax: A matplotlib axes (anything exposing imshow/set_title/axis/text).
    :param data: A 2-D numpy array of values to display.
    :param title: The title of the plot.
    :return:
    """
    ax.imshow(data, cmap='viridis', interpolation='nearest')
    ax.set_title(title)
    ax.axis('off')
    rows, cols = data.shape
    for row in range(rows):
        for col in range(cols):
            cell = data[row, col]
            # Dark cells get white text for contrast; bright cells get black.
            ax.text(col, row, f"{cell:.2f}", ha="center", va="center",
                    color="white" if cell < 0.5 else "black", fontsize=8)
184
+
185
+ # - # - # - # - # - # - # - # - # - # - # - # - # - # - # - #
186
+ # WRITE ON BASE #
187
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
188
def write_tensorboard(
        name: str,
        value: int | float | plt.Figure | np.ndarray | torch.Tensor,
        epoch: int,
        writer: SummaryWriter,
        logger: logging.Logger,
        tensorboard_required: bool = True,
) -> None:
    """
    Write a value to tensorboard, dispatching on the value type.

    Scalars go to ``add_scalar``, tensors/arrays/lists to ``add_histogram``,
    strings to ``add_text``, and figures / encoded image bytes to ``add_image``.

    :param name: The tensorboard tag to write under.
    :param value: The value to write.
    :param epoch: The current epoch (global step).
    :param writer: The tensorboard writer (None when tensorboard is disabled).
    :param logger: The logger.
    :param tensorboard_required: Whether a missing writer should be reported.
    :raises ValueError: If the value type is not supported.
    """
    # Missing writer: only warn when tensorboard was actually requested.
    if writer is None:
        if tensorboard_required:
            logger.warning("Writer is None. Please set the writer first.")
        return
    # Nothing to write:
    if value is None:
        logger.warning("Value is None. Please set the value first.")
        return
    # No tag to write under:
    if name is None:
        logger.warning("Name is None. Please set the name first.")
        return

    # Dispatch on the value type:
    if isinstance(value, int):
        writer.add_scalar(name, float(value), epoch)
    elif isinstance(value, float):
        writer.add_scalar(name, value, epoch)
    elif isinstance(value, torch.Tensor):
        value = value.detach().cpu().numpy()
        writer.add_histogram(name, value, epoch)
    elif isinstance(value, list):
        value = np.array(value)
        writer.add_histogram(name, value, epoch)
    elif isinstance(value, np.ndarray):
        writer.add_histogram(name, value, epoch)
    elif isinstance(value, str):
        writer.add_text(name, value, epoch)
    elif isinstance(value, bytes):
        # Encoded (PNG/JPEG) bytes -> CHW tensor for add_image.
        image = Image.open(io.BytesIO(value))
        transform = transforms.ToTensor()
        value = transform(image)
        writer.add_image(name, value, epoch)
    elif isinstance(value, plt.Figure):
        # Rasterize the figure into a tensor image. BUGFIX: the buffer is now
        # closed, and we close *this* figure explicitly — the previous bare
        # ``plt.close()`` only closed matplotlib's *current* figure, which is
        # not necessarily the one that was just written.
        with io.BytesIO() as buf:
            value.savefig(buf, format='png')
            buf.seek(0)
            image = transforms.ToTensor()(Image.open(buf))
        writer.add_image(name, image, epoch)
        plt.close(value)
    else:
        raise ValueError(f"Type {type(value)} not supported.")
249
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
250
+ # END OF FILE #
251
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
src/dlutils/setup/seeds.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
2
+ # #
3
+ # This file was created by: Alberto Palomo Alonso #
4
+ # Universidad de Alcalá - Escuela Politécnica Superior #
5
+ # #
6
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
7
+ # Import statements:
8
+ import logging
9
+ import torch
10
+ import os
11
+ import numpy as np
12
+ import random
13
+ import time
14
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
15
CUBLAS_ALLOCATION = 4096
# - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #


def get_seed(seed: int = None, logger: logging.Logger = None) -> int:
    """
    Seed every random number generator (numpy, random, torch) for reproducibility.

    When *seed* is None a fresh seed is derived from the current time. The
    function also configures PyTorch — and cuBLAS through the
    ``CUBLAS_WORKSPACE_CONFIG`` environment variable — for deterministic
    behavior, on CPU and GPU alike.

    Args:
        seed (int, optional): Seed for the RNGs; the current time is used when None.
        logger (logging.Logger): The logger that traces the chosen seed.

    Returns:
        int: The seed used to initialize the random number generators.

    Example:
        >>> experiment_seed = get_seed()
        Seeds from the current time; all subsequent random operations are reproducible.

        >>> experiment_seed = get_seed(42)
        >>> # experiment_seed == 42
        Uses 42 as the seed for all random number generators.
    """
    # cuBLAS needs this environment variable for determinism on CUDA >= 10.2:
    os.environ["CUBLAS_WORKSPACE_CONFIG"] = f":{CUBLAS_ALLOCATION}:8"

    # Derive a time-based seed when none was given:
    if seed is None:
        seed = int(time.time())

    # Seed the Python-level generators:
    np.random.seed(seed)
    random.seed(seed)

    # Seed torch and request deterministic kernels:
    torch.manual_seed(seed)
    torch.backends.cudnn.allow_tf32 = False
    torch.use_deterministic_algorithms(True, warn_only=True)

    # GPU determinism (only applies when CUDA is present):
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

    # Trace and return the seed actually in use:
    if logger is not None:
        logger.info(f"Initializer set up seed: {seed}")
    return seed
69
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
70
+ # END OF FILE #
71
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
src/dlutils/setup/tensorboard.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
2
+ # #
3
+ # This file was created by: Alberto Palomo Alonso #
4
+ # Universidad de Alcalá - Escuela Politécnica Superior #
5
+ # #
6
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
7
+ # Import statements:
8
+ import logging
9
+ import os
10
+ import psutil
11
+ import time
12
+ import subprocess
13
+ from torch.utils.tensorboard import SummaryWriter
14
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
15
DEFAULT_TENSORBOARD_PORT = 6006
# - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #


def get_writer(path: str, tensorboard_port: int | bool, logger: logging.Logger = None):
    """
    Sets up a TensorBoard logging and checkpoint directory for PyTorch.

    This function creates subdirectories for TensorBoard logs and model
    checkpoints, launches a TensorBoard server on the requested port (killing
    any instance already listening there), and returns the writer.

    Args:
        path (str): The root directory where TensorBoard logs and checkpoints will be stored.
        tensorboard_port (int | bool): The port on which to run TensorBoard;
            True uses DEFAULT_TENSORBOARD_PORT, False disables TensorBoard.
        logger (logging.Logger): The logger that traces the logging information.

    Returns:
        tuple: (SummaryWriter or None, checkpoints path). The writer is None
        when ``tensorboard_port`` is False.

    Example:
        >>> tensor_writer, checkpoint_dir = get_writer('/path/to/tensorboard/', True)
    """
    # Resolve the tensorboard port flag:
    if tensorboard_port is True:
        tensorboard_port = DEFAULT_TENSORBOARD_PORT
    elif tensorboard_port is False:
        # Tensorboard disabled: no writer, but still report the checkpoint dir.
        return None, os.path.join(path, 'checkpoints')

    # Create subdirectories for logs and checkpoints:
    logs_path = os.path.join(path, 'logs')
    checkpoints_path = os.path.join(path, 'checkpoints')
    os.makedirs(logs_path, exist_ok=True)
    os.makedirs(checkpoints_path, exist_ok=True)

    # Set up TensorBoard logging:
    writer = SummaryWriter(log_dir=logs_path)

    # Print paths where logs and checkpoints will be stored:
    if logger is not None:
        logger.info(f"TensorBoard logs will be stored in: {logs_path}")
        logger.info(f"Model checkpoints will be stored in: {checkpoints_path}")

    # Kill any TensorBoard instance already listening on the target port:
    for conn in psutil.net_connections(kind='inet'):
        # BUGFIX: some sockets report an empty ``laddr``; guard before reading
        # ``.port`` to avoid an AttributeError while scanning connections.
        if not conn.laddr:
            continue
        if conn.laddr.port == tensorboard_port and conn.status == psutil.CONN_LISTEN:
            if logger is not None:
                logger.warning(f"Killing already running TensorBoard process with PID {conn.pid}")
            # BUGFIX: the process may exit (or be inaccessible) between the
            # scan and the terminate call; swallow the psutil race instead of
            # crashing the whole setup.
            try:
                p = psutil.Process(conn.pid)
                p.terminate()
                p.wait(timeout=3)
            except psutil.Error:
                pass
            time.sleep(5)
    # NOTE: shell=True with an interpolated path — ``path`` must come from
    # trusted configuration, never from untrusted user input.
    process = subprocess.Popen(f'tensorboard --logdir={logs_path} --host=0.0.0.0 --port={tensorboard_port}',
                               shell=True)
    if logger is not None:
        logger.info(f'TensorBoard running at http://0.0.0.0:{tensorboard_port}/ (pid={process.pid})')

    return writer, checkpoints_path
72
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
73
+ # END OF FILE #
74
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
src/dlutils/setup/watchers.py ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # - # - # - # - # - # - # - # - # - # - # - # - # - # - # - #
2
+ # DEFAULT WATCH #
3
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
4
# Default watcher: only train/val loss enabled; every statistic flag off.
DEFAULT_WATCHER = {
    'train': {
        'loss': True,
        'lr': False,
        'val_loss': True
    },
    'parameters': {
        'max': False,
        'min': False,
        'mean': False,
        'std': False,
        'var': False,
        'hist': False,
        'l2': False,
        'l1': False,
        'sparsity': False,
        'snr': False,
        'rank': False,
        'power': False,

        # Gradients:
        'grad_max': False,
        'grad_min': False,
        'grad_mean': False,
        'grad_std': False,
        'grad_var': False,
        'grad_hist': False,
        'grad_l2': False,
        'grad_l1': False,
        'grad_sparsity': False,
        'grad_snr': False,
        'grad_rank': False,
        'grad_power': False
    },
    'activations': {
        'max': False,
        'min': False,
        'mean': False,
        'std': False,
        'var': False,
        'hist': False,
        'l2': False,
        'l1': False,
        'sparsity': False,
        'snr': False,
        'rank': False,
        'power': False,

        # Gradients:
        'grad_max': False,
        'grad_min': False,
        'grad_mean': False,
        'grad_std': False,
        'grad_var': False,
        'grad_hist': False,
        'grad_l2': False,
        'grad_l1': False,
        'grad_sparsity': False,
        'grad_snr': False,
        'grad_rank': False,
        'grad_power': False
    }
}
# - # - # - # - # - # - # - # - # - # - # - # - # - # - # - #
#                    SPECIFIC WATCH                         #
# - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
# Tag legend for the (group, flag) presets below:
# [PA] Performance analysis.
# [GF] Gradient flow.
# [AD] Activation death.
# [NT] Network topology.

S_WATCHER = [
    ('train', 'loss'),              # [TOP] [PA] Training progress.
    ('train', 'val_loss'),          # [TOP] [PA] Generalization / overfitting.
    ('parameters', 'grad_power'),   # [TOP] [GF] Gradient flow, global explosion/vanishing.
    ('parameters', 'grad_mean'),    # [TOP] [NT] Dead / useless layers (mean grad ~ 0).
    ('parameters', 'grad_max'),     # [TOP] [GF] Grad spikes -> clipping / LR.
    ('activations', 'grad_power'),  # [TOP] [GF] Per-layer grad flow (very informative).
    ('activations', 'sparsity'),    # [TOP] [AD] ReLU death / collapsed attention.
]

A_WATCHER = [
    ('train', 'lr'),                # [USEFUL] [PA] Track the scheduler / warmup.
    ('parameters', 'l2'),           # [USEFUL] [PA] Weight norm, regularization / weight decay.
    ('parameters', 'power'),        # [USEFUL] [PA] Weight scale / possible explosions.
    ('parameters', 'grad_snr'),     # [USEFUL] [GF] Signal-to-noise coherence of the grad.
    ('parameters', 'rank'),         # [USEFUL] [NT] Effective capacity / parameter collapse.
    ('activations', 'mean'),        # [USEFUL] [NT] Activation shift / bad init.
    ('activations', 'std'),         # [USEFUL] [NT] Signal propagation across layers.
    ('activations', 'snr'),         # [USEFUL] [NT] Signal coherence across layers.
    ('activations', 'grad_snr'),    # [USEFUL] [GF] Per-layer grad coherence.
]

B_WATCHER = [
    ('activations', 'hist'),        # [UTILITY] [AD] Visualize odd tails / saturations.
    ('parameters', 'snr'),          # [UTILITY] [NT] Global weight coherence (rank is usually better).
    ('parameters', 'grad_l2'),      # [UTILITY] [GF] Similar to grad_power but less intuitive.
    ('parameters', 'hist'),         # [UTILITY] [PA] Inspect weight distribution (one-off debug).
    ('activations', 'l2'),          # [UTILITY] [NT] Activation magnitude (redundant with std/power).
    ('activations', 'l1'),          # [UTILITY] [NT] Similar to l2; sometimes useful in sparse AEs.
]

C_WATCHER = [
    ('parameters', 'max'),          # [LOW] [PA] Only useful to detect occasional NaNs / inf.
    ('parameters', 'min'),          # [LOW] [PA] Same as max, little signal.
    ('parameters', 'mean'),         # [LOW] [PA] Hard to interpret without more context.
    ('parameters', 'std'),          # [LOW] [PA] Redundant with power / l2.
    ('parameters', 'var'),          # [LOW] [PA] Redundant with std.
    ('parameters', 'grad_var'),     # [LOW] [GF] Redundant with grad_std.
    ('parameters', 'grad_hist'),    # [LOW] [GF] One-off visualization, not for continuous logging.
    ('activations', 'min'),         # [LOW] [NT] Rarely says anything std/mean do not.
    ('activations', 'max'),         # [LOW] [NT] Only useful to check clamps/NaNs.
    ('activations', 'var'),         # [LOW] [NT] Redundant with std.
    ('activations', 'grad_hist'),   # [LOW] [GF] Same as parameter grad_hist, visual only.
    ('activations', 'grad_var'),    # [LOW] [GF] Redundant with grad_std/grad_power.
]

CNN_WATCHER = [
    ('train', 'loss'),              # [TOP] [PA] Training fit.
    ('train', 'val_loss'),          # [TOP] [PA] Generalization (Imagenette/ImageNet).
    ('parameters', 'grad_power'),   # [TOP] [GF] Global grad explosion/vanishing.
    ('parameters', 'grad_max'),     # [TOP] [GF] Per-layer spikes -> clipping.
    ('activations', 'grad_power'),  # [TOP] [GF] Grad per conv block / head.
    ('activations', 'sparsity'),    # [TOP] [AD] Dead ReLU / dead layers.
    ('activations', 'std'),         # [USEFUL] [NT] Signal propagation (init, BN).
    ('parameters', 'l2'),           # [USEFUL] [PA] Weight norm control / decay.
]

TRA_WATCHER = [
    ('train', 'loss'),              # [TOP] [PA] Model fit (LM / seq2seq / cls).
    ('train', 'val_loss'),          # [TOP] [PA] Generalization / overfitting.
    ('train', 'lr'),                # [USEFUL] [PA] Warmup, cosine, etc.
    ('parameters', 'grad_power'),   # [TOP] [GF] Explosion/vanishing with depth.
    ('parameters', 'grad_snr'),     # [USEFUL] [GF] Grad SNR in attention/MLP blocks.
    ('activations', 'grad_power'),  # [TOP] [GF] Grad flow per encoder/decoder layer.
    ('activations', 'mean'),        # [USEFUL] [NT] Drift in LayerNorm / RMSNorm.
    ('activations', 'std'),         # [USEFUL] [NT] Propagation with depth (residuals).
    ('parameters', 'l2'),           # [USEFUL] [PA] Weight size in attention/MLP.
]

AEN_WATCHER = [
    ('train', 'loss'),              # [TOP] [PA] Reconstruction / contrastive / VAE loss.
    ('train', 'val_loss'),          # [TOP] [PA] AE generalization.
    ('parameters', 'grad_power'),   # [TOP] [GF] Encoder/decoder grad flow.
    ('activations', 'sparsity'),    # [TOP] [AD] Sparse encoders / neuron death.
    ('activations', 'rank'),        # [USEFUL] [NT] Representation collapse / low effective dimension.
    ('parameters', 'power'),        # [USEFUL] [PA] Decoder weights exploding or collapsing.
    ('activations', 'grad_power'),  # [TOP] [GF] Per-layer grad in encoder/decoder.
    ('parameters', 'l2'),           # [USEFUL] [PA] Weight norm, especially in deep AEs.
]
src/dlutils/steps.py ADDED
@@ -0,0 +1,246 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
2
+ # #
3
+ # This file was created by: Alberto Palomo Alonso #
4
+ # Universidad de Alcalá - Escuela Politécnica Superior #
5
+ # #
6
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
7
+ # Import statements:
8
+ import torch
9
+ import numpy as np
10
+ import tqdm
11
+ from .setup import Setup, HookMonitor
12
+
13
+
14
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
15
+ # #
16
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
17
def train_step(
        # Always granted:
        model: torch.nn.Module,
        data: torch.utils.data.DataLoader,
        loss: torch.nn.Module,
        optimizer: torch.optim.Optimizer,
        controller: Setup,
        # Not always granted:
        scheduler: torch.optim.lr_scheduler.LRScheduler = None,
) -> float:
    """
    Performs a single training step (one full epoch over ``data``) including
    forward pass, loss calculation, backward pass, and optimization step.

    Parameters:
        model (torch.nn.Module): The model to be trained.
        data (torch.utils.data.DataLoader): DataLoader providing the training data.
            Each element may be ``(x, y)``, ``(x, y, x_mask)``,
            ``(x, y, x_mask, y_mask)`` or longer (extra items are forwarded to the model).
        loss (torch.nn.Module): Loss function to be used.
        optimizer (torch.optim.Optimizer): Optimizer used for gradient updates.
        controller (Setup): The setup object containing configuration and state
            (device, logger, watcher flags, autoscaler, replay id, checkpointing).
        scheduler (torch.optim.lr_scheduler._LRScheduler, optional): Learning rate scheduler to adjust the learning rate.
            Stepped once per epoch (not per batch).
    Returns:
        float: The mean loss value for this training step.
    """
    # Train mode:
    model.to(controller.device)
    model.train()

    # Per-batch losses accumulated over the epoch:
    losses = list()

    # HookMonitor records activation statistics for the watched layers:
    with HookMonitor(model, controller.watcher['activations'], controller.logger) as hooks:
        with tqdm.tqdm(data, desc=f'\rTraining epoch {controller.epoch}', leave=True) as pbar:
            pbar: tqdm.tqdm
            hooks: HookMonitor

            for i, element in enumerate(pbar):

                # 1. Gather elements (supports 2..N-tuples, see docstring):
                args = tuple()
                if len(element) == 2:
                    # Prediction:
                    x, y = element
                    x_m, y_m = None, None
                elif len(element) == 3:
                    # Prediction with x_mask:
                    x, y, x_m = element
                    y_m = None
                elif len(element) == 4:
                    # Prediction with x_mask and y_mask:
                    x, y, x_m, y_m = element
                elif len(element) > 4:
                    # More input arguments:
                    x, y = element[0], element[1]
                    x_m, y_m = element[2], element[3]
                    args = element[4:]
                else:
                    raise ValueError("DataLoader elements must have at least two elements.")

                # 2. Load data to device:
                x, y = x.to(controller.device, non_blocking=True), y.to(controller.device, non_blocking=True)
                optimizer.zero_grad()
                if x_m is not None:
                    x_m = x_m.to(controller.device, non_blocking=True)
                if y_m is not None:
                    y_m = y_m.to(controller.device, non_blocking=True)

                # 3. TRAIN - Control autocast (mem-speed): mixed precision only
                # when an autoscaler is configured; autocast itself is enabled
                # only on CUDA devices.
                if controller.autoscaler is not None:
                    with torch.amp.autocast(enabled=(controller.device.type == 'cuda'), device_type=controller.device.type):
                        # Forward:
                        y_hat = model(x, x_m, *args) if x_m is not None else model(x)
                        loss_metric = loss(y_hat, y, y_m) if y_m is not None else loss(y_hat, y)
                        # Backward (through the gradient scaler):
                        controller.autoscaler.scale(loss_metric).backward()
                        controller.autoscaler.step(optimizer)
                        controller.autoscaler.update()
                else:
                    # Forward:
                    y_hat = model(x, x_m, *args) if x_m is not None else model(x)
                    loss_metric = loss(y_hat, y, y_m) if y_m is not None else loss(y_hat, y)
                    # Backward:
                    loss_metric.backward()
                    optimizer.step()

                # 4. Append to metrics:
                losses.append(loss_metric.item())

                # 5. Monitor hooks: snapshot one batch per epoch as a replay image.
                # NOTE(review): ``controller.replay_id[0]`` presumably holds the
                # batch index chosen for the replay — confirm against Setup.
                if controller.replay_id[0] == i:
                    controller.register_replay(predicted=y_hat, target=y, mask=y_m)

    # Write in summary writer (per epoch):
    losses = np.array(losses)
    mean_loss = float(np.mean(losses))

    # ================ WATCH ================
    # Register parameters (weight/grad statistics per watcher flags):
    for name, parameter in model.named_parameters():
        controller.register(name, parameter)

    # Register train loss:
    controller.register('loss', mean_loss)

    # Register activation statistics collected by the hooks:
    for layer_name, layer_stats in hooks.get_stats().items():
        for func_name, item in layer_stats.items():
            controller.register(f'{func_name}/{layer_name}', torch.Tensor([item])[0])

    # ================ CONTROL ================
    # Scheduler step (per epoch, after logging the LR that was actually used):
    if scheduler is not None:
        controller.register('lr', scheduler.get_last_lr()[0])
        scheduler.step()

    # Write for logger:
    controller.logger.info(f"Epoch [{controller.epoch}]: loss = {mean_loss:.8f}")

    # Checkpointing:
    controller.check(model, optimizer, scheduler)

    return mean_loss
139
+
140
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
141
+ # #
142
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
143
def validation_step(
        # Always granted:
        model: torch.nn.Module,
        data: torch.utils.data.DataLoader,
        loss: torch.nn.Module,
        controller: Setup,
        additional_metrics: dict = (),
) -> dict:
    """
    Performs a single validation step (one full pass over ``data``) including
    forward pass and loss calculation, without gradient tracking.

    Parameters:
        model (torch.nn.Module): The model to be validated.
        data (torch.utils.data.DataLoader): DataLoader providing the validation data.
            Each element may be ``(x, y)``, ``(x, y, x_mask)``,
            ``(x, y, x_mask, y_mask)`` or longer (extra items are forwarded to the model).
        loss (torch.nn.Module): Loss function to be used.
        controller (Setup): The setup object containing configuration and state.
        additional_metrics (dict): Mapping of metric name to callable
            ``metric(y_hat, y, y_m)``; the empty-tuple default means none.
    Returns:
        dict: Mean value per additional metric, plus the mean loss under 'loss'.
    """
    # Validation mode:
    model.to(controller.device)
    model.eval()

    # Per-batch losses and additional metric values:
    losses = list()
    metrics: dict[str, list | float] = {name: list() for name in additional_metrics}

    with torch.no_grad():
        with tqdm.tqdm(data, desc=f'\rValidation epoch {controller.epoch}', leave=True) as pbar:
            for element in pbar:
                # Gather elements. BUGFIX: ``args`` is initialized up front —
                # previously the 4-element case never assigned it, so the first
                # (x, y, x_mask, y_mask) batch raised NameError at the forward pass.
                args = tuple()
                if len(element) == 2:
                    # Prediction:
                    x, y = element
                    x_m, y_m = None, None
                elif len(element) == 3:
                    # Prediction with x_mask:
                    x, y, x_m = element
                    y_m = None
                elif len(element) == 4:
                    # Prediction with x_mask and y_mask:
                    x, y, x_m, y_m = element
                elif len(element) > 4:
                    # More input arguments:
                    x, y = element[0], element[1]
                    x_m, y_m = element[2], element[3]
                    args = element[4:]
                else:
                    raise ValueError("DataLoader elements must have at least two elements.")

                # Load data to device:
                x, y = x.to(controller.device, non_blocking=True), y.to(controller.device, non_blocking=True)
                if x_m is not None:
                    x_m = x_m.to(controller.device, non_blocking=True)
                if y_m is not None:
                    y_m = y_m.to(controller.device, non_blocking=True)

                # Control autocast (mem-speed):
                if controller.autoscaler is not None:
                    with torch.amp.autocast(enabled=(controller.device.type == 'cuda'),
                                            device_type=controller.device.type):
                        # Forward:
                        y_hat = model(x, x_m, *args) if x_m is not None else model(x)
                        loss_metric = loss(y_hat, y, y_m) if y_m is not None else loss(y_hat, y)

                        # Compute additional metrics (inside autocast, matching
                        # the forward dtype):
                        if additional_metrics:
                            for name, additional_metric in additional_metrics.items():
                                metrics[name].append(additional_metric(y_hat, y, y_m).item())
                else:
                    # Forward:
                    y_hat = model(x, x_m, *args) if x_m is not None else model(x)
                    loss_metric = loss(y_hat, y, y_m) if y_m is not None else loss(y_hat, y)

                    # Compute additional metrics:
                    if additional_metrics:
                        for name, additional_metric in additional_metrics.items():
                            metrics[name].append(additional_metric(y_hat, y, y_m).item())

                # Append to metrics:
                losses.append(loss_metric.item())

    # Convert:
    losses = np.array(losses)
    mean_loss = float(np.mean(losses))

    # Reduce the additional metrics to their means and attach the loss:
    for name, variable in metrics.items():
        metrics[name] = float(np.mean(variable))
    metrics['loss'] = mean_loss

    # Write to register:
    controller.register("val_loss", mean_loss)
    # Write for logger:
    controller.logger.info(f"Epoch [{controller.epoch}]: val_loss = {mean_loss:.8f}")

    return metrics
244
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
245
+ # END OF FILE #
246
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
src/model/__init__.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
2
+ # #
3
+ # This file was created by: Alberto Palomo Alonso #
4
+ # Universidad de Alcalá - Escuela Politécnica Superior #
5
+ # #
6
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
7
+ # Import statements:
8
+ from .config import ModelConfig, TransformerConfig, CoSeNetConfig
9
+ from .segmentation import SegmentationNetwork
10
+ from .loss import MaskedBCELoss
11
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
12
+ # END OF FILE #
13
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
src/model/config.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
2
+ # #
3
+ # This file was created by: Alberto Palomo Alonso #
4
+ # Universidad de Alcalá - Escuela Politécnica Superior #
5
+ # #
6
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
7
+ # Import statements:
8
+ from typing import List
9
+ from dataclasses import dataclass, field
10
+
11
+
12
@dataclass
class CoSeNetConfig:
    """Configuration for the CoSeNet segmentation head."""
    # Whether the pre-trained CoSeNet linear layer is fine-tuned
    # (forwarded to CoSeNet(trainable=...)).
    trainable: bool = True
    # Initial magnitude of the trainable sigmoid adaptation
    # (forwarded to CoSeNet(init_scale=...)).
    init_scale: float = 5.0
16
+
17
+
18
@dataclass
class TransformerConfig:
    """Per-encoder-block hyper-parameters (one instance per EncoderBlock)."""
    # Number of heads in the multi-head self-attention layer.
    attention_heads: int = 8
    # Hidden size of the feed-forward sublayer = multiplier * model_dim.
    feed_forward_multiplier: float = 4
    # Dropout probability applied on the feed-forward residual branch.
    dropout: float = 0.0
    # True -> Pre-LayerNorm variant; False -> Post-LayerNorm variant.
    pre_normalize: bool = True
24
+
25
+
26
@dataclass
class ModelConfig:
    """Top-level hyper-parameter container for the segmentation model."""
    # Size of the token vocabulary used by the embedding layer.
    vocab_size: int = 2 ** 15
    # Dimensionality of token embeddings / transformer features.
    model_dim: int = 256
    # Maximum tokens per sentence; used as the positional-encoding length.
    max_tokens: int = 382
    # Maximum sentences per document — presumably caps the sentence axis;
    # not referenced by the visible model code, confirm against the dataset.
    max_sentences: int = 384
    # Mask convention: True entries mark valid (non-padded) positions.
    valid_padding: bool = True
    # Configuration of the CoSeNet segmentation head.
    cosenet: CoSeNetConfig = field(default_factory=CoSeNetConfig)
    # One TransformerConfig per stacked encoder block, in order.
    transformers: List[TransformerConfig] = field(default_factory=lambda: [TransformerConfig()])
35
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
36
+ # END OF FILE #
37
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
src/model/cosenet/__init__.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
2
+ # #
3
+ # This file was created by: Alberto Palomo Alonso #
4
+ # Universidad de Alcalá - Escuela Politécnica Superior #
5
+ # #
6
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
7
+ from .cosenet import CoSeNet
8
+ from .cosine_distance import CosineDistanceLayer
9
+ from .trainable_sigmoid import TrainableSigmoid
10
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
11
+ # END OF FILE #
12
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
src/model/cosenet/cosenet.py ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
2
+ # #
3
+ # This file was created by: Alberto Palomo Alonso #
4
+ # Universidad de Alcalá - Escuela Politécnica Superior #
5
+ # #
6
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
7
+ # Import statements:
8
+ import torch
9
+ import os
10
+ import numpy as np
11
+ from .cosenet_layer import CoSeNetLayer
12
+ from .trainable_sigmoid import TrainableSigmoid
13
+
14
+
15
class CoSeNet(torch.nn.Module):
    """
    PyTorch's implementation of the CoSeNet architecture.

    This module loads pre-trained CoSeNet weights and applies a structured
    unfolding–linear–folding pipeline to the input tensor. An optional
    trainable sigmoid adaptation is applied to the input prior to the
    CoSeNet transformation.

    The architecture assumes that the input data represent structured
    matrices (e.g., similarity or distance matrices) and performs
    diagonal-based unfolding with overlapping windows.
    """

    def __init__(self, trainable: bool = False, init_scale: float = 5.0, **kwargs):
        """
        Initialize the CoSeNet model.

        Pre-trained weights and biases are loaded from disk and used to
        construct the internal CoSeNet layer. Optionally, the parameters
        can be set as trainable.

        Args:
            trainable (bool, optional): Whether the CoSeNet linear layer
                parameters should be trainable. Defaults to False.
            init_scale (float, optional): Initial scale for the trainable
                sigmoid adaptation module. Defaults to 5.0.
            **kwargs: Additional keyword arguments forwarded to
                `torch.nn.Module`.

        Raises:
            FileNotFoundError: If the weight or bias files cannot be found.
        """
        super().__init__(**kwargs)

        # Load weights: the pre-trained parameters live next to this file
        # in a 'weights' sub-directory ('w.npy' / 'b.npy').
        this_file_name = os.path.dirname(os.path.abspath(__file__))
        w_path = os.path.join(this_file_name, 'weights', 'w.npy')
        b_path = os.path.join(this_file_name, 'weights', 'b.npy')

        if not os.path.exists(w_path):
            raise FileNotFoundError(f'CoSeNet weight file {w_path} does not exist.')
        if not os.path.exists(b_path):
            raise FileNotFoundError(f'CoSeNet bias file {b_path} does not exist.')

        w, b = np.load(w_path), np.load(b_path)

        # Build layers. The window size L is recovered from the flattened
        # weight length: the linear layer consumes an L x L block, so
        # L = sqrt(w.shape[-1]).
        self.matrix_shape = int(np.sqrt(w.shape[-1]))
        self.layer = CoSeNetLayer(w, b, trainable=trainable)
        self.adaptation = TrainableSigmoid(init_scale=init_scale)

    def forward(self, x: torch.Tensor, mask: torch.Tensor = None) -> torch.Tensor:
        """
        Forward pass of the CoSeNet model.

        The input is first adapted using a trainable sigmoid, then padded,
        unfolded along the diagonal, processed by the CoSeNet linear layer,
        and finally folded back into its original structure. An optional
        external mask can be applied to the output.

        Args:
            x (torch.Tensor): Input tensor containing structured matrix data.
            mask (torch.Tensor, optional): Optional mask tensor applied
                element-wise to the output. Defaults to None.

        Returns:
            torch.Tensor: Output tensor with the same spatial structure as
                the input.
        """
        # check dimension:
        if x.dim() < 2:
            raise ValueError(f'CoSeNet input: at least 2 dimensions required. (got {x.dim()})')
        # Check perfect square:
        if x.shape[-1] != x.shape[-2]:
            raise ValueError(f'CoSeNet input: last two dimensions must be equal. ({x.shape[-2]} != {x.shape[-1]})')

        # Pipeline: sigmoid adaptation -> pad -> unfold -> linear -> fold.
        adapted_x = self.adaptation(x)
        pad_x, pad_mask = self.__cosenet_padding(adapted_x)
        unfold_x = self.__unfold(pad_x)
        unfold_y = self.layer(unfold_x)
        y = self.__fold(unfold_y, pad_mask)

        # Optional element-wise output masking:
        if mask is not None:
            y = torch.multiply(y, mask)

        return y

    def __unfold(self, x: torch.Tensor) -> torch.Tensor:
        """
        Unfold the input tensor into overlapping diagonal blocks.

        The unfolding is performed using a sliding window over the last
        two dimensions, followed by diagonal extraction. The stride is
        determined by half of the matrix size, so consecutive blocks
        overlap by half a window.

        Args:
            x (torch.Tensor): Padded input tensor.

        Returns:
            torch.Tensor: Tensor containing unfolded diagonal blocks with
                shape [..., K, L, L], where K is the number of extracted blocks.
        """
        # Half-window stride (at least 1 so L == 1 still advances):
        step = max(1, self.matrix_shape // 2)
        u = x.unfold(-2, self.matrix_shape, step).unfold(-2, self.matrix_shape, step)
        # Keep only blocks on the main diagonal of the window grid.
        # NOTE(review): movedim(-1, 1) places the block axis at position 1,
        # which assumes a leading batch dimension — confirm for 2-D inputs.
        y = u.diagonal(offset=0, dim1=-4, dim2=-3).movedim(-1, 1)
        return y

    @staticmethod
    def __fold(x: torch.Tensor, pad_mask: torch.Tensor) -> torch.Tensor:
        """
        Fold unfolded CoSeNet outputs back into a full matrix.

        Overlapping regions are combined using an averaging strategy to
        account for multiple contributions to the same spatial location.

        Args:
            x (torch.Tensor): Tensor containing unfolded CoSeNet outputs.
            pad_mask (torch.Tensor): Boolean mask indicating valid (non-padded)
                positions.

        Returns:
            torch.Tensor: Folded tensor with padding removed and original
                structure restored.
        """
        if x.shape[-2] > 1:
            # Output length for K half-overlapping windows of size 2t:
            # t * (K + 1).
            y = torch.zeros(
                list(x.shape[:-2]) + [x.shape[-1] * (x.shape[-2] + 1) // 2],
                device=x.device,
            )
            t = x.shape[-1] // 2

            # Each window contributes half-weight to its span; window
            # starts are re-doubled so non-overlapped cells keep full weight.
            for i in range(x.shape[-2]):
                y[..., i * t + 1: t * (i + 2)] += 0.5 * x[..., i, 1:]
                y[..., i * t] *= 2

            # First/last half-windows have only one contributor each.
            y[..., :t] *= 2
            y[..., -t:] *= 2
            y[..., 0] = 1
        else:
            # Single window: nothing to merge.
            y = x[..., 0, :]

        # NOTE(review): boolean indexing returns only the valid entries
        # (flattened); the subsequent .view(pad_mask.shape) can only succeed
        # when no positions were actually padded. Confirm that callers
        # guarantee sizes divisible by the window, or that this is intended.
        return y[pad_mask].view(pad_mask.shape)

    def __cosenet_padding(self, x: torch.Tensor) -> tuple:
        """
        Pad the input tensor to match the required matrix shape.

        Padding is applied along the last two dimensions to ensure that
        their sizes are multiples of the CoSeNet matrix shape. A diagonal
        mask is generated to distinguish padded elements.

        Args:
            x (torch.Tensor): Original input tensor.

        Returns:
            tuple:
                - torch.Tensor: Padded tensor with diagonal correction.
                - torch.Tensor: Boolean mask indicating valid entries.
        """
        # Amount needed to round each of the last two dims up to a
        # multiple of the window size:
        pad_w = (self.matrix_shape - (x.shape[-1] % self.matrix_shape)) % self.matrix_shape
        pad_h = (self.matrix_shape - (x.shape[-2] % self.matrix_shape)) % self.matrix_shape

        x = torch.nn.functional.pad(x, (0, pad_w, 0, pad_h))

        # Padded positions are detected as zeros on the main diagonal
        # (a cosine self-similarity diagonal is 1 for real entries).
        diag = x.diagonal(dim1=-2, dim2=-1)
        mask_bool = (diag == 0)
        mask01 = mask_bool.to(x.dtype)

        # Set the padded diagonal entries to 1 so they look like valid
        # self-similarities to the linear layer.
        x = x + torch.diag_embed(mask01)

        return x, torch.logical_not(mask_bool)
187
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
188
+ # END OF FILE #
189
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
src/model/cosenet/cosenet_layer.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
2
+ # #
3
+ # This file was created by: Alberto Palomo Alonso #
4
+ # Universidad de Alcalá - Escuela Politécnica Superior #
5
+ # #
6
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
7
+ # Import statements:
8
+ import torch
9
+ import numpy as np
10
+
11
+
12
class CoSeNetLayer(torch.nn.Module):
    """
    Fixed or fine-tunable affine projection used inside CoSeNet.

    The weight matrix and bias are supplied externally (typically loaded
    from pre-trained ``.npy`` files) and wrapped as parameters. Depending
    on ``trainable``, they are either frozen or optimized together with
    the rest of the network. The layer expects inputs whose last two
    dimensions form an already padded and segmented block, which is
    flattened before the linear mapping.
    """

    def __init__(self, coef: np.ndarray, intercept: np.ndarray, trainable: bool = False, **kwargs):
        """
        Initialize the CoSeNet layer from externally provided parameters.

        Args:
            coef (np.ndarray): Weight matrix for the linear transformation.
            intercept (np.ndarray): Bias vector added to the linear output.
            trainable (bool, optional): Whether weight and bias take part
                in gradient-based optimization. Defaults to False.
            **kwargs: Additional keyword arguments forwarded to
                `torch.nn.Module`.
        """
        super().__init__(**kwargs)
        # Cast the numpy parameters to float32 tensors once, up front.
        weight_tensor = torch.tensor(coef, dtype=torch.float32)
        bias_tensor = torch.tensor(intercept, dtype=torch.float32)
        self.weight = torch.nn.Parameter(weight_tensor, requires_grad=trainable)
        self.bias = torch.nn.Parameter(bias_tensor, requires_grad=trainable)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Apply the affine transformation to the input tensor.

        Args:
            x (torch.Tensor): Input tensor of shape [..., *, *]; the last
                two dimensions are flattened prior to the linear mapping.

        Returns:
            torch.Tensor: Result of `flatten(x) @ weight.T + bias`.
        """
        # Collapse the trailing block into a single feature axis.
        flat = torch.flatten(x, start_dim=-2)
        return torch.nn.functional.linear(flat, self.weight, self.bias)
53
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
54
+ # END OF FILE #
55
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
src/model/cosenet/cosine_distance.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
2
+ # #
3
+ # This file was created by: Alberto Palomo Alonso #
4
+ # Universidad de Alcalá - Escuela Politécnica Superior #
5
+ # #
6
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
7
+ # Import statements:
8
+ import torch
9
+
10
+
11
class CosineDistanceLayer(torch.nn.Module):
    """
    Pairwise cosine similarity layer.

    Computes the absolute cosine similarity between every pair of
    embedding vectors in the same input, producing one square matrix per
    leading batch dimension. Opposite directions are treated as related
    because the absolute value is taken.
    """

    def __init__(self, **kwargs):
        """
        Initialize the cosine distance layer.

        Args:
            **kwargs: Additional keyword arguments forwarded to
                `torch.nn.Module`.
        """
        super().__init__(**kwargs)

    @staticmethod
    def forward(x: torch.Tensor) -> torch.Tensor:
        """
        Compute pairwise |cosine similarity| between embeddings.

        Args:
            x (torch.Tensor): Input of shape [..., S, D] with `S`
                embeddings of dimensionality `D`.

        Returns:
            torch.Tensor: Tensor of shape [..., S, S] with the absolute
                pair-wise cosine similarities.
        """
        # L2-normalize so the inner product becomes the cosine.
        unit = torch.nn.functional.normalize(x, p=2, dim=-1)  # [..., S, D]
        # Gram matrix of the unit vectors = cosine similarities.
        similarity = unit @ unit.transpose(-2, -1)  # [..., S, S]
        return similarity.abs()
55
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
56
+ # END OF FILE #
57
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
src/model/cosenet/trainable_sigmoid.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
2
+ # #
3
+ # This file was created by: Alberto Palomo Alonso #
4
+ # Universidad de Alcalá - Escuela Politécnica Superior #
5
+ # #
6
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
7
+ # Import statements:
8
+ import torch
9
+
10
+
11
+ class TrainableSigmoid(torch.nn.Module):
12
+ """
13
+ Trainable sigmoid activation module with learnable scaling.
14
+
15
+ This module implements a sigmoid function whose slope is controlled by
16
+ a trainable parameter. It is designed to adaptively rescale input values
17
+ (e.g., distances or similarity scores) around a fixed midpoint (0.5),
18
+ allowing the model to learn the appropriate sharpness of the transition
19
+ during training.
20
+ """
21
+
22
+ def __init__(self, init_scale: float = 5.0, **kwargs):
23
+ """
24
+ Initialize the trainable sigmoid module.
25
+
26
+ Args:
27
+ init_scale (float, optional): Initial magnitude of the sigmoid
28
+ scaling factor. Internally, the learnable parameter is
29
+ initialized as the negative of this value. Defaults to 5.0.
30
+ **kwargs: Additional keyword arguments forwarded to
31
+ `torch.nn.Module`.
32
+ """
33
+ super().__init__(**kwargs)
34
+ self.alpha = torch.nn.Parameter(torch.tensor(-init_scale, dtype=torch.float32))
35
+
36
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
37
+ """
38
+ Apply the trainable sigmoid transformation to the input tensor.
39
+
40
+ The transformation is centered at 0.5 and scaled by a learnable
41
+ parameter, enabling adaptive control over the sigmoid steepness.
42
+
43
+ Args:
44
+ x (torch.Tensor): Input tensor containing values to be transformed.
45
+
46
+ Returns:
47
+ torch.Tensor: Tensor of the same shape as `x`, with the trainable
48
+ sigmoid function applied element-wise.
49
+ """
50
+ return 1 / (1 + torch.exp(self.alpha * (x - 0.5)))
51
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
52
+ # END OF FILE #
53
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
src/model/loss.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
2
+ # #
3
+ # This file was created by: Alberto Palomo Alonso #
4
+ # Universidad de Alcalá - Escuela Politécnica Superior #
5
+ # #
6
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
7
+ # Import statements:
8
+ import torch
9
+
10
+
11
class MaskedBCELoss(torch.nn.Module):
    """
    Binary Cross-Entropy restricted to valid (non-padded) positions.

    A boolean mask selects which elements contribute to the loss, so
    padded positions never influence the gradient. Predictions may be
    given either as logits or as probabilities, and the per-element
    losses are reduced with a mean over valid elements or a plain sum.
    The mask convention is configurable to match either valid-marks-True
    or PyTorch-style padding-marks-True semantics.
    """

    def __init__(
        self,
        reduction: str = 'mean',
        valid_pad: bool = True,
        eps: float = 1e-7,
        logits: bool = True
    ):
        """
        Build the masked BCE criterion.

        Args:
            reduction (str, optional): Either `'mean'` (average over valid
                elements) or `'sum'`. Defaults to `'mean'`.
            valid_pad (bool, optional): If True, `True` mask entries mark
                valid positions; if False, they mark padding, following
                PyTorch conventions. Defaults to True.
            eps (float, optional): Clamping constant applied to probability
                inputs when `logits=False`. Defaults to 1e-7.
            logits (bool, optional): Whether predictions are raw logits
                (uses `binary_cross_entropy_with_logits`) or probabilities
                (uses `binary_cross_entropy`). Defaults to True.

        Raises:
            ValueError: If `reduction` is neither `'mean'` nor `'sum'`.
        """
        super().__init__()

        if reduction not in ('mean', 'sum'):
            raise ValueError("[MASKED-BCE] Reduction must be 'mean' or 'sum'")

        self.reduction = reduction
        self.valid_pad = valid_pad
        self.logits = logits
        self.eps = eps

        # Select the element-wise BCE backend once, at construction time.
        self.loss = (
            torch.nn.functional.binary_cross_entropy_with_logits
            if logits
            else torch.nn.functional.binary_cross_entropy
        )

    def forward(
        self,
        x: torch.Tensor,
        y: torch.Tensor,
        mask: torch.Tensor
    ) -> torch.Tensor:
        """
        Compute the masked binary cross-entropy loss.

        Args:
            x (torch.Tensor): Predictions of shape (B, S); interpreted as
                logits or probabilities depending on the `logits` flag.
            y (torch.Tensor): Ground-truth binary labels of shape (B, S).
            mask (torch.Tensor): Boolean mask of shape (B, S), interpreted
                according to `valid_pad` (see `__init__`).

        Returns:
            torch.Tensor: Scalar tensor with the reduced loss value.
        """
        # Resolve which positions count toward the loss:
        valid = mask if self.valid_pad else torch.logical_not(mask)

        # Clamp probabilities away from 0/1 for numerical stability:
        if not self.logits:
            x = x.clamp(self.eps, 1.0 - self.eps)

        # Element-wise BCE, then zero out the non-valid positions:
        elementwise = self.loss(
            x.float(),
            y.float(),
            reduction='none'
        )
        elementwise = elementwise * valid.float()
        total = elementwise.sum()

        if self.reduction == 'sum':
            return total
        if self.reduction == 'mean':
            # Clamp the denominator so an all-padded batch yields 0, not NaN.
            return total / valid.sum().clamp(min=1)
        # Unreachable: __init__ already validated `reduction`.
        raise ValueError("[MASKED-BCE] Reduction must be 'mean' or 'sum'")
113
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
114
+ # END OF FILE #
115
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
src/model/segmentation.py ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
2
+ # #
3
+ # This file was created by: Alberto Palomo Alonso #
4
+ # Universidad de Alcalá - Escuela Politécnica Superior #
5
+ # #
6
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
7
+ # Import statements:
8
+ import torch
9
+ from .config import ModelConfig
10
+ from .cosenet import CosineDistanceLayer, CoSeNet
11
+ from .transformers import EncoderBlock, PositionalEncoding, MaskedMeanPooling
12
+
13
+
14
class SegmentationNetwork(torch.nn.Module):
    """
    Segmentation network combining Transformer encoders with CoSeNet.

    This model integrates token embeddings and positional encodings with
    a stack of Transformer encoder blocks to produce contextualized
    representations. These representations are then processed by a
    CoSeNet module to perform structured segmentation, followed by a
    cosine-based distance computation.

    The final output is a pair-wise distance matrix suitable for
    segmentation or boundary detection tasks.
    """
    def __init__(self, model_config: ModelConfig, **kwargs):
        """
        Initialize the segmentation network.

        The network is composed of an embedding layer, positional encoding,
        multiple Transformer encoder blocks, a CoSeNet segmentation module,
        and a cosine distance layer.

        Args:
            model_config (ModelConfig): Configuration object containing all
                hyperparameters required to build the model, including
                vocabulary size, model dimensionality, transformer settings,
                and CoSeNet parameters.
            **kwargs: Additional keyword arguments forwarded to
                `torch.nn.Module`.
        """
        super().__init__(**kwargs)
        # Mask convention shared by all sub-modules: True marks valid
        # positions when this flag is set.
        self.valid_padding = model_config.valid_padding

        # Build layers:
        self.embedding = torch.nn.Embedding(
            model_config.vocab_size,
            model_config.model_dim
        )
        self.positional_encoding = PositionalEncoding(
            emb_dim=model_config.model_dim,
            max_len=model_config.max_tokens
        )
        self.cosenet = CoSeNet(
            trainable=model_config.cosenet.trainable,
            init_scale=model_config.cosenet.init_scale
        )
        self.distance_layer = CosineDistanceLayer()
        self.pooling = MaskedMeanPooling(valid_pad=model_config.valid_padding)

        # Build encoder blocks: one EncoderBlock per TransformerConfig,
        # stacked in configuration order.
        module_list = list()
        for transformer_config in model_config.transformers:
            encoder_block = EncoderBlock(
                feature_dim=model_config.model_dim,
                attention_heads=transformer_config.attention_heads,
                feed_forward_multiplier=transformer_config.feed_forward_multiplier,
                dropout=transformer_config.dropout,
                valid_padding=model_config.valid_padding,
                pre_normalize=transformer_config.pre_normalize
            )
            module_list.append(encoder_block)

        self.encoder_blocks = torch.nn.ModuleList(module_list)

    def forward(self, x: torch.Tensor, mask: torch.Tensor = None, candidate_mask: torch.Tensor = None) -> torch.Tensor:
        """
        Forward pass of the segmentation network.

        The input token indices are embedded and enriched with positional
        information, then processed by a stack of Transformer encoder
        blocks. The resulting representations are segmented using CoSeNet
        and finally transformed into a pair-wise distance representation.

        Args:
            x (torch.Tensor): Input tensor of token indices; the reshape
                below implies shape (batch, sentences, tokens) — i.e. 4-D
                after embedding.
            mask (torch.Tensor, optional): Optional mask tensor indicating
                valid or padded positions, depending on the configuration
                of the Transformer blocks. Defaults to None.

                If `valid_padding` is disabled, the mask is inverted before being
                passed to CoSeNet to match its masking convention.

            candidate_mask (torch.Tensor, optional): Optional mask tensor for
                candidate positions in CoSeNet. Defaults to None.

                If `valid_padding` is disabled, the mask is inverted before being
                passed to CoSeNet to match its masking convention.

        Returns:
            torch.Tensor: Output tensor containing pairwise distance values
                derived from the segmented representations.
        """
        # Convert to type: embedding lookup needs integer indices.
        x = x.int()

        # Embedding and positional encoding:
        x = self.embedding(x)
        x = self.positional_encoding(x)

        # Reshape x and mask: merge (batch, sentence) so each sentence is
        # encoded independently as its own sequence of tokens.
        _b, _s, _t, _d = x.shape
        x = x.reshape(_b * _s, _t, _d)
        if mask is not None:
            mask = mask.reshape(_b * _s, _t).bool()

        # Encode the sequence:
        for encoder in self.encoder_blocks:
            x = encoder(x, mask=mask)

        # Reshape x and mask: restore the (batch, sentence) split and
        # normalize the mask to valid-marks-True for pooling/CoSeNet.
        x = x.reshape(_b, _s, _t, _d)
        if mask is not None:
            mask = mask.reshape(_b, _s, _t)
            mask = torch.logical_not(mask) if not self.valid_padding else mask

        # Apply pooling: one vector (and mask entry) per sentence.
        x, mask = self.pooling(x, mask=mask)

        # Compute distances: pair-wise |cosine| between sentence vectors.
        x = self.distance_layer(x)

        # Pass through CoSeNet:
        x = self.cosenet(x, mask=mask)

        # Apply candidate mask if provided: after normalization, True
        # entries mark positions to be zeroed out.
        if candidate_mask is not None:
            candidate_mask = candidate_mask.bool() if not self.valid_padding else torch.logical_not(candidate_mask.bool())
            candidate_mask = candidate_mask.to(device=x.device)
            x = x.masked_fill(candidate_mask, 0)

        return x
145
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
146
+ # END OF FILE #
147
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
src/model/transformers/__init__.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
2
+ # #
3
+ # This file was created by: Alberto Palomo Alonso #
4
+ # Universidad de Alcalá - Escuela Politécnica Superior #
5
+ # #
6
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
7
+ # Import statements:
8
+ from .attention import EncoderBlock
9
+ from .positional_encoding import PositionalEncoding
10
+ from .pooling import MaskedMeanPooling
11
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
12
+ # END OF FILE #
13
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
src/model/transformers/attention.py ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
2
+ # #
3
+ # This file was created by: Alberto Palomo Alonso #
4
+ # Universidad de Alcalá - Escuela Politécnica Superior #
5
+ # #
6
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
7
+ # Import statements:
8
+ import torch
9
+
10
+
11
class EncoderBlock(torch.nn.Module):
    """
    Transformer encoder block with configurable Pre-LayerNorm or Post-LayerNorm
    architecture.

    The block consists of a multi-head self-attention sublayer followed by a
    position-wise feed-forward network, each wrapped with a residual connection.
    Layer normalization can be applied either before each sublayer (Pre-LN) or
    after each residual addition (Post-LN).

    This design allows stable training of deep Transformer stacks while retaining
    compatibility with the original Transformer formulation.
    """
    def __init__(
        self,
        feature_dim: int,
        attention_heads: int = 8,
        feed_forward_multiplier: float = 4,
        dropout: float = 0.0,
        valid_padding: bool = False,
        pre_normalize: bool = True,
        **kwargs
    ):
        """
        Initializes a Transformer encoder block.

        Parameters
        ----------
        feature_dim : int
            Dimensionality of the input and output feature representations.
        attention_heads : int, optional
            Number of attention heads used in the multi-head self-attention layer.
            Default is 8.
        feed_forward_multiplier : float, optional
            Expansion factor for the hidden dimension of the feed-forward network.
            The intermediate dimension is computed as
            `feed_forward_multiplier * feature_dim`.
            Default is 4.
        dropout : float, optional
            Dropout probability applied to the feed-forward residual connection
            only; the attention layer itself uses no dropout.
            Default is 0.0.
        valid_padding : bool, optional
            If True, the provided mask marks valid (non-padded) positions.
            If False, the mask marks padded (invalid) positions directly.
            Default is False.
        pre_normalize : bool, optional
            If True, uses the Pre-LayerNorm Transformer variant, applying layer
            normalization before each sublayer (self-attention and feed-forward).
            If False, uses the Post-LayerNorm variant, applying normalization after
            each residual connection.
            Default is True.
        **kwargs
            Additional keyword arguments passed to the parent `torch.nn.Module`.
        """
        # Module init via kwargs:
        super().__init__(**kwargs)

        # Store params:
        self.valid_padding = valid_padding
        self.pre_normalize = pre_normalize

        # Norm layers: norm_in wraps the attention sublayer, norm_out the
        # feed-forward sublayer (roles are scheduled below per variant).
        self.norm_in = torch.nn.LayerNorm(feature_dim)
        self.norm_out = torch.nn.LayerNorm(feature_dim)

        # Dropout layer (feed-forward residual branch only):
        self.dropout = torch.nn.Dropout(dropout)

        # Attention layer: dropout deliberately fixed to 0.0 here.
        self.attention = torch.nn.MultiheadAttention(
            embed_dim=feature_dim,
            num_heads=attention_heads,
            dropout=0.0,
            batch_first=True
        )

        # Feed-forward layer: expand -> GELU -> project back.
        self.feed_forward = torch.nn.Sequential(
            torch.nn.Linear(feature_dim, int(feed_forward_multiplier * feature_dim)),
            torch.nn.GELU(),
            torch.nn.Linear(int(feed_forward_multiplier * feature_dim), feature_dim),
        )

    def forward(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
        """
        Forward pass of a Transformer encoder block.

        Parameters
        ----------
        x : torch.Tensor
            Input tensor of shape (batch_size, sequence_length, feature_dim).
        mask : torch.Tensor or None, optional
            Boolean mask indicating valid sequence positions.
            Shape: (batch_size, sequence_length).
            If `valid_padding` is True, True values denote valid tokens.
            Otherwise, True values denote masked (invalid) positions.

        Returns
        -------
        x : torch.Tensor
            Output tensor of the same shape as the input
            (batch_size, sequence_length, feature_dim).
        """

        # Convert mask: normalize to both conventions — key_padding_mask
        # (True = pad, as MultiheadAttention expects) and valid_mask
        # (True = valid).
        if mask is not None and self.valid_padding:
            key_padding_mask = ~mask.bool()  # True = pad
            valid_mask = mask.bool()
        elif mask is not None:
            key_padding_mask = mask.bool()
            valid_mask = ~mask.bool()
        else:
            key_padding_mask = None
            valid_mask = None

        # Detect fully padded sequences: attending over an all-pad row
        # would produce NaNs, so these rows are special-cased below.
        if valid_mask is not None:
            all_pad = ~valid_mask.any(dim=-1)  # (B,)
        else:
            all_pad = None

        # Pre-normalization (Pre-LN applies LayerNorm before attention):
        if self.pre_normalize:
            h = self.norm_in(x)
        else:
            h = x

        # Attention (guard against fully padded sequences): zero those
        # rows and un-mask them so softmax stays finite; their output is
        # re-zeroed at the end of the block.
        if all_pad is not None and all_pad.any():
            h_attn = h.clone()
            h_attn[all_pad] = 0.0

            if key_padding_mask is not None:
                key_padding_mask = key_padding_mask.clone()
                key_padding_mask[all_pad] = False
        else:
            h_attn = h

        attn_out, _ = self.attention(
            h_attn, h_attn, h_attn,
            key_padding_mask=key_padding_mask,
            need_weights=False,
        )
        # Residual connection around attention:
        x = x + attn_out

        # Post-attention normalization: in the Post-LN path norm_in is
        # reused here (after the attention residual); in the Pre-LN path
        # norm_out precedes the feed-forward sublayer.
        if not self.pre_normalize:
            z = self.norm_in(x)
        else:
            z = self.norm_out(x)

        # Feed-forward with dropout on its residual branch:
        z = self.feed_forward(z)
        x = x + self.dropout(z)

        # Post-LN: final normalization after the feed-forward residual.
        if not self.pre_normalize:
            x = self.norm_out(x)

        # Re-pad fully padded sequences: force their outputs back to zero.
        if all_pad is not None:
            x = x.masked_fill(all_pad[:, None, None], 0.0)

        return x
174
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
175
+ # END OF FILE #
176
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
src/model/transformers/pooling.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
2
+ # #
3
+ # This file was created by: Alberto Palomo Alonso #
4
+ # Universidad de Alcalá - Escuela Politécnica Superior #
5
+ # #
6
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
7
+ # Import statements:
8
+ import torch
9
+
10
+
11
class MaskedMeanPooling(torch.nn.Module):
    """
    Mean pooling layer with explicit masking support.

    This layer computes the mean over the sequence dimension while
    ignoring padded elements according to a boolean mask. It supports
    both PyTorch-style padding masks and valid-position masks.
    """

    def __init__(self, valid_pad: bool = True, eps: float = 1e-6):
        """
        Initialize the masked mean pooling layer.

        Args:
            valid_pad (bool, optional): Mask interpretation mode. If True,
                `True` values in the mask indicate valid (non-padded) positions.
                If False, `True` values indicate padded positions, following
                PyTorch-style padding conventions. Defaults to True.
            eps (float, optional): Small constant to avoid division by zero
                when all positions are masked. Defaults to 1e-6.
        """
        super().__init__()
        self.valid_pad = valid_pad
        self.eps = eps

    def forward(
            self,
            x: torch.Tensor,
            mask: torch.Tensor
    ) -> tuple[torch.Tensor, torch.Tensor]:
        """
        Apply masked mean pooling.

        Args:
            x (torch.Tensor): Input tensor of shape (..., S, D), where
                S is the sequence length and D the feature dimension.
            mask (torch.Tensor): Boolean mask tensor of shape (..., S),
                or None to treat every position as valid. The
                interpretation of True/False depends on `valid_pad`.

        Returns:
            tuple:
                torch.Tensor: Pooled tensor of shape (..., D).
                torch.Tensor: Updated valid mask after pooling of shape (..., ).
        """
        # Mask handling. Fixed: the default mask must cover every leading
        # dimension plus the sequence axis (x.shape[:-1]); the previous
        # hard-coded x.shape[:3] included the feature dim for 3-D inputs
        # and broke broadcasting below.
        if mask is None:
            valid_mask = torch.ones(x.shape[:-1], dtype=torch.bool, device=x.device)
        else:
            valid_mask = mask

        # Normalize to "True = valid" convention:
        if not self.valid_pad:
            valid_mask = torch.logical_not(valid_mask)

        valid_mask = valid_mask.unsqueeze(-1).to(x.dtype)   # (..., S, 1)
        summed = torch.sum(x * valid_mask, dim=-2)          # (..., D)
        # Clamp avoids division by zero when a sequence is fully padded:
        denom = valid_mask.sum(dim=-2).clamp(min=self.eps)  # (..., 1)

        # Valid mask pooling (any valid position keeps the sequence valid):
        valid_mask = valid_mask.squeeze(-1).any(dim=-1)

        return summed / denom, valid_mask
76
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
77
+ # END OF FILE #
78
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
src/model/transformers/positional_encoding.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
2
+ # #
3
+ # This file was created by: Alberto Palomo Alonso #
4
+ # Universidad de Alcalá - Escuela Politécnica Superior #
5
+ # #
6
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
7
+ # Import statements:
8
+ import torch
9
+
10
+
11
class PositionalEncoding(torch.nn.Module):
    """
    Sinusoidal positional encoding module for Transformer models.

    This module injects information about the relative or absolute position of
    tokens in a sequence by adding fixed sinusoidal embeddings to the input
    embeddings. The positional encodings are non-learnable and follow the
    formulation introduced in the original Transformer architecture.
    """
    def __init__(self, emb_dim: int, max_len: int = 5000, **kwargs):
        """
        Initialize the positional encoding module.

        Parameters
        ----------
        emb_dim : int
            Dimensionality of the embedding space. Both even and odd
            values are supported.
        max_len : int, optional
            Maximum supported sequence length for which positional encodings
            are precomputed.
        """
        super().__init__(**kwargs)

        # Create positional encodings:
        pe = torch.zeros(max_len, emb_dim)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, emb_dim, 2).float() * -(torch.log(torch.tensor(10000.0)) / emb_dim))
        pe[:, 0::2] = torch.sin(position * div_term)
        # Fixed: slice the cosine frequencies so odd `emb_dim` works.
        # There are emb_dim // 2 cosine slots but ceil(emb_dim / 2)
        # frequencies; without the slice an odd emb_dim raised a
        # shape-mismatch error. Even dims are unaffected.
        pe[:, 1::2] = torch.cos(position * div_term[:emb_dim // 2])
        pe = pe.unsqueeze(0)  # (1, max_len, emb_dim) for batch broadcasting

        # Register as a buffer (moves with the module, not a parameter):
        self.register_buffer('positional_encoding', pe)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Add positional encodings to the input embeddings.

        Parameters
        ----------
        x : torch.Tensor
            Input tensor of shape (batch_size, sequence_length, emb_dim).

        Returns
        -------
        torch.Tensor
            Tensor of the same shape as the input with positional encodings added.
        """
        return x + self.positional_encoding[:, :x.size(1), :]
60
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
61
+ # END OF FILE #
62
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
train/config.py ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
2
+ # #
3
+ # This file was created by: Alberto Palomo Alonso #
4
+ # Universidad de Alcalá - Escuela Politécnica Superior #
5
+ # #
6
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
7
+ # Import statements:
8
+ import os
9
+ from dataclasses import dataclass
10
+ from src.model import ModelConfig, CoSeNetConfig, TransformerConfig
11
+ from src.dataset import DatasetConfig
12
+
13
+
14
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
15
+ # SETUP CONFIGURATION #
16
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
17
@dataclass
class SetupConfig:
    """
    Configuration parameters related to the execution environment and logging.

    This configuration controls device selection, checkpointing behavior,
    reproducibility settings, and logging paths for an experiment.
    """
    # Index of the compute device to use:
    device_number: int = 0
    # Checkpointing frequency — presumably in epochs; confirm in the trainer:
    save_model_each: int = 0
    # RNG seed; None presumably means it is chosen at setup time — TODO confirm:
    seed: int | None = None
    # Root directory for logs/checkpoints; None until overridden:
    logging_path: str | None = None
    # Whether to resume training from an existing checkpoint:
    reload_checkpoint: bool = False
+
31
+
32
def overwrite_setup_config() -> SetupConfig:
    """
    Build the setup configuration for this experiment.

    Overrides the default environment settings with the experiment's
    checkpoint frequency, checkpoint reloading flag, and logging path.

    Returns:
        SetupConfig: The configured setup configuration object.
    """
    setup = SetupConfig()
    setup.save_model_each = 1
    setup.reload_checkpoint = True
    setup.logging_path = r'/workspace/logs'
    return setup
47
+
48
+
49
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
50
+ # TRAINING CONFIGURATION #
51
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
52
@dataclass
class TrainConfig:
    """
    Training configuration container.

    This dataclass aggregates model, dataset, and setup configurations,
    together with optimization and training hyperparameters.
    """
    # Linked configurations (populated by `configuration()`):
    model_config: ModelConfig | None = None
    dataset_config: DatasetConfig | None = None
    setup_config: SetupConfig | None = None

    # Training parameters:
    batch_size: int = 32   # samples per optimization step
    num_epochs: int = 100  # total number of training epochs

    # Optimizer parameters:
    learning_rate: float = 1e-4      # initial learning rate
    learning_rate_min: float = 1e-5  # lower bound — presumably for an LR scheduler; TODO confirm
    weight_decay: float = 1e-8       # L2-style regularization strength
    betas: tuple[float, float] = (0.5, 0.999)  # presumably Adam-style betas; verify against the optimizer
74
+
75
+
76
def overwrite_train_config() -> TrainConfig:
    """
    Build the training configuration for this experiment.

    Overrides the default batch size, epoch count, and optimizer
    hyperparameters (learning-rate range and weight decay).

    Returns:
        TrainConfig: The configured training configuration object.
    """
    train = TrainConfig()
    train.num_epochs = 200
    train.batch_size = 4
    train.weight_decay = 1e-6
    train.learning_rate = 5e-4
    train.learning_rate_min = 5e-5
    return train
93
+
94
+
95
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
96
+ # DATASET CONFIGURATION #
97
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
98
def overwrite_dataset_config() -> DatasetConfig:
    """
    Build the dataset configuration for this experiment.

    Sets the file paths and usage percentages for the training,
    validation, and test splits.

    Returns:
        DatasetConfig: The configured dataset configuration object.
    """
    dataset = DatasetConfig()
    dataset.train_data_path = r"/workspace/data/tokens-A000-segmentation"
    dataset.val_data_path = r"/workspace/data/tokens-A001-segmentation"
    dataset.test_data_path = r"/workspace/data/tokens-A002-segmentation"
    # Use the full extent of every split:
    for split in ("train", "val", "test"):
        setattr(dataset, f"{split}_percentage", 1.0)
    return dataset
116
+
117
+
118
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
119
+ # MODEL CONFIGURATION #
120
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
121
def overwrite_model_config() -> ModelConfig:
    """
    Build the model configuration for this experiment.

    Defines the architecture-level parameters: vocabulary size, embedding
    dimensionality, CoSeNet settings, and the stack of Transformer
    encoder configurations.

    Returns:
        ModelConfig: The configured model configuration object.
    """
    model = ModelConfig()

    # High-level params:
    model.vocab_size = 32_768
    model.model_dim = 256
    model.valid_padding = True

    # CoSeNet params:
    model.cosenet = CoSeNetConfig(
        trainable=True,
        init_scale=5.0
    )

    # Transformer params: two identical encoder layers built from one
    # shared keyword set (each layer gets its own config instance).
    encoder_kwargs = {
        "attention_heads": 16,
        "feed_forward_multiplier": 8,
        "dropout": 0.0,
        "pre_normalize": True
    }
    model.transformers = [TransformerConfig(**encoder_kwargs) for _ in range(2)]

    return model
165
+
166
+
167
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
168
+ # WHOLE CONFIGURATION #
169
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
170
def configuration() -> TrainConfig:
    """
    Create the experiment configuration
    :return: A TrainConfig configuration object
    """
    config = overwrite_train_config()
    config.setup_config = overwrite_setup_config()
    config.model_config = overwrite_model_config()
    config.dataset_config = overwrite_dataset_config()

    # Sanity checks on dataset paths and split percentages:
    dataset = config.dataset_config
    if not os.path.exists(dataset.train_data_path):
        raise FileNotFoundError(f"Train data path does not exist: {dataset.train_data_path}")
    if not os.path.exists(dataset.val_data_path):
        raise FileNotFoundError(f"Validation data path does not exist: {dataset.val_data_path}")
    if not 0.0 < dataset.train_percentage <= 1.0:
        raise ValueError("Train percentage must be in (0.0, 1.0]")
    if not 0.0 < dataset.val_percentage <= 1.0:
        raise ValueError("Validation percentage must be in (0.0, 1.0]")

    return config
191
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
192
+ # END OF FILE #
193
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
train/train_logs/config.json ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_config": {
3
+ "vocab_size": 32768,
4
+ "model_dim": 256,
5
+ "max_tokens": 382,
6
+ "max_sentences": 384,
7
+ "valid_padding": true,
8
+ "cosenet": {
9
+ "trainable": true,
10
+ "init_scale": 5.0
11
+ },
12
+ "transformers": [
13
+ {
14
+ "attention_heads": 16,
15
+ "feed_forward_multiplier": 8,
16
+ "dropout": 0.0,
17
+ "pre_normalize": true
18
+ },
19
+ {
20
+ "attention_heads": 16,
21
+ "feed_forward_multiplier": 8,
22
+ "dropout": 0.0,
23
+ "pre_normalize": true
24
+ }
25
+ ]
26
+ },
27
+ "dataset_config": {
28
+ "train_data_path": "/workspace/data/tokens-A000-segmentation",
29
+ "val_data_path": "/workspace/data/tokens-A001-segmentation",
30
+ "test_data_path": "/workspace/data/tokens-A002-segmentation",
31
+ "train_percentage": 1.0,
32
+ "val_percentage": 1.0,
33
+ "test_percentage": 1.0,
34
+ "num_workers": 0,
35
+ "shuffle_train": true,
36
+ "shuffle_val": true
37
+ },
38
+ "setup_config": {
39
+ "device_number": 0,
40
+ "save_model_each": 1,
41
+ "seed": null,
42
+ "logging_path": "/workspace/logs",
43
+ "reload_checkpoint": true
44
+ },
45
+ "batch_size": 4,
46
+ "num_epochs": 200,
47
+ "learning_rate": 0.0005,
48
+ "learning_rate_min": 5e-05,
49
+ "weight_decay": 1e-06,
50
+ "betas": [
51
+ 0.5,
52
+ 0.999
53
+ ]
54
+ }
train/train_logs/logfile.log ADDED
@@ -0,0 +1,489 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-12-26 14:45:56,651: [INFO] Logger initialized with writer handler at: /workspace/logs/logfile.log
2
+ 2025-12-26 14:45:56,659: [INFO] TensorBoard logs will be stored in: /workspace/logs/logs
3
+ 2025-12-26 14:45:56,659: [INFO] Model checkpoints will be stored in: /workspace/logs/checkpoints
4
+ 2025-12-26 14:45:56,672: [INFO] TensorBoard running at http://0.0.0.0:6006/ (pid=76392)
5
+ 2025-12-26 14:45:56,680: [INFO] Initializer set up seed: 1766760356
6
+ 2025-12-26 14:45:56,728: [INFO] PyTorch is now configured to use GPU 0: NVIDIA A40
7
+ 2025-12-26 14:45:56,729: [INFO] [GPU 0 - NVIDIA A40] Memory Stats:
8
+ 2025-12-26 14:45:56,729: [INFO] Total Memory : 45498.00 MB
9
+ 2025-12-26 14:45:56,730: [INFO] Currently Allocated : 0.00 MB
10
+ 2025-12-26 14:45:56,730: [INFO] Currently Reserved : 0.00 MB
11
+ 2025-12-26 14:45:56,730: [INFO] Max Allocated : 0.00 MB
12
+ 2025-12-26 14:45:56,731: [INFO] Max Reserved : 0.00 MB
13
+ 2025-12-26 14:45:56,731: [INFO] Setup information:
14
+ 2025-12-26 14:45:56,732: [INFO] - Setup path: /workspace/logs
15
+ 2025-12-26 14:45:56,732: [INFO] - Setup checkpoints path: /workspace/logs/checkpoints
16
+ 2025-12-26 14:45:56,732: [INFO] - Setup device: cuda:0
17
+ 2025-12-26 14:45:56,733: [INFO] - Setup seed: 1766760356
18
+ 2025-12-26 14:45:56,733: [INFO] - Setup logger: <Logger src.dlutils.setup.logger (INFO)>
19
+ 2025-12-26 14:45:56,734: [INFO] - Setup writer: <torch.utils.tensorboard.writer.SummaryWriter object at 0x76e7ade77910>
20
+ 2025-12-26 14:45:56,734: [INFO] - Setup save each: 20
21
+ 2025-12-26 14:45:56,737: [INFO] [SegmentationDataset] Loaded dataset: /workspace/data/tokens-A000-segmentation
22
+ 2025-12-26 14:45:56,737: [INFO] [SegmentationDataset] Loaded dataset length: 26510
23
+ 2025-12-26 14:45:56,745: [INFO] [SegmentationDataset] Loaded dataset: /workspace/data/tokens-A001-segmentation
24
+ 2025-12-26 14:45:56,745: [INFO] [SegmentationDataset] Loaded dataset length: 3336
25
+ 2025-12-26 14:45:57,294: [INFO] [TRAIN] Model Configuration:
26
+ {'vocab_size': 32768, 'model_dim': 256, 'max_tokens': 382, 'max_sentences': 384, 'valid_padding': True, 'cosenet': CoSeNetConfig(trainable=True, init_scale=5.0), 'transformers': [TransformerConfig(attention_heads=16, feed_forward_multiplier=8, dropout=0.0, pre_normalize=True), TransformerConfig(attention_heads=16, feed_forward_multiplier=8, dropout=0.0, pre_normalize=True)]}
27
+ 2025-12-26 14:45:57,294: [INFO] [TRAIN] Model parameters: 11.022865 M
28
+ 2025-12-26 14:45:57,295: [INFO] [TRAIN] Trainable parameters: 11.022865 M
29
+ 2025-12-26 14:45:57,295: [INFO] [TRAIN] Training batches: 67
30
+ 2025-12-26 14:47:48,849: [INFO] Epoch [0]: loss = 0.53930415
31
+ 2025-12-26 14:47:50,411: [INFO] Epoch [1]: val_loss = 0.50874352
32
+ 2025-12-26 14:49:37,680: [INFO] Epoch [1]: loss = 0.52674737
33
+ 2025-12-26 14:49:39,116: [INFO] Epoch [2]: val_loss = 0.51872101
34
+ 2025-12-26 14:51:27,172: [INFO] Epoch [2]: loss = 0.52592351
35
+ 2025-12-26 14:51:28,612: [INFO] Epoch [3]: val_loss = 0.51301319
36
+ 2025-12-26 14:53:16,691: [INFO] Epoch [3]: loss = 0.52935326
37
+ 2025-12-26 14:53:18,212: [INFO] Epoch [4]: val_loss = 0.51744863
38
+ 2025-12-26 14:55:05,752: [INFO] Epoch [4]: loss = 0.52446729
39
+ 2025-12-26 14:55:07,327: [INFO] Epoch [5]: val_loss = 0.51929819
40
+ 2025-12-26 14:56:57,434: [INFO] Epoch [5]: loss = 0.52781746
41
+ 2025-12-26 14:56:58,912: [INFO] Epoch [6]: val_loss = 0.52006621
42
+ 2025-12-26 14:58:46,224: [INFO] Epoch [6]: loss = 0.52644637
43
+ 2025-12-26 14:58:47,712: [INFO] Epoch [7]: val_loss = 0.51545532
44
+ 2025-12-26 15:00:34,974: [INFO] Epoch [7]: loss = 0.52535941
45
+ 2025-12-26 15:00:36,412: [INFO] Epoch [8]: val_loss = 0.52077476
46
+ 2025-12-26 15:02:24,083: [INFO] Epoch [8]: loss = 0.52521282
47
+ 2025-12-26 15:02:25,525: [INFO] Epoch [9]: val_loss = 0.51527728
48
+ 2025-12-26 15:04:13,376: [INFO] Epoch [9]: loss = 0.52329010
49
+ 2025-12-26 15:04:14,816: [INFO] Epoch [10]: val_loss = 0.51563372
50
+ 2025-12-26 15:06:03,934: [INFO] Epoch [10]: loss = 0.52397644
51
+ 2025-12-26 15:06:05,412: [INFO] Epoch [11]: val_loss = 0.51372376
52
+ 2025-12-26 15:07:54,323: [INFO] Epoch [11]: loss = 0.52039668
53
+ 2025-12-26 15:07:55,813: [INFO] Epoch [12]: val_loss = 0.51369372
54
+ 2025-12-26 15:09:43,559: [INFO] Epoch [12]: loss = 0.51899378
55
+ 2025-12-26 15:09:45,012: [INFO] Epoch [13]: val_loss = 0.52238202
56
+ 2025-12-26 15:11:32,423: [INFO] Epoch [13]: loss = 0.51784248
57
+ 2025-12-26 15:11:33,912: [INFO] Epoch [14]: val_loss = 0.51489054
58
+ 2025-12-26 15:13:22,761: [INFO] Epoch [14]: loss = 0.50914923
59
+ 2025-12-26 15:13:24,212: [INFO] Epoch [15]: val_loss = 0.50278137
60
+ 2025-12-26 15:15:11,956: [INFO] Epoch [15]: loss = 0.50427987
61
+ 2025-12-26 15:15:13,412: [INFO] Epoch [16]: val_loss = 0.50158396
62
+ 2025-12-26 15:17:01,228: [INFO] Epoch [16]: loss = 0.50178539
63
+ 2025-12-26 15:17:02,711: [INFO] Epoch [17]: val_loss = 0.50242173
64
+ 2025-12-26 15:18:51,266: [INFO] Epoch [17]: loss = 0.49650285
65
+ 2025-12-26 15:18:52,716: [INFO] Epoch [18]: val_loss = 0.50932210
66
+ 2025-12-26 15:20:40,343: [INFO] Epoch [18]: loss = 0.49234502
67
+ 2025-12-26 15:20:41,912: [INFO] Epoch [19]: val_loss = 0.50311281
68
+ 2025-12-26 15:22:29,693: [INFO] Epoch [19]: loss = 0.48797671
69
+ 2025-12-26 15:22:29,695: [INFO] Checkpointing model at epoch 20
70
+ 2025-12-26 15:22:30,454: [INFO] Model checkpointed at epoch 20
71
+ 2025-12-26 15:22:31,912: [INFO] Epoch [20]: val_loss = 0.53549688
72
+ 2025-12-26 15:24:19,843: [INFO] Epoch [20]: loss = 0.48723968
73
+ 2025-12-26 15:24:21,312: [INFO] Epoch [21]: val_loss = 0.49818926
74
+ 2025-12-26 15:26:08,715: [INFO] Epoch [21]: loss = 0.48037165
75
+ 2025-12-26 15:26:10,212: [INFO] Epoch [22]: val_loss = 0.48961075
76
+ 2025-12-26 15:27:59,123: [INFO] Epoch [22]: loss = 0.47390062
77
+ 2025-12-26 15:28:00,911: [INFO] Epoch [23]: val_loss = 0.48781847
78
+ 2025-12-26 15:29:49,056: [INFO] Epoch [23]: loss = 0.46711668
79
+ 2025-12-26 15:29:50,511: [INFO] Epoch [24]: val_loss = 0.47708375
80
+ 2025-12-26 15:31:37,663: [INFO] Epoch [24]: loss = 0.46234217
81
+ 2025-12-26 15:31:39,112: [INFO] Epoch [25]: val_loss = 0.46084376
82
+ 2025-12-26 15:33:26,345: [INFO] Epoch [25]: loss = 0.45538114
83
+ 2025-12-26 15:33:27,812: [INFO] Epoch [26]: val_loss = 0.47136071
84
+ 2025-12-26 15:35:15,250: [INFO] Epoch [26]: loss = 0.45225392
85
+ 2025-12-26 15:35:16,711: [INFO] Epoch [27]: val_loss = 0.47011130
86
+ 2025-12-26 15:37:04,599: [INFO] Epoch [27]: loss = 0.44760030
87
+ 2025-12-26 15:37:06,112: [INFO] Epoch [28]: val_loss = 0.46140307
88
+ 2025-12-26 15:38:54,426: [INFO] Epoch [28]: loss = 0.44472487
89
+ 2025-12-26 15:38:55,912: [INFO] Epoch [29]: val_loss = 0.47098119
90
+ 2025-12-26 15:40:43,445: [INFO] Epoch [29]: loss = 0.43989357
91
+ 2025-12-26 15:40:44,911: [INFO] Epoch [30]: val_loss = 0.45539117
92
+ 2025-12-26 15:42:32,383: [INFO] Epoch [30]: loss = 0.43657149
93
+ 2025-12-26 15:42:33,816: [INFO] Epoch [31]: val_loss = 0.46862131
94
+ 2025-12-26 15:44:21,074: [INFO] Epoch [31]: loss = 0.43649050
95
+ 2025-12-26 15:44:22,511: [INFO] Epoch [32]: val_loss = 0.45548641
96
+ 2025-12-26 15:46:09,812: [INFO] Epoch [32]: loss = 0.43346542
97
+ 2025-12-26 15:46:11,312: [INFO] Epoch [33]: val_loss = 0.45997839
98
+ 2025-12-26 15:47:59,053: [INFO] Epoch [33]: loss = 0.43235683
99
+ 2025-12-26 15:48:00,511: [INFO] Epoch [34]: val_loss = 0.47154692
100
+ 2025-12-26 15:49:47,991: [INFO] Epoch [34]: loss = 0.42891757
101
+ 2025-12-26 15:49:49,416: [INFO] Epoch [35]: val_loss = 0.46223042
102
+ 2025-12-26 15:51:36,793: [INFO] Epoch [35]: loss = 0.42735399
103
+ 2025-12-26 15:51:38,216: [INFO] Epoch [36]: val_loss = 0.46173553
104
+ 2025-12-26 15:53:25,570: [INFO] Epoch [36]: loss = 0.42965186
105
+ 2025-12-26 15:53:27,016: [INFO] Epoch [37]: val_loss = 0.46098506
106
+ 2025-12-26 15:55:14,511: [INFO] Epoch [37]: loss = 0.42778122
107
+ 2025-12-26 15:55:16,012: [INFO] Epoch [38]: val_loss = 0.46018566
108
+ 2025-12-26 15:57:06,234: [INFO] Epoch [38]: loss = 0.42445267
109
+ 2025-12-26 15:57:07,711: [INFO] Epoch [39]: val_loss = 0.46550667
110
+ 2025-12-26 15:58:59,230: [INFO] Epoch [39]: loss = 0.42354161
111
+ 2025-12-26 15:58:59,232: [INFO] Checkpointing model at epoch 40
112
+ 2025-12-26 15:58:59,945: [INFO] Model checkpointed at epoch 40
113
+ 2025-12-26 15:59:01,511: [INFO] Epoch [40]: val_loss = 0.47303247
114
+ 2025-12-26 16:00:49,480: [INFO] Epoch [40]: loss = 0.42338467
115
+ 2025-12-26 16:00:50,911: [INFO] Epoch [41]: val_loss = 0.45826835
116
+ 2025-12-26 16:02:38,743: [INFO] Epoch [41]: loss = 0.41971716
117
+ 2025-12-26 16:02:40,212: [INFO] Epoch [42]: val_loss = 0.45490133
118
+ 2025-12-26 16:04:28,045: [INFO] Epoch [42]: loss = 0.41987514
119
+ 2025-12-26 16:04:29,512: [INFO] Epoch [43]: val_loss = 0.45860666
120
+ 2025-12-26 16:06:16,948: [INFO] Epoch [43]: loss = 0.41933024
121
+ 2025-12-26 16:06:18,411: [INFO] Epoch [44]: val_loss = 0.45629129
122
+ 2025-12-26 16:08:06,282: [INFO] Epoch [44]: loss = 0.41593552
123
+ 2025-12-26 16:08:07,716: [INFO] Epoch [45]: val_loss = 0.46409211
124
+ 2025-12-26 16:09:55,161: [INFO] Epoch [45]: loss = 0.41721227
125
+ 2025-12-26 16:09:56,612: [INFO] Epoch [46]: val_loss = 0.46598683
126
+ 2025-12-26 16:11:43,939: [INFO] Epoch [46]: loss = 0.41726764
127
+ 2025-12-26 16:11:45,411: [INFO] Epoch [47]: val_loss = 0.45663830
128
+ 2025-12-26 16:13:32,862: [INFO] Epoch [47]: loss = 0.41537570
129
+ 2025-12-26 16:13:34,315: [INFO] Epoch [48]: val_loss = 0.46740513
130
+ 2025-12-26 16:15:21,546: [INFO] Epoch [48]: loss = 0.41457776
131
+ 2025-12-26 16:15:23,112: [INFO] Epoch [49]: val_loss = 0.44048135
132
+ 2025-12-26 16:17:10,274: [INFO] Epoch [49]: loss = 0.41388101
133
+ 2025-12-26 16:17:11,715: [INFO] Epoch [50]: val_loss = 0.45519451
134
+ 2025-12-26 16:18:58,990: [INFO] Epoch [50]: loss = 0.41285109
135
+ 2025-12-26 16:19:00,512: [INFO] Epoch [51]: val_loss = 0.45673202
136
+ 2025-12-26 16:20:47,636: [INFO] Epoch [51]: loss = 0.41195465
137
+ 2025-12-26 16:20:49,116: [INFO] Epoch [52]: val_loss = 0.45198240
138
+ 2025-12-26 16:22:36,418: [INFO] Epoch [52]: loss = 0.40953517
139
+ 2025-12-26 16:22:37,893: [INFO] Epoch [53]: val_loss = 0.47122019
140
+ 2025-12-26 16:24:25,331: [INFO] Epoch [53]: loss = 0.40789293
141
+ 2025-12-26 16:24:26,812: [INFO] Epoch [54]: val_loss = 0.44196667
142
+ 2025-12-26 16:26:14,026: [INFO] Epoch [54]: loss = 0.40474147
143
+ 2025-12-26 16:26:15,512: [INFO] Epoch [55]: val_loss = 0.46978565
144
+ 2025-12-26 16:28:03,793: [INFO] Epoch [55]: loss = 0.40504389
145
+ 2025-12-26 16:28:05,272: [INFO] Epoch [56]: val_loss = 0.47313605
146
+ 2025-12-26 16:29:52,585: [INFO] Epoch [56]: loss = 0.40562682
147
+ 2025-12-26 16:29:54,017: [INFO] Epoch [57]: val_loss = 0.46668073
148
+ 2025-12-26 16:31:41,107: [INFO] Epoch [57]: loss = 0.40768713
149
+ 2025-12-26 16:31:42,529: [INFO] Epoch [58]: val_loss = 0.45173921
150
+ 2025-12-26 16:33:29,962: [INFO] Epoch [58]: loss = 0.40458906
151
+ 2025-12-26 16:33:31,411: [INFO] Epoch [59]: val_loss = 0.46093515
152
+ 2025-12-26 16:35:18,548: [INFO] Epoch [59]: loss = 0.40175750
153
+ 2025-12-26 16:35:18,549: [INFO] Checkpointing model at epoch 60
154
+ 2025-12-26 16:35:19,200: [INFO] Model checkpointed at epoch 60
155
+ 2025-12-26 16:35:20,711: [INFO] Epoch [60]: val_loss = 0.46230425
156
+ 2025-12-26 16:37:07,991: [INFO] Epoch [60]: loss = 0.40113024
157
+ 2025-12-26 16:37:09,427: [INFO] Epoch [61]: val_loss = 0.46095682
158
+ 2025-12-26 16:38:57,025: [INFO] Epoch [61]: loss = 0.40212381
159
+ 2025-12-26 16:38:58,604: [INFO] Epoch [62]: val_loss = 0.45801103
160
+ 2025-12-26 16:40:47,358: [INFO] Epoch [62]: loss = 0.40149038
161
+ 2025-12-26 16:40:48,911: [INFO] Epoch [63]: val_loss = 0.45971834
162
+ 2025-12-26 16:42:36,483: [INFO] Epoch [63]: loss = 0.40096813
163
+ 2025-12-26 16:42:37,917: [INFO] Epoch [64]: val_loss = 0.47312803
164
+ 2025-12-26 16:44:25,338: [INFO] Epoch [64]: loss = 0.40213521
165
+ 2025-12-26 16:44:26,895: [INFO] Epoch [65]: val_loss = 0.45463914
166
+ 2025-12-26 16:46:14,170: [INFO] Epoch [65]: loss = 0.39824201
167
+ 2025-12-26 16:46:15,615: [INFO] Epoch [66]: val_loss = 0.47252337
168
+ 2025-12-26 16:48:03,455: [INFO] Epoch [66]: loss = 0.39898236
169
+ 2025-12-26 16:48:04,912: [INFO] Epoch [67]: val_loss = 0.46137960
170
+ 2025-12-26 16:49:52,778: [INFO] Epoch [67]: loss = 0.40269130
171
+ 2025-12-26 16:49:54,216: [INFO] Epoch [68]: val_loss = 0.47056969
172
+ 2025-12-26 16:51:41,521: [INFO] Epoch [68]: loss = 0.39804779
173
+ 2025-12-26 16:51:43,012: [INFO] Epoch [69]: val_loss = 0.46284741
174
+ 2025-12-26 16:53:30,778: [INFO] Epoch [69]: loss = 0.39931213
175
+ 2025-12-26 16:53:32,211: [INFO] Epoch [70]: val_loss = 0.47174325
176
+ 2025-12-26 16:55:19,747: [INFO] Epoch [70]: loss = 0.39947561
177
+ 2025-12-26 16:55:21,211: [INFO] Epoch [71]: val_loss = 0.47359799
178
+ 2025-12-26 16:57:08,420: [INFO] Epoch [71]: loss = 0.39680641
179
+ 2025-12-26 16:57:09,912: [INFO] Epoch [72]: val_loss = 0.45985634
180
+ 2025-12-26 16:58:57,465: [INFO] Epoch [72]: loss = 0.39784966
181
+ 2025-12-26 16:58:58,911: [INFO] Epoch [73]: val_loss = 0.47379973
182
+ 2025-12-26 17:00:46,781: [INFO] Epoch [73]: loss = 0.39575548
183
+ 2025-12-26 17:00:48,217: [INFO] Epoch [74]: val_loss = 0.46827143
184
+ 2025-12-26 17:02:35,956: [INFO] Epoch [74]: loss = 0.39844352
185
+ 2025-12-26 17:02:37,411: [INFO] Epoch [75]: val_loss = 0.48436255
186
+ 2025-12-26 17:04:25,013: [INFO] Epoch [75]: loss = 0.39737436
187
+ 2025-12-26 17:04:26,512: [INFO] Epoch [76]: val_loss = 0.45234020
188
+ 2025-12-26 17:06:13,974: [INFO] Epoch [76]: loss = 0.39371587
189
+ 2025-12-26 17:06:15,415: [INFO] Epoch [77]: val_loss = 0.45753057
190
+ 2025-12-26 17:08:03,455: [INFO] Epoch [77]: loss = 0.39684283
191
+ 2025-12-26 17:08:04,916: [INFO] Epoch [78]: val_loss = 0.46107266
192
+ 2025-12-26 17:09:52,265: [INFO] Epoch [78]: loss = 0.39561052
193
+ 2025-12-26 17:09:53,711: [INFO] Epoch [79]: val_loss = 0.48726222
194
+ 2025-12-26 17:11:40,915: [INFO] Epoch [79]: loss = 0.39534942
195
+ 2025-12-26 17:11:40,917: [INFO] Checkpointing model at epoch 80
196
+ 2025-12-26 17:11:41,448: [INFO] Model checkpointed at epoch 80
197
+ 2025-12-26 17:11:42,912: [INFO] Epoch [80]: val_loss = 0.47510581
198
+ 2025-12-26 17:13:31,165: [INFO] Epoch [80]: loss = 0.39408069
199
+ 2025-12-26 17:13:32,617: [INFO] Epoch [81]: val_loss = 0.46646976
200
+ 2025-12-26 17:15:20,095: [INFO] Epoch [81]: loss = 0.39456047
201
+ 2025-12-26 17:15:21,517: [INFO] Epoch [82]: val_loss = 0.47777673
202
+ 2025-12-26 17:17:10,031: [INFO] Epoch [82]: loss = 0.39687150
203
+ 2025-12-26 17:17:11,512: [INFO] Epoch [83]: val_loss = 0.47680868
204
+ 2025-12-26 17:18:59,688: [INFO] Epoch [83]: loss = 0.39627865
205
+ 2025-12-26 17:19:01,115: [INFO] Epoch [84]: val_loss = 0.47353493
206
+ 2025-12-26 17:20:48,468: [INFO] Epoch [84]: loss = 0.39516608
207
+ 2025-12-26 17:20:49,912: [INFO] Epoch [85]: val_loss = 0.47541119
208
+ 2025-12-26 17:22:38,068: [INFO] Epoch [85]: loss = 0.39570387
209
+ 2025-12-26 17:22:39,517: [INFO] Epoch [86]: val_loss = 0.46904831
210
+ 2025-12-26 17:24:26,979: [INFO] Epoch [86]: loss = 0.39411988
211
+ 2025-12-26 17:24:28,416: [INFO] Epoch [87]: val_loss = 0.47183328
212
+ 2025-12-26 17:26:16,242: [INFO] Epoch [87]: loss = 0.39453237
213
+ 2025-12-26 17:26:17,712: [INFO] Epoch [88]: val_loss = 0.48088008
214
+ 2025-12-26 17:28:05,674: [INFO] Epoch [88]: loss = 0.39265428
215
+ 2025-12-26 17:28:07,111: [INFO] Epoch [89]: val_loss = 0.46431010
216
+ 2025-12-26 17:29:54,253: [INFO] Epoch [89]: loss = 0.39518696
217
+ 2025-12-26 17:29:55,711: [INFO] Epoch [90]: val_loss = 0.47148239
218
+ 2025-12-26 17:31:43,155: [INFO] Epoch [90]: loss = 0.39560070
219
+ 2025-12-26 17:31:44,711: [INFO] Epoch [91]: val_loss = 0.47001378
220
+ 2025-12-26 17:33:32,048: [INFO] Epoch [91]: loss = 0.39522415
221
+ 2025-12-26 17:33:33,512: [INFO] Epoch [92]: val_loss = 0.47427877
222
+ 2025-12-26 17:35:20,792: [INFO] Epoch [92]: loss = 0.39726472
223
+ 2025-12-26 17:35:22,230: [INFO] Epoch [93]: val_loss = 0.48291658
224
+ 2025-12-26 17:37:09,543: [INFO] Epoch [93]: loss = 0.39664398
225
+ 2025-12-26 17:37:11,012: [INFO] Epoch [94]: val_loss = 0.49081665
226
+ 2025-12-26 17:38:58,458: [INFO] Epoch [94]: loss = 0.39135196
227
+ 2025-12-26 17:39:00,111: [INFO] Epoch [95]: val_loss = 0.47873766
228
+ 2025-12-26 17:40:47,362: [INFO] Epoch [95]: loss = 0.39417184
229
+ 2025-12-26 17:40:48,811: [INFO] Epoch [96]: val_loss = 0.48776019
230
+ 2025-12-26 17:42:36,318: [INFO] Epoch [96]: loss = 0.39321537
231
+ 2025-12-26 17:42:37,812: [INFO] Epoch [97]: val_loss = 0.46243800
232
+ 2025-12-26 17:44:25,656: [INFO] Epoch [97]: loss = 0.39767619
233
+ 2025-12-26 17:44:27,112: [INFO] Epoch [98]: val_loss = 0.45655080
234
+ 2025-12-26 17:46:14,472: [INFO] Epoch [98]: loss = 0.39206413
235
+ 2025-12-26 17:46:16,011: [INFO] Epoch [99]: val_loss = 0.46890352
236
+ 2025-12-26 17:48:03,170: [INFO] Epoch [99]: loss = 0.39380527
237
+ 2025-12-26 17:48:03,171: [INFO] Checkpointing model at epoch 100
238
+ 2025-12-26 17:48:03,725: [INFO] Model checkpointed at epoch 100
239
+ 2025-12-26 17:48:05,212: [INFO] Epoch [100]: val_loss = 0.49273304
240
+ 2025-12-26 17:49:52,333: [INFO] Epoch [100]: loss = 0.39218957
241
+ 2025-12-26 17:49:53,811: [INFO] Epoch [101]: val_loss = 0.47090062
242
+ 2025-12-26 17:51:41,096: [INFO] Epoch [101]: loss = 0.39274643
243
+ 2025-12-26 17:51:42,528: [INFO] Epoch [102]: val_loss = 0.46902628
244
+ 2025-12-26 17:53:29,975: [INFO] Epoch [102]: loss = 0.39481238
245
+ 2025-12-26 17:53:31,411: [INFO] Epoch [103]: val_loss = 0.48112577
246
+ 2025-12-26 17:55:18,420: [INFO] Epoch [103]: loss = 0.39440405
247
+ 2025-12-26 17:55:19,912: [INFO] Epoch [104]: val_loss = 0.49355557
248
+ 2025-12-26 17:57:07,125: [INFO] Epoch [104]: loss = 0.39165780
249
+ 2025-12-26 17:57:08,611: [INFO] Epoch [105]: val_loss = 0.48409717
250
+ 2025-12-26 17:58:56,554: [INFO] Epoch [105]: loss = 0.39554418
251
+ 2025-12-26 17:58:58,011: [INFO] Epoch [106]: val_loss = 0.48656076
252
+ 2025-12-26 18:00:45,347: [INFO] Epoch [106]: loss = 0.39228787
253
+ 2025-12-26 18:00:46,812: [INFO] Epoch [107]: val_loss = 0.48810028
254
+ 2025-12-26 18:02:33,925: [INFO] Epoch [107]: loss = 0.39156697
255
+ 2025-12-26 18:02:35,412: [INFO] Epoch [108]: val_loss = 0.47222325
256
+ 2025-12-26 18:04:23,740: [INFO] Epoch [108]: loss = 0.39423798
257
+ 2025-12-26 18:04:25,212: [INFO] Epoch [109]: val_loss = 0.47254576
258
+ 2025-12-26 18:06:12,535: [INFO] Epoch [109]: loss = 0.39252056
259
+ 2025-12-26 18:06:14,012: [INFO] Epoch [110]: val_loss = 0.48817008
260
+ 2025-12-26 18:08:01,245: [INFO] Epoch [110]: loss = 0.39401426
261
+ 2025-12-26 18:08:02,815: [INFO] Epoch [111]: val_loss = 0.48511344
262
+ 2025-12-26 18:09:50,465: [INFO] Epoch [111]: loss = 0.39587875
263
+ 2025-12-26 18:09:51,912: [INFO] Epoch [112]: val_loss = 0.48533930
264
+ 2025-12-26 18:11:39,151: [INFO] Epoch [112]: loss = 0.39013777
265
+ 2025-12-26 18:11:40,611: [INFO] Epoch [113]: val_loss = 0.48621008
266
+ 2025-12-26 18:13:28,463: [INFO] Epoch [113]: loss = 0.38981328
267
+ 2025-12-26 18:13:29,992: [INFO] Epoch [114]: val_loss = 0.47124515
268
+ 2025-12-26 18:15:17,549: [INFO] Epoch [114]: loss = 0.39461992
269
+ 2025-12-26 18:15:19,012: [INFO] Epoch [115]: val_loss = 0.48522179
270
+ 2025-12-26 18:17:06,234: [INFO] Epoch [115]: loss = 0.39355375
271
+ 2025-12-26 18:17:07,711: [INFO] Epoch [116]: val_loss = 0.49023107
272
+ 2025-12-26 18:18:54,822: [INFO] Epoch [116]: loss = 0.39458753
273
+ 2025-12-26 18:18:56,312: [INFO] Epoch [117]: val_loss = 0.48466966
274
+ 2025-12-26 18:20:43,478: [INFO] Epoch [117]: loss = 0.39362156
275
+ 2025-12-26 18:20:44,916: [INFO] Epoch [118]: val_loss = 0.50641123
276
+ 2025-12-26 18:22:32,316: [INFO] Epoch [118]: loss = 0.39327146
277
+ 2025-12-26 18:22:33,812: [INFO] Epoch [119]: val_loss = 0.47998404
278
+ 2025-12-26 18:24:21,387: [INFO] Epoch [119]: loss = 0.39500003
279
+ 2025-12-26 18:24:21,388: [INFO] Checkpointing model at epoch 120
280
+ 2025-12-26 18:24:21,956: [INFO] Model checkpointed at epoch 120
281
+ 2025-12-26 18:24:23,411: [INFO] Epoch [120]: val_loss = 0.47686255
282
+ 2025-12-26 18:26:11,350: [INFO] Epoch [120]: loss = 0.39359459
283
+ 2025-12-26 18:26:12,812: [INFO] Epoch [121]: val_loss = 0.47847155
284
+ 2025-12-26 18:28:00,158: [INFO] Epoch [121]: loss = 0.39265604
285
+ 2025-12-26 18:28:01,612: [INFO] Epoch [122]: val_loss = 0.48565416
286
+ 2025-12-26 18:29:49,068: [INFO] Epoch [122]: loss = 0.39327284
287
+ 2025-12-26 18:29:50,512: [INFO] Epoch [123]: val_loss = 0.46322163
288
+ 2025-12-26 18:31:37,854: [INFO] Epoch [123]: loss = 0.39371465
289
+ 2025-12-26 18:31:39,322: [INFO] Epoch [124]: val_loss = 0.49423857
290
+ 2025-12-26 18:33:26,668: [INFO] Epoch [124]: loss = 0.39377629
291
+ 2025-12-26 18:33:28,112: [INFO] Epoch [125]: val_loss = 0.50537621
292
+ 2025-12-26 18:35:15,388: [INFO] Epoch [125]: loss = 0.39129652
293
+ 2025-12-26 18:35:16,916: [INFO] Epoch [126]: val_loss = 0.50789308
294
+ 2025-12-26 18:37:05,150: [INFO] Epoch [126]: loss = 0.39120718
295
+ 2025-12-26 18:37:06,611: [INFO] Epoch [127]: val_loss = 0.49176749
296
+ 2025-12-26 18:38:54,793: [INFO] Epoch [127]: loss = 0.39434199
297
+ 2025-12-26 18:38:56,312: [INFO] Epoch [128]: val_loss = 0.48982497
298
+ 2025-12-26 18:40:43,572: [INFO] Epoch [128]: loss = 0.39388789
299
+ 2025-12-26 18:40:45,012: [INFO] Epoch [129]: val_loss = 0.49437147
300
+ 2025-12-26 18:42:32,476: [INFO] Epoch [129]: loss = 0.39485405
301
+ 2025-12-26 18:42:34,012: [INFO] Epoch [130]: val_loss = 0.49246545
302
+ 2025-12-26 18:44:24,072: [INFO] Epoch [130]: loss = 0.39075325
303
+ 2025-12-26 18:44:25,612: [INFO] Epoch [131]: val_loss = 0.51833930
304
+ 2025-12-26 18:46:15,498: [INFO] Epoch [131]: loss = 0.39447027
305
+ 2025-12-26 18:46:16,931: [INFO] Epoch [132]: val_loss = 0.48003947
306
+ 2025-12-26 18:48:05,343: [INFO] Epoch [132]: loss = 0.39434897
307
+ 2025-12-26 18:48:06,812: [INFO] Epoch [133]: val_loss = 0.49718059
308
+ 2025-12-26 18:49:55,181: [INFO] Epoch [133]: loss = 0.39328515
309
+ 2025-12-26 18:49:56,612: [INFO] Epoch [134]: val_loss = 0.48965228
310
+ 2025-12-26 18:51:44,446: [INFO] Epoch [134]: loss = 0.39368132
311
+ 2025-12-26 18:51:45,910: [INFO] Epoch [135]: val_loss = 0.50781692
312
+ 2025-12-26 18:53:34,066: [INFO] Epoch [135]: loss = 0.39521807
313
+ 2025-12-26 18:53:35,517: [INFO] Epoch [136]: val_loss = 0.49129677
314
+ 2025-12-26 18:55:24,158: [INFO] Epoch [136]: loss = 0.39310845
315
+ 2025-12-26 18:55:25,611: [INFO] Epoch [137]: val_loss = 0.50138287
316
+ 2025-12-26 18:57:13,543: [INFO] Epoch [137]: loss = 0.39277331
317
+ 2025-12-26 18:57:15,011: [INFO] Epoch [138]: val_loss = 0.49667891
318
+ 2025-12-26 18:59:02,557: [INFO] Epoch [138]: loss = 0.39367320
319
+ 2025-12-26 18:59:04,012: [INFO] Epoch [139]: val_loss = 0.49262191
320
+ 2025-12-26 19:00:51,448: [INFO] Epoch [139]: loss = 0.39519035
321
+ 2025-12-26 19:00:51,449: [INFO] Checkpointing model at epoch 140
322
+ 2025-12-26 19:00:52,031: [INFO] Model checkpointed at epoch 140
323
+ 2025-12-26 19:00:53,512: [INFO] Epoch [140]: val_loss = 0.50657800
324
+ 2025-12-26 19:02:40,830: [INFO] Epoch [140]: loss = 0.39009643
325
+ 2025-12-26 19:02:42,311: [INFO] Epoch [141]: val_loss = 0.48729330
326
+ 2025-12-26 19:04:29,914: [INFO] Epoch [141]: loss = 0.39552269
327
+ 2025-12-26 19:04:31,412: [INFO] Epoch [142]: val_loss = 0.49246952
328
+ 2025-12-26 19:06:18,642: [INFO] Epoch [142]: loss = 0.39385797
329
+ 2025-12-26 19:06:20,111: [INFO] Epoch [143]: val_loss = 0.49985805
330
+ 2025-12-26 19:08:07,780: [INFO] Epoch [143]: loss = 0.39398983
331
+ 2025-12-26 19:08:09,216: [INFO] Epoch [144]: val_loss = 0.49885565
332
+ 2025-12-26 19:09:56,876: [INFO] Epoch [144]: loss = 0.39509994
333
+ 2025-12-26 19:09:58,312: [INFO] Epoch [145]: val_loss = 0.50998864
334
+ 2025-12-26 19:11:46,490: [INFO] Epoch [145]: loss = 0.39288819
335
+ 2025-12-26 19:11:48,006: [INFO] Epoch [146]: val_loss = 0.53312138
336
+ 2025-12-26 19:12:12,458: [WARNING] [TRAIN] Training interrupted by user. Saving model...
337
+ 2025-12-26 19:12:12,459: [INFO] [TRAIN] Saving model before exiting...
338
+ 2025-12-26 19:12:13,159: [INFO] [TRAIN] Training process finished.
339
+ 2025-12-26 19:24:52,215: [INFO] Logger initialized with writer handler at: /workspace/logs/logfile.log
340
+ 2025-12-26 19:24:52,223: [INFO] TensorBoard logs will be stored in: /workspace/logs/logs
341
+ 2025-12-26 19:24:52,223: [INFO] Model checkpoints will be stored in: /workspace/logs/checkpoints
342
+ 2025-12-26 19:24:52,236: [INFO] TensorBoard running at http://0.0.0.0:6006/ (pid=114689)
343
+ 2025-12-26 19:24:52,246: [INFO] Initializer set up seed: 1766777092
344
+ 2025-12-26 19:24:52,250: [INFO] PyTorch is now configured to use GPU 0: NVIDIA A40
345
+ 2025-12-26 19:24:52,250: [INFO] [GPU 0 - NVIDIA A40] Memory Stats:
346
+ 2025-12-26 19:24:52,251: [INFO] Total Memory : 45498.00 MB
347
+ 2025-12-26 19:24:52,251: [INFO] Currently Allocated : 0.00 MB
348
+ 2025-12-26 19:24:52,251: [INFO] Currently Reserved : 0.00 MB
349
+ 2025-12-26 19:24:52,252: [INFO] Max Allocated : 0.00 MB
350
+ 2025-12-26 19:24:52,252: [INFO] Max Reserved : 0.00 MB
351
+ 2025-12-26 19:24:52,252: [INFO] Setup information:
352
+ 2025-12-26 19:24:52,253: [INFO] - Setup path: /workspace/logs
353
+ 2025-12-26 19:24:52,253: [INFO] - Setup checkpoints path: /workspace/logs/checkpoints
354
+ 2025-12-26 19:24:52,253: [INFO] - Setup device: cuda:0
355
+ 2025-12-26 19:24:52,254: [INFO] - Setup seed: 1766777092
356
+ 2025-12-26 19:24:52,254: [INFO] - Setup logger: <Logger src.dlutils.setup.logger (INFO)>
357
+ 2025-12-26 19:24:52,254: [INFO] - Setup writer: <torch.utils.tensorboard.writer.SummaryWriter object at 0x742242ae8a10>
358
+ 2025-12-26 19:24:52,254: [INFO] - Setup save each: 20
359
+ 2025-12-26 19:24:52,258: [INFO] [SegmentationDataset] Loaded dataset: /workspace/data/tokens-A000-segmentation
360
+ 2025-12-26 19:24:52,259: [INFO] [SegmentationDataset] Loaded dataset length: 26510
361
+ 2025-12-26 19:24:52,282: [INFO] [SegmentationDataset] Loaded dataset: /workspace/data/tokens-A001-segmentation
362
+ 2025-12-26 19:24:52,283: [INFO] [SegmentationDataset] Loaded dataset length: 3336
363
+ 2025-12-26 19:24:52,746: [INFO] [TRAIN] Reloading model, optimizer and scheduler states...
364
+ 2025-12-26 19:24:52,988: [INFO] Model reloaded from /workspace/logs/checkpoints/model_epoch_40.pt at epoch 40 and seed 1766760356
365
+ 2025-12-26 19:24:52,989: [INFO] Optimizer state_dict loaded from /workspace/logs/checkpoints/model_epoch_40.pt
366
+ 2025-12-26 19:24:52,989: [INFO] Scheduler state_dict loaded from /workspace/logs/checkpoints/model_epoch_40.pt
367
+ 2025-12-26 19:24:52,990: [INFO] [TRAIN] Model Configuration:
368
+ {'vocab_size': 32768, 'model_dim': 256, 'max_tokens': 382, 'max_sentences': 384, 'valid_padding': True, 'cosenet': CoSeNetConfig(trainable=True, init_scale=5.0), 'transformers': [TransformerConfig(attention_heads=16, feed_forward_multiplier=8, dropout=0.0, pre_normalize=True), TransformerConfig(attention_heads=16, feed_forward_multiplier=8, dropout=0.0, pre_normalize=True)]}
369
+ 2025-12-26 19:24:52,991: [INFO] [TRAIN] Model parameters: 11.022865 M
370
+ 2025-12-26 19:24:52,991: [INFO] [TRAIN] Trainable parameters: 11.022865 M
371
+ 2025-12-26 19:24:52,992: [INFO] [TRAIN] Training batches: 2651
372
+ 2025-12-26 20:38:24,396: [INFO] Epoch [40]: loss = 0.44221879
373
+ 2025-12-26 20:39:27,131: [INFO] Epoch [41]: val_loss = 0.43405354
374
+ 2025-12-26 21:50:17,867: [INFO] Epoch [41]: loss = 0.42664148
375
+ 2025-12-26 21:51:15,331: [INFO] Epoch [42]: val_loss = 0.42706228
376
+ 2025-12-26 23:02:34,039: [INFO] Epoch [42]: loss = 0.41818042
377
+ 2025-12-26 23:03:32,932: [INFO] Epoch [43]: val_loss = 0.42457028
378
+ 2025-12-27 00:15:19,536: [INFO] Epoch [43]: loss = 0.41249070
379
+ 2025-12-27 00:16:17,232: [INFO] Epoch [44]: val_loss = 0.42501960
380
+ 2025-12-27 01:27:24,113: [INFO] Epoch [44]: loss = 0.40819168
381
+ 2025-12-27 01:28:21,631: [INFO] Epoch [45]: val_loss = 0.42466300
382
+ 2025-12-27 02:39:08,276: [INFO] Epoch [45]: loss = 0.40515616
383
+ 2025-12-27 02:40:05,744: [INFO] Epoch [46]: val_loss = 0.42623002
384
+ 2025-12-27 03:50:51,982: [INFO] Epoch [46]: loss = 0.40199136
385
+ 2025-12-27 03:51:49,542: [INFO] Epoch [47]: val_loss = 0.42870347
386
+ 2025-12-27 05:02:35,984: [INFO] Epoch [47]: loss = 0.40118613
387
+ 2025-12-27 05:03:33,643: [INFO] Epoch [48]: val_loss = 0.42845377
388
+ 2025-12-27 06:14:22,488: [INFO] Epoch [48]: loss = 0.40013945
389
+ 2025-12-27 06:15:19,931: [INFO] Epoch [49]: val_loss = 0.43143284
390
+ 2025-12-27 07:26:05,041: [INFO] Epoch [49]: loss = 0.39879406
391
+ 2025-12-27 07:27:02,632: [INFO] Epoch [50]: val_loss = 0.43078374
392
+ 2025-12-27 08:37:47,083: [INFO] Epoch [50]: loss = 0.39769130
393
+ 2025-12-27 08:38:44,631: [INFO] Epoch [51]: val_loss = 0.43248296
394
+ 2025-12-27 09:49:30,251: [INFO] Epoch [51]: loss = 0.39770448
395
+ 2025-12-27 09:50:27,831: [INFO] Epoch [52]: val_loss = 0.43522716
396
+ 2025-12-27 11:01:17,000: [INFO] Epoch [52]: loss = 0.39691210
397
+ 2025-12-27 11:02:14,669: [INFO] Epoch [53]: val_loss = 0.43381873
398
+ 2025-12-27 12:13:00,032: [INFO] Epoch [53]: loss = 0.39669390
399
+ 2025-12-27 12:13:57,331: [INFO] Epoch [54]: val_loss = 0.43624624
400
+ 2025-12-27 13:24:41,611: [INFO] Epoch [54]: loss = 0.39618159
401
+ 2025-12-27 13:25:38,932: [INFO] Epoch [55]: val_loss = 0.43923773
402
+ 2025-12-27 14:36:23,346: [INFO] Epoch [55]: loss = 0.39546988
403
+ 2025-12-27 14:37:20,669: [INFO] Epoch [56]: val_loss = 0.44194769
404
+ 2025-12-27 15:48:09,320: [INFO] Epoch [56]: loss = 0.39544456
405
+ 2025-12-27 15:49:06,843: [INFO] Epoch [57]: val_loss = 0.43803932
406
+ 2025-12-27 16:59:54,398: [INFO] Epoch [57]: loss = 0.39482789
407
+ 2025-12-27 17:00:52,031: [INFO] Epoch [58]: val_loss = 0.43835160
408
+ 2025-12-27 18:11:44,408: [INFO] Epoch [58]: loss = 0.39461407
409
+ 2025-12-27 18:12:41,958: [INFO] Epoch [59]: val_loss = 0.44492985
410
+ 2025-12-27 19:23:29,892: [INFO] Epoch [59]: loss = 0.39447898
411
+ 2025-12-27 19:23:29,894: [INFO] Checkpointing model at epoch 60
412
+ 2025-12-27 19:23:30,517: [INFO] Model checkpointed at epoch 60
413
+ 2025-12-27 19:24:27,831: [INFO] Epoch [60]: val_loss = 0.44300878
414
+ 2025-12-27 19:27:50,666: [WARNING] [TRAIN] Training interrupted by user. Saving model...
415
+ 2025-12-27 19:27:50,668: [INFO] [TRAIN] Saving model before exiting...
416
+ 2025-12-27 19:27:51,180: [INFO] [TRAIN] Training process finished.
417
+ 2025-12-27 19:33:34,471: [INFO] Logger initialized with writer handler at: /workspace/logs/logfile.log
418
+ 2025-12-27 19:33:34,478: [INFO] TensorBoard logs will be stored in: /workspace/logs/logs
419
+ 2025-12-27 19:33:34,479: [INFO] Model checkpoints will be stored in: /workspace/logs/checkpoints
420
+ 2025-12-27 19:33:34,491: [INFO] TensorBoard running at http://0.0.0.0:6006/ (pid=235187)
421
+ 2025-12-27 19:33:34,498: [INFO] Initializer set up seed: 1766864014
422
+ 2025-12-27 19:33:34,501: [INFO] PyTorch is now configured to use GPU 0: NVIDIA A40
423
+ 2025-12-27 19:33:34,507: [INFO] [GPU 0 - NVIDIA A40] Memory Stats:
424
+ 2025-12-27 19:33:34,507: [INFO] Total Memory : 45498.00 MB
425
+ 2025-12-27 19:33:34,507: [INFO] Currently Allocated : 0.00 MB
426
+ 2025-12-27 19:33:34,507: [INFO] Currently Reserved : 0.00 MB
427
+ 2025-12-27 19:33:34,508: [INFO] Max Allocated : 0.00 MB
428
+ 2025-12-27 19:33:34,508: [INFO] Max Reserved : 0.00 MB
429
+ 2025-12-27 19:33:34,508: [INFO] Setup information:
430
+ 2025-12-27 19:33:34,509: [INFO] - Setup path: /workspace/logs
431
+ 2025-12-27 19:33:34,509: [INFO] - Setup checkpoints path: /workspace/logs/checkpoints
432
+ 2025-12-27 19:33:34,509: [INFO] - Setup device: cuda:0
433
+ 2025-12-27 19:33:34,510: [INFO] - Setup seed: 1766864014
434
+ 2025-12-27 19:33:34,510: [INFO] - Setup logger: <Logger src.dlutils.setup.logger (INFO)>
435
+ 2025-12-27 19:33:34,510: [INFO] - Setup writer: <torch.utils.tensorboard.writer.SummaryWriter object at 0x74fc724f4f50>
436
+ 2025-12-27 19:33:34,511: [INFO] - Setup save each: 1
437
+ 2025-12-27 19:33:34,515: [INFO] [SegmentationDataset] Loaded dataset: /workspace/data/tokens-A000-segmentation
438
+ 2025-12-27 19:33:34,515: [INFO] [SegmentationDataset] Loaded dataset length: 26510
439
+ 2025-12-27 19:33:35,262: [INFO] [SegmentationDataset] Loaded dataset: /workspace/data/tokens-A001-segmentation
440
+ 2025-12-27 19:33:35,262: [INFO] [SegmentationDataset] Loaded dataset length: 3336
441
+ 2025-12-27 19:33:35,796: [INFO] [TRAIN] Reloading model, optimizer and scheduler states...
442
+ 2025-12-27 19:33:35,926: [INFO] Model reloaded from /workspace/logs/checkpoints/model_epoch_60.pt at epoch 60 and seed 1766760356
443
+ 2025-12-27 19:33:35,927: [INFO] Optimizer state_dict loaded from /workspace/logs/checkpoints/model_epoch_60.pt
444
+ 2025-12-27 19:33:35,927: [INFO] Scheduler state_dict loaded from /workspace/logs/checkpoints/model_epoch_60.pt
445
+ 2025-12-27 19:33:35,927: [INFO] [TRAIN] Model Configuration:
446
+ {'vocab_size': 32768, 'model_dim': 256, 'max_tokens': 382, 'max_sentences': 384, 'valid_padding': True, 'cosenet': CoSeNetConfig(trainable=True, init_scale=5.0), 'transformers': [TransformerConfig(attention_heads=16, feed_forward_multiplier=8, dropout=0.0, pre_normalize=True), TransformerConfig(attention_heads=16, feed_forward_multiplier=8, dropout=0.0, pre_normalize=True)]}
447
+ 2025-12-27 19:33:35,928: [INFO] [TRAIN] Model parameters: 11.022865 M
448
+ 2025-12-27 19:33:35,928: [INFO] [TRAIN] Trainable parameters: 11.022865 M
449
+ 2025-12-27 19:33:35,928: [INFO] [TRAIN] Training batches: 6628
450
+ 2025-12-27 22:37:53,178: [INFO] Epoch [60]: loss = 0.41291385
451
+ 2025-12-27 22:37:53,181: [INFO] Checkpointing model at epoch 61
452
+ 2025-12-27 22:37:53,822: [INFO] Model checkpointed at epoch 61
453
+ 2025-12-27 22:40:30,975: [INFO] Epoch [61]: val_loss = 0.42101070
454
+ 2025-12-28 01:37:44,628: [INFO] Epoch [61]: loss = 0.40628406
455
+ 2025-12-28 01:37:44,629: [INFO] Checkpointing model at epoch 62
456
+ 2025-12-28 01:37:45,260: [INFO] Model checkpointed at epoch 62
457
+ 2025-12-28 01:40:08,586: [INFO] Epoch [62]: val_loss = 0.41949525
458
+ 2025-12-28 04:37:33,182: [INFO] Epoch [62]: loss = 0.40297545
459
+ 2025-12-28 04:37:33,184: [INFO] Checkpointing model at epoch 63
460
+ 2025-12-28 04:37:33,865: [INFO] Model checkpointed at epoch 63
461
+ 2025-12-28 04:39:58,256: [INFO] Epoch [63]: val_loss = 0.42107245
462
+ 2025-12-28 07:37:53,163: [INFO] Epoch [63]: loss = 0.40075299
463
+ 2025-12-28 07:37:53,165: [INFO] Checkpointing model at epoch 64
464
+ 2025-12-28 07:37:53,812: [INFO] Model checkpointed at epoch 64
465
+ 2025-12-28 07:40:18,271: [INFO] Epoch [64]: val_loss = 0.42276877
466
+ 2025-12-28 10:37:45,887: [INFO] Epoch [64]: loss = 0.39892435
467
+ 2025-12-28 10:37:45,888: [INFO] Checkpointing model at epoch 65
468
+ 2025-12-28 10:37:46,521: [INFO] Model checkpointed at epoch 65
469
+ 2025-12-28 10:40:11,142: [INFO] Epoch [65]: val_loss = 0.42484788
470
+ 2025-12-28 13:37:21,540: [INFO] Epoch [65]: loss = 0.39751294
471
+ 2025-12-28 13:37:21,541: [INFO] Checkpointing model at epoch 66
472
+ 2025-12-28 13:37:22,128: [INFO] Model checkpointed at epoch 66
473
+ 2025-12-28 13:39:45,583: [INFO] Epoch [66]: val_loss = 0.42598702
474
+ 2025-12-28 16:36:57,870: [INFO] Epoch [66]: loss = 0.39654398
475
+ 2025-12-28 16:36:57,872: [INFO] Checkpointing model at epoch 67
476
+ 2025-12-28 16:36:58,476: [INFO] Model checkpointed at epoch 67
477
+ 2025-12-28 16:39:23,196: [INFO] Epoch [67]: val_loss = 0.42759763
478
+ 2025-12-28 19:37:48,749: [INFO] Epoch [67]: loss = 0.39627669
479
+ 2025-12-28 19:37:48,752: [INFO] Checkpointing model at epoch 68
480
+ 2025-12-28 19:37:49,475: [INFO] Model checkpointed at epoch 68
481
+ 2025-12-28 19:40:15,986: [INFO] Epoch [68]: val_loss = 0.42856705
482
+ 2025-12-28 22:39:07,980: [INFO] Epoch [68]: loss = 0.39574215
483
+ 2025-12-28 22:39:07,982: [INFO] Checkpointing model at epoch 69
484
+ 2025-12-28 22:39:08,617: [INFO] Model checkpointed at epoch 69
485
+ 2025-12-28 22:41:33,142: [INFO] Epoch [69]: val_loss = 0.43066087
486
+ 2025-12-29 01:39:35,127: [INFO] Epoch [69]: loss = 0.39522947
487
+ 2025-12-29 01:39:35,129: [INFO] Checkpointing model at epoch 70
488
+ 2025-12-29 01:39:35,774: [INFO] Model checkpointed at epoch 70
489
+ 2025-12-29 01:42:00,471: [INFO] Epoch [70]: val_loss = 0.43032571
train/train_model.py ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
2
+ # #
3
+ # This file was created by: Alberto Palomo Alonso #
4
+ # Universidad de Alcalá - Escuela Politécnica Superior #
5
+ # #
6
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
7
+ # Import statements:
8
+ import torch
9
+ import tqdm
10
+ from train.config import configuration, TrainConfig
11
+ from src.model import SegmentationNetwork, MaskedBCELoss
12
+ from src.dataset import TokenizedSegmentationDataset
13
+ from src.dlutils import Setup, train_step, validation_step
14
+
15
+
16
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
17
+ # #
18
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
19
+ def train(controller: Setup, config: TrainConfig):
20
+ """
21
+ Main training function
22
+ :param controller: A training controller
23
+ :param config: The experiment configuration
24
+ :return: None
25
+ """
26
+
27
+ # 1. Train and val datasets:
28
+ train_dataset = TokenizedSegmentationDataset(
29
+ tokenized_dataset=config.dataset_config.train_data_path,
30
+ logger=controller.logger,
31
+ percentage=config.dataset_config.train_percentage,
32
+ return_type=tuple
33
+ ).get_loader(
34
+ config.batch_size,
35
+ shuffle=config.dataset_config.shuffle_train,
36
+ num_workers=config.dataset_config.num_workers
37
+ )
38
+ val_dataset = TokenizedSegmentationDataset(
39
+ tokenized_dataset=config.dataset_config.val_data_path,
40
+ logger=controller.logger,
41
+ percentage=config.dataset_config.val_percentage,
42
+ return_type=tuple
43
+ ).get_loader(
44
+ config.batch_size,
45
+ shuffle=config.dataset_config.shuffle_val,
46
+ num_workers=config.dataset_config.num_workers
47
+ )
48
+
49
+ # 2. Model, loss, optimizer:
50
+ model = SegmentationNetwork(config.model_config).to(controller.device)
51
+ loss_fn = MaskedBCELoss(valid_pad=config.model_config.valid_padding)
52
+ optimizer = torch.optim.AdamW(
53
+ params=model.parameters(),
54
+ lr=config.learning_rate,
55
+ weight_decay=config.weight_decay,
56
+ betas=config.betas
57
+ )
58
+ lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
59
+ optimizer=optimizer,
60
+ T_max=config.num_epochs,
61
+ eta_min=config.learning_rate_min
62
+ )
63
+
64
+ # 3. Reload checkpoint if needed:
65
+ if config.setup_config.reload_checkpoint:
66
+ controller.logger.info("[TRAIN] Reloading model, optimizer and scheduler states...")
67
+ controller.reload(model, optimizer, lr_scheduler)
68
+
69
+ # 4. Log info:
70
+ controller.logger.info(f"[TRAIN] Model Configuration:\n{config.model_config.__dict__}")
71
+ controller.logger.info(f"[TRAIN] Model parameters: {sum(p.numel() for p in model.parameters()) / 1e6} M")
72
+ controller.logger.info(f"[TRAIN] Trainable parameters: {sum(p.numel() for p in model.parameters() if p.requires_grad) / 1e6} M")
73
+ controller.logger.info(f"[TRAIN] Training batches: {len(train_dataset)}")
74
+ controller.save_config(config)
75
+
76
+ # 5. Set watchers:
77
+ controller.set_watcher('A')
78
+ controller.set_watcher('transformer')
79
+
80
+ # 6. Train loop:
81
+ try:
82
+ for _ in tqdm.tqdm(range(controller.epoch, config.num_epochs), desc="Epochs", unit="epoch"):
83
+ # Train step:
84
+ train_step(
85
+ model=model,
86
+ data=train_dataset,
87
+ loss=loss_fn,
88
+ optimizer=optimizer,
89
+ controller=controller,
90
+ scheduler=lr_scheduler
91
+ )
92
+
93
+ validation_step(
94
+ model=model,
95
+ data=val_dataset,
96
+ loss=loss_fn,
97
+ controller=controller
98
+ )
99
+ except KeyboardInterrupt:
100
+ controller.logger.warning("[TRAIN] Training interrupted by user. Saving model...")
101
+ except Exception as e:
102
+ controller.logger.error(f"[TRAIN] An error has occurred during training: {e}")
103
+ raise e
104
+ finally:
105
+ # 7. End of training:
106
+ controller.logger.info("[TRAIN] Saving model before exiting...")
107
+ controller.save_model(model, optimizer, lr_scheduler)
108
+ controller.logger.info("[TRAIN] Training process finished.")
109
+ input("[TRAIN] Training finished. Press any key to exit...")
110
+
111
+
112
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
113
+ # #
114
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
115
+ if __name__ == "__main__":
116
+ conf = configuration()
117
+ with Setup(
118
+ path=conf.setup_config.logging_path,
119
+ device=conf.setup_config.device_number,
120
+ seed=conf.setup_config.seed,
121
+ save_each=conf.setup_config.save_model_each,
122
+ reload_state=conf.setup_config.reload_checkpoint,
123
+ replay_element=(0, None)
124
+ ) as setup:
125
+ train(setup, conf)
126
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
127
+ # END OF FILE #
128
+ # - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #