saracandu commited on
Commit
cb9d925
·
verified ·
1 Parent(s): 64eddc2
Files changed (1) hide show
  1. modeling.py +704 -8
modeling.py CHANGED
@@ -33,18 +33,13 @@ from transformers.modeling_outputs import (
33
  )
34
 
35
  from configuration import STLConfig
36
- # from handcoded_tokenizer import STLTokenizer
37
  from nltk.translate.bleu_score import sentence_bleu
38
  from stl import *
39
  import networkx as nx
40
- # import phis_generator_depth
41
  from datasets import load_dataset
42
 
43
- from utils import from_string_to_formula, load_pickle, dump_pickle
44
- from phis_generator import StlGenerator
45
- from traj_measure import BaseMeasure
46
- from kernel import StlKernel
47
- from anchor_set_generation import anchorGeneration
48
 
49
  import re
50
  import json
@@ -54,6 +49,105 @@ from transformers.utils import logging
54
 
55
  logger = logging.get_logger(__name__)
56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
 
58
  def load_json(path: str) -> Union[Dict, List]:
59
  """
@@ -68,6 +162,607 @@ def load_json(path: str) -> Union[Dict, List]:
68
  with open(path, "r") as f:
69
  return json.load(f)
70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
 
72
  class STLTokenizer(PreTrainedTokenizer):
73
  """
@@ -404,6 +1099,7 @@ class STLAttention(nn.Module):
404
 
405
  return attn_output, None, past_key_value
406
 
 
407
 
408
  class STLEncoder():
409
  def __init__(self,
@@ -808,7 +1504,7 @@ class STLDecoder(STLModel):
808
  cross_attentions=all_cross_attentions,
809
  )
810
 
811
-
812
 
813
  class STLForCausalLM(STLModel, GenerationMixin):
814
  _tied_weights_keys = ["lm_head.weight"]
 
33
  )
34
 
35
  from configuration import STLConfig
 
36
  from nltk.translate.bleu_score import sentence_bleu
37
  from stl import *
38
  import networkx as nx
 
39
  from datasets import load_dataset
40
 
41
+
42
+ # from anchor_set_generation import anchorGeneration
 
 
 
43
 
44
  import re
45
  import json
 
49
 
50
  logger = logging.get_logger(__name__)
51
 
52
+ #### utils ####
53
+
54
def load_pickle(path):
    """Load and return the Python object stored in the pickle file at ``path``.

    SECURITY NOTE: ``pickle.load`` can execute arbitrary code while
    deserializing — only call this on trusted files.
    """
    with open(path, 'rb') as f:
        return pickle.load(f)
58
+
59
def dump_pickle(name, thing):
    """Serialize ``thing`` to the file ``<name>.pickle``.

    The ``.pickle`` extension is appended automatically, so ``name`` must be
    the base path *without* extension (mirrors how ``load_pickle`` is called
    with the full path).
    """
    with open(name + '.pickle', 'wb') as f:
        pickle.dump(thing, f)
62
+
63
def from_string_to_formula(st):
    """Parse the string rendering of an STL formula back into a formula tree.

    Assumed grammar (inferred from the index arithmetic below — TODO confirm
    against the printer that produces these strings):
      * binary nodes are fully parenthesised: ``( <left> <op> <right> )``;
      * unary nodes are ``<op> ( <child> )``;
      * atoms are exactly three tokens, e.g. ``x_0 <= 0.5``.
    Relies on ``Atom``/``Not``/``And``/``Or``/``Eventually``/``Globally``/
    ``Until`` and ``set_time_thresholds`` from the star-imported ``stl``
    module. Returns the root node of the parsed formula.
    """
    # A leading '(' marks a binary root (and/or/until); otherwise the root is
    # an atom or a unary operator (not/eventually/always).
    root_arity = 2 if st.startswith('(') else 1
    st_split = st.split()
    if root_arity <= 1:
        root_op_str = copy.deepcopy(st_split[0])
        if root_op_str.startswith('x'):
            # Atom: "x_<i> <= <thr>" or "x_<i> >= <thr>".
            # NOTE(review): st_split[0][2] reads a single character, so this
            # assumes variable indices 0-9 — confirm formulae never exceed
            # ten variables.
            atom_sign = True if st_split[1] == '<=' else False
            root_phi = Atom(var_index=int(st_split[0][2]), lte=atom_sign, threshold=float(st_split[2]))
            return root_phi
        else:
            assert (root_op_str.startswith('not') or root_op_str.startswith('eventually')
                    or root_op_str.startswith('always'))
            # Strip "<op> (" and the trailing ")" and recurse on the child.
            current_st = copy.deepcopy(st_split[2:-1])
            if root_op_str == 'not':
                root_phi = Not(child=from_string_to_formula(' '.join(current_st)))
            elif root_op_str.startswith('eventually'):
                # Temporal bounds (if any) are encoded in the operator token
                # itself and decoded by set_time_thresholds.
                unbound, right_unbound, left_time_bound, right_time_bound = set_time_thresholds(root_op_str)
                root_phi = Eventually(child=from_string_to_formula(' '.join(current_st)), unbound=unbound,
                                      right_unbound=right_unbound, left_time_bound=left_time_bound,
                                      right_time_bound=right_time_bound)
            else:
                unbound, right_unbound, left_time_bound, right_time_bound = set_time_thresholds(root_op_str)
                root_phi = Globally(child=from_string_to_formula(' '.join(current_st)), unbound=unbound,
                                    right_unbound=right_unbound, left_time_bound=left_time_bound,
                                    right_time_bound=right_time_bound)
    else:
        # 1 - delete everything which is contained in other sets of parenthesis (if any)
        current_st = copy.deepcopy(st_split[1:-1])
        if '(' in current_st:
            # Match parentheses with a stack to find each balanced span.
            par_queue = deque()
            par_idx_list = []
            for i, sub in enumerate(current_st):
                if sub == '(':
                    par_queue.append(i)
                elif sub == ')':
                    par_idx_list.append(tuple([par_queue.pop(), i]))
            # open_par_idx, close_par_idx = [current_st.index(p) for p in ['(', ')']]
            # union of parentheses range --> from these we may extract the substrings to be the children!!!
            # Merge overlapping/adjacent parenthesis spans into maximal
            # child ranges.
            children_range = []
            for begin, end in sorted(par_idx_list):
                if children_range and children_range[-1][1] >= begin - 1:
                    children_range[-1][1] = max(children_range[-1][1], end)
                else:
                    children_range.append([begin, end])
            n_children = len(children_range)
            assert (n_children in [1, 2])
            if n_children == 1:
                # one of the children is a variable --> need to individuate it
                var_child_idx = 1 if children_range[0][0] <= 1 else 0  # 0 is left child, 1 is right child
                # A unary operator token ('not'/'eventually'/'always', matched
                # by its first two characters) directly before the span
                # belongs to the child.
                if children_range[0][0] != 0 and current_st[children_range[0][0] - 1][0:2] in ['no', 'ev', 'al']:
                    children_range[0][0] -= 1
                left_child_str = current_st[:3] if var_child_idx == 0 else \
                    current_st[children_range[0][0]:children_range[0][1] + 1]
                right_child_str = current_st[-3:] if var_child_idx == 1 else \
                    current_st[children_range[0][0]:children_range[0][1] + 1]
                root_op_str = current_st[children_range[0][1] + 1] if var_child_idx == 1 else \
                    current_st[children_range[0][0] - 1]
                assert (root_op_str[:2] in ['an', 'or', 'un'])
            else:
                # Absorb a leading unary operator into each child's range.
                if children_range[0][0] != 0 and current_st[children_range[0][0] - 1][0:2] in ['no', 'ev', 'al']:
                    children_range[0][0] -= 1
                if current_st[children_range[1][0] - 1][0:2] in ['no', 'ev', 'al']:
                    children_range[1][0] -= 1
                # if there are two children, with parentheses, the element in the middle is the root
                root_op_str = current_st[children_range[0][1] + 1]
                assert (root_op_str[:2] in ['an', 'or', 'un'])
                left_child_str = current_st[children_range[0][0]:children_range[0][1] + 1]
                right_child_str = current_st[children_range[1][0]:children_range[1][1] + 1]
        else:
            # no parentheses means that both children are variables
            left_child_str = current_st[:3]
            right_child_str = current_st[-3:]
            root_op_str = current_st[3]
        left_child_str = ' '.join(left_child_str)
        right_child_str = ' '.join(right_child_str)
        if root_op_str == 'and':
            root_phi = And(left_child=from_string_to_formula(left_child_str),
                           right_child=from_string_to_formula(right_child_str))
        elif root_op_str == 'or':
            root_phi = Or(left_child=from_string_to_formula(left_child_str),
                          right_child=from_string_to_formula(right_child_str))
        else:
            unbound, right_unbound, left_time_bound, right_time_bound = set_time_thresholds(root_op_str)
            root_phi = Until(left_child=from_string_to_formula(left_child_str),
                             right_child=from_string_to_formula(right_child_str),
                             unbound=unbound, right_unbound=right_unbound, left_time_bound=left_time_bound,
                             right_time_bound=right_time_bound)
    return root_phi
151
 
152
  def load_json(path: str) -> Union[Dict, List]:
153
  """
 
162
  with open(path, "r") as f:
163
  return json.load(f)
164
 
165
+ #### phis_generator ####
166
+
167
class StlGenerator:
    """Random generator of STL formula trees.

    Node-type probabilities, atom-threshold distribution and time-bound
    ranges are fixed at construction time; :meth:`sample` and
    :meth:`bag_sample` then draw random formulae. Uses the node classes from
    the star-imported ``stl`` module and ``rnd`` for randomness (presumably
    ``numpy.random`` given the ``rnd.choice(..., p=...)`` /
    ``rnd.randint(high)`` call signatures — TODO confirm against the module
    imports).
    """

    def __init__(
        self,
        leaf_prob: float = 0.3,
        inner_node_prob: list = None,
        threshold_mean: float = 0.0,
        threshold_sd: float = 1.0,
        unbound_prob: float = 0.1,
        right_unbound_prob: float = 0.2,
        time_bound_max_range: float = 20,
        adaptive_unbound_temporal_ops: bool = True,
        max_timespan: int = 100,
    ):
        """
        leaf_prob
            probability of generating a leaf (always zero for root)
        node_types = ["not", "and", "or", "always", "eventually", "until"]
            Inner node types
        inner_node_prob
            probability vector for the different types of internal nodes
        threshold_mean
        threshold_sd
            mean and std for the normal distribution of the thresholds of atoms
        unbound_prob
            probability of a temporal operator to have a time bound o the type [0,infty]
        time_bound_max_range
            maximum value of time span of a temporal operator (i.e. max value of t in [0,t])
        adaptive_unbound_temporal_ops
            if true, unbounded temporal operators are computed from current point to the end of the signal, otherwise
            they are evaluated only at time zero.
        max_timespan
            maximum time depth of a formula.
        """

        # Address the mutability of default arguments
        if inner_node_prob is None:
            inner_node_prob = [0.166, 0.166, 0.166, 0.17, 0.166, 0.166]

        self.leaf_prob = leaf_prob
        self.inner_node_prob = inner_node_prob
        self.threshold_mean = threshold_mean
        self.threshold_sd = threshold_sd
        self.unbound_prob = unbound_prob
        self.right_unbound_prob = right_unbound_prob
        self.time_bound_max_range = time_bound_max_range
        self.adaptive_unbound_temporal_ops = adaptive_unbound_temporal_ops
        self.node_types = ["not", "and", "or", "always", "eventually", "until"]
        self.max_timespan = max_timespan

    def sample(self, nvars):
        """
        Samples a random formula with distribution defined in class instance parameters

        Parameters
        ----------
        nvars : number of variables of input signals
            how many variables the formula is expected to consider.

        Returns
        -------
        TYPE
            A random formula.

        """
        return self._sample_internal_node(nvars)

    def bag_sample(self, bag_size, nvars):
        """
        Samples a bag of bag_size formulae

        Parameters
        ----------
        bag_size : INT
            number of formulae.
        nvars : INT
            number of vars in formulae.

        Returns
        -------
        a list of formulae.

        """
        formulae = []
        for _ in range(bag_size):
            phi = self.sample(nvars)
            formulae.append(phi)
        return formulae

    def _sample_internal_node(self, nvars):
        # Build one internal node of the chosen type, re-drawing its
        # children until the resulting subtree's time depth fits within
        # self.max_timespan (the node type itself is chosen only once).
        # Declare & dummy-assign "idiom"
        node: Union[None, Node]
        node = None
        # choose node type
        nodetype = rnd.choice(self.node_types, p=self.inner_node_prob)
        while True:
            if nodetype == "not":
                n = self._sample_node(nvars)
                node = stl.Not(n)
            elif nodetype == "and":
                n1 = self._sample_node(nvars)
                n2 = self._sample_node(nvars)
                node = stl.And(n1, n2)
            elif nodetype == "or":
                n1 = self._sample_node(nvars)
                n2 = self._sample_node(nvars)
                node = stl.Or(n1, n2)
            elif nodetype == "always":
                n = self._sample_node(nvars)
                unbound, right_unbound, left_time_bound, right_time_bound = self._get_temporal_parameters()
                node = stl.Globally(
                    n, unbound, right_unbound, left_time_bound, right_time_bound, self.adaptive_unbound_temporal_ops
                )
            elif nodetype == "eventually":
                n = self._sample_node(nvars)
                unbound, right_unbound, left_time_bound, right_time_bound = self._get_temporal_parameters()
                node = stl.Eventually(
                    n, unbound, right_unbound, left_time_bound, right_time_bound, self.adaptive_unbound_temporal_ops
                )
            elif nodetype == "until":
                n1 = self._sample_node(nvars)
                n2 = self._sample_node(nvars)
                unbound, right_unbound, left_time_bound, right_time_bound = self._get_temporal_parameters()
                node = stl.Until(
                    n1, n2, unbound, right_unbound, left_time_bound, right_time_bound
                )

            if (node is not None) and (node.time_depth() < self.max_timespan):
                return node

    def _sample_node(self, nvars):
        # With probability leaf_prob emit an atom, otherwise recurse into
        # another internal node.
        if rnd.rand() < self.leaf_prob:
            # sample a leaf
            var, thr, lte = self._get_atom(nvars)
            return stl.Atom(var, thr, lte)
        else:
            return self._sample_internal_node(nvars)

    def _get_temporal_parameters(self):
        # Returns (unbound, right_unbound, left_time_bound, right_time_bound).
        if rnd.rand() < self.unbound_prob:
            # fully unbounded: [0, infty]
            return True, False, 0, 0
        elif rnd.rand() < self.right_unbound_prob:
            # right-unbounded: [t, infty]
            return False, True, rnd.randint(self.time_bound_max_range), 1
        else:
            # bounded interval [left, right] with right > left
            left_bound = rnd.randint(self.time_bound_max_range)
            return False, False, left_bound, rnd.randint(left_bound, self.time_bound_max_range) + 1

    def _get_atom(self, nvars):
        # Random variable index, normal threshold, random comparison direction.
        variable = rnd.randint(nvars)
        lte = rnd.rand() > 0.5
        threshold = rnd.normal(self.threshold_mean, self.threshold_sd)
        return variable, threshold, lte
318
+
319
+ #### traj_measure ####
320
+
321
class Measure:
    """Abstract base class for trajectory measures (see ``BaseMeasure``)."""

    def sample(self, samples=100000, varn=2, points=100):
        """Sample ``samples`` trajectories of ``varn`` variables over
        ``points`` time points. Must be overridden by subclasses."""
        # Raising is safer than the previous silent `pass`, which made a
        # forgotten override return None and fail far from the call site.
        raise NotImplementedError("Measure.sample must be overridden")
325
+
326
class BaseMeasure(Measure):
    """Measure over piecewise-linear trajectories: a normal initial state,
    a normal total variation, and Bernoulli-driven sign changes of the
    derivative."""

    def __init__(
        self, mu0=0.0, sigma0=1.0, mu1=0.0, sigma1=1.0, q=0.1, q0=0.5, device="cpu"
    ):
        """

        Parameters
        ----------
        mu0 : mean of normal distribution of initial state, optional
            The default is 0.0.
        sigma0 : standard deviation of normal distribution of initial state, optional
            The default is 1.0.
        mu1 : DOUBLE, optional
            mean of normal distribution of total variation. The default is 0.0.
        sigma1 : standard deviation of normal distribution of total variation, optional
            The default is 1.0.
        q : DOUBLE, optional
            probability of change of sign in derivative. The default is 0.1.
        q0 : DOUBLE, optional
            probability of initial sign of derivative. The default is 0.5.
        device : 'cpu' or 'cuda', optional
            device on which to run the algorithm. The default is 'cpu'.

        Returns
        -------
        None.

        """
        self.mu0 = mu0
        self.sigma0 = sigma0
        self.mu1 = mu1
        self.sigma1 = sigma1
        self.q = q
        self.q0 = q0
        self.device = device

    def sample(self, samples=100000, varn=2, points=100):
        """
        Samples a set of trajectories from the basic measure space, with parameters
        passed to the sampler

        Parameters
        ----------
        points : INT, optional
            number of points per trajectory, including initial one. The default is 1000.
        samples : INT, optional
            number of trajectories. The default is 100000.
        varn : INT, optional
            number of variables per trajectory. The default is 2.


        Returns
        -------
        signal : samples x varn x points double pytorch tensor
            The sampled signals.

        """
        if self.device == "cuda" and not torch.cuda.is_available():
            raise RuntimeError("GPU card or CUDA library not available!")

        # generate unif RN
        signal = torch.rand(samples, varn, points, device=self.device)
        # first point is special - set to zero for the moment, and set one point to 1
        signal[:, :, 0] = 0.0
        signal[:, :, -1] = 1.0
        # sorting each trajectory
        signal, _ = torch.sort(signal, 2)
        # computing increments and storing them in points 1 to end
        signal[:, :, 1:] = signal[:, :, 1:] - signal[:, :, :-1]
        # generate initial state, according to a normal distribution
        # FIX: allocate directly on self.device (previously torch.randn had
        # no device argument, so the tensor was created on the CPU default
        # device — inconsistent with every other allocation here and forcing
        # a host->device copy when device="cuda")
        signal[:, :, 0] = self.mu0 + self.sigma0 * torch.randn(
            signal[:, :, 0].size(), device=self.device
        )

        # sampling change signs from bernoulli in -1, 1
        derivs = (1 - self.q) * torch.ones(samples, varn, points, device=self.device)
        derivs = 2 * torch.bernoulli(derivs) - 1
        # sampling initial derivative
        derivs[:, :, 0] = self.q0
        derivs[:, :, 0] = 2 * torch.bernoulli(derivs[:, :, 0]) - 1
        # taking the cumulative product along axis 2
        derivs = torch.cumprod(derivs, 2)

        # sampling total variation
        totvar = torch.pow(
            self.mu1 + self.sigma1 * torch.randn(samples, varn, 1, device=self.device),
            2,
        )
        # multiplying total variation and derivatives and making initial point non-invasive
        derivs = derivs * totvar
        derivs[:, :, 0] = 1.0

        # computing trajectories by multiplying and then doing a cumulative sum
        signal = signal * derivs
        signal = torch.cumsum(signal, 2)
        return signal
420
+
421
+ #### kernel ####
422
+
423
# Type alias for "any real number" (float or int).
realnum = Union[float, int]
424
+
425
class StlKernel:
    """Kernel between STL formulae based on their robustness over a common
    bag of sampled trajectories.

    Each formula is mapped to the vector of its (boolean or quantitative)
    robustness values on ``self.signals``; the kernel is the (optionally
    normalized and/or exponentiated) scaled dot product of those vectors.
    Formula arguments are expected to expose ``boolean``/``quantitative``
    methods (project STL node types).
    """

    def __init__(
        self,
        measure,
        normalize=True,
        exp_kernel=True,
        sigma2=0.2,  # 0.5 works better; it was initially 0.2
        integrate_time=False,
        samples=100000,
        varn=2,
        points=100,
        boolean=False,
        signals=None,
    ):
        # measure: trajectory measure used to sample signals (must provide
        # .sample(points, samples, varn) and a .device attribute).
        self.traj_measure = measure
        self.exp_kernel = exp_kernel
        self.normalize = normalize
        self.sigma2 = sigma2
        self.samples = samples
        self.varn = varn
        self.points = points
        # integrate_time: if True, robustness is evaluated at every time
        # point and the kernel integrates over time; otherwise only time 0.
        self.integrate_time = integrate_time
        if signals is not None:
            self.signals = signals
        else:
            self.signals = measure.sample(points=points, samples=samples, varn=varn)
        # boolean: use boolean satisfaction (+/-1) instead of quantitative
        # robustness.
        self.boolean = boolean

    def compute(self, phi1, phi2):
        """Kernel value between two single formulae (scalar)."""
        return self.compute_one_one(phi1, phi2)

    def compute_one_one(self, phi1, phi2):
        # Wrap both formulae in singleton bags and read the 1x1 result.
        phis1: list = [phi1]
        phis2: list = [phi2]
        ker = self.compute_bag_bag(phis1, phis2)
        return ker[0, 0]

    def compute_bag(self, phis, return_robustness=True):
        """Gram matrix of a bag of formulae against itself; optionally also
        return the robustness vectors, self-kernels, and lengths for reuse."""
        if self.integrate_time:
            rhos, selfk, len0 = self._compute_robustness_time(phis)
            kernel_matrix = self._compute_kernel_time(
                rhos, rhos, selfk, selfk, len0, len0
            )
        else:
            rhos, selfk = self._compute_robustness_no_time(phis)
            kernel_matrix = self._compute_kernel_no_time(rhos, rhos, selfk, selfk)
            len0 = None
        if return_robustness:
            return kernel_matrix.cpu(), rhos, selfk, len0
        else:
            return kernel_matrix.cpu()

    def compute_one_bag(self, phi1, phis2, return_robustness=False):
        """Kernel vector of one formula against a bag (1 x len(phis2))."""
        phis1: list = [phi1]
        return self.compute_bag_bag(phis1, phis2, return_robustness)

    def compute_bag_bag(self, phis1, phis2, return_robustness=False):
        """Cross-kernel matrix between two bags of formulae."""
        if self.integrate_time:
            rhos1, selfk1, len1 = self._compute_robustness_time(phis1)
            rhos2, selfk2, len2 = self._compute_robustness_time(phis2)
            kernel_matrix = self._compute_kernel_time(
                rhos1, rhos2, selfk1, selfk2, len1, len2
            )
        else:
            rhos1, selfk1 = self._compute_robustness_no_time(phis1)
            rhos2, selfk2 = self._compute_robustness_no_time(phis2)
            len1, len2 = [None, None]
            kernel_matrix = self._compute_kernel_no_time(rhos1, rhos2, selfk1, selfk2)
        if return_robustness:
            return kernel_matrix.cpu(), rhos1, rhos2, selfk1, selfk2, len1, len2
        else:
            return kernel_matrix.cpu()

    def compute_one_from_robustness(self, phi, rhos, rho_self, lengths=None, return_robustness=False):
        """Kernel vector of one formula against precomputed robustness data."""
        phis: list = [phi]
        return self.compute_bag_from_robustness(phis, rhos, rho_self, lengths, return_robustness)

    def compute_bag_from_robustness(self, phis, rhos, rho_self, lengths=None, return_robustness=False):
        """Kernel of a bag of formulae against precomputed robustness data
        (avoids recomputing robustness of the second operand)."""
        if self.integrate_time:
            rhos1, selfk1, len1 = self._compute_robustness_time(phis)
            kernel_matrix = self._compute_kernel_time(
                rhos1, rhos, selfk1, rho_self, len1, lengths
            )
        else:
            rhos1, selfk1 = self._compute_robustness_no_time(phis)
            len1 = None
            kernel_matrix = self._compute_kernel_no_time(rhos1, rhos, selfk1, rho_self)
        if return_robustness:
            return kernel_matrix.cpu(), rhos1, selfk1, len1
        else:
            return kernel_matrix.cpu()

    def _compute_robustness_time(self, phis):
        # Robustness evaluated at all time points, zero-padded to `points`;
        # also returns per-formula self-kernels and actual trace lengths.
        n = self.samples
        p = self.points
        k = len(phis)
        rhos = torch.zeros((k, n, p), device="cpu")
        lengths = torch.zeros(k)
        self_kernels = torch.zeros((k, 1))
        for i, phi in enumerate(phis):
            if self.boolean:
                # map boolean {0,1} to {-1,+1}
                rho = phi.boolean(self.signals, evaluate_at_all_times=True).float()
                rho[rho == 0.0] = -1.0
            else:
                rho = phi.quantitative(self.signals, evaluate_at_all_times=True)
            # temporal operators may shorten the evaluated horizon
            actual_p = rho.size()[2]
            rho = rho.reshape(n, actual_p).cpu()
            rhos[i, :, :actual_p] = rho
            lengths[i] = actual_p
            # <rho, rho> averaged over samples and time points
            self_kernels[i] = torch.tensordot(
                rho.reshape(1, n, -1), rho.reshape(1, n, -1), dims=[[1, 2], [1, 2]]
            ) / (actual_p * n)
        return rhos, self_kernels, lengths

    def _compute_robustness_no_time(self, phis):
        # Robustness at time zero only: one value per formula per sample.
        n = self.samples
        k = len(phis)
        rhos = torch.zeros((k, n), device=self.traj_measure.device)
        self_kernels = torch.zeros((k, 1), device=self.traj_measure.device)
        for i, phi in enumerate(phis):
            if self.boolean:
                rho = phi.boolean(self.signals, evaluate_at_all_times=False).float()
                rho[rho == 0.0] = -1.0
            else:
                rho = phi.quantitative(self.signals, evaluate_at_all_times=False)
            self_kernels[i] = rho.dot(rho) / n
            rhos[i, :] = rho
        return rhos, self_kernels

    def _compute_kernel_time(self, rhos1, rhos2, selfk1, selfk2, len1, len2):
        # Time-integrated kernel: dot product over (samples, time), scaled by
        # 1 / (min trace length * samples).
        kernel_matrix = torch.tensordot(rhos1, rhos2, [[1, 2], [1, 2]])
        length_normalizer = self._compute_trajectory_length_normalizer(len1, len2)
        kernel_matrix = kernel_matrix * length_normalizer / self.samples
        if self.normalize:
            kernel_matrix = self._normalize(kernel_matrix, selfk1, selfk2)
        if self.exp_kernel:
            kernel_matrix = self._exponentiate(kernel_matrix, selfk1, selfk2)
        return kernel_matrix

    def _compute_kernel_no_time(self, rhos1, rhos2, selfk1, selfk2):
        # Time-zero kernel: dot product over samples, scaled by 1 / samples.
        kernel_matrix = torch.tensordot(rhos1, rhos2, [[1], [1]])
        kernel_matrix = kernel_matrix / self.samples
        if self.normalize:
            kernel_matrix = self._normalize(kernel_matrix, selfk1, selfk2)
        if self.exp_kernel:
            kernel_matrix = self._exponentiate(kernel_matrix, selfk1, selfk2)
        return kernel_matrix

    @staticmethod
    def _normalize(kernel_matrix, selfk1, selfk2):
        # Cosine-style normalization: K(a,b) / sqrt(K(a,a) * K(b,b)).
        normalize = torch.sqrt(torch.matmul(selfk1, torch.transpose(selfk2, 0, 1)))
        kernel_matrix = kernel_matrix / normalize
        return kernel_matrix

    def _exponentiate(self, kernel_matrix, selfk1, selfk2, sigma2=None):
        # Gaussian (RBF-style) transform of the linear kernel:
        # exp(-(||a||^2 + ||b||^2 - 2<a,b>) / (2*sigma2)).
        if sigma2 is None:
            sigma2 = self.sigma2
        if self.normalize:
            # selfk is (1.0^2 + 1.0^2)
            selfk = 2.0
        else:
            k1 = selfk1.size()[0]
            k2 = selfk2.size()[0]
            selfk = (selfk1 * selfk1).repeat(1, k2) + torch.transpose(
                selfk2 * selfk2, 0, 1
            ).repeat(k1, 1)
        return torch.exp(-(selfk - 2 * kernel_matrix) / (2 * sigma2))

    @staticmethod
    def _compute_trajectory_length_normalizer(len1, len2):
        # Pairwise 1 / min(len_i, len_j) matrix used to average the
        # time-integrated dot products over the shared horizon.
        k1 = len1.size()[0]
        k2 = len2.size()[0]
        y1 = len1.reshape(-1, 1)
        y1 = y1.repeat(1, k2)
        y2 = len2.repeat(k1, 1)
        return 1.0 / torch.min(y1, y2)
601
+
602
class GramMatrix:
    """Gram matrix of an ``StlKernel`` over a bag of formulae.

    With ``sample=True`` the formulae are generated on the fly by
    ``sampler``, rejecting candidates too similar (kernel value above a
    threshold) to the ones already kept; otherwise the provided
    ``formulae`` list is used as-is. Optionally caches robustness vectors
    to speed up later kernel-vector computations.
    """

    def __init__(self, kernel, formulae, store_robustness=True, sample=False, sampler=None, bag_size=None):
        self.kernel = kernel
        self.formulae_list = formulae
        # if kernel is computed from robustness at time zero only,
        # we store the robustness for each formula and each sample
        # to speed up computation later
        self.store_robustness = store_robustness
        # when sampling, bag_size overrides len(formulae) as target size
        self.dim = len(self.formulae_list) if not bag_size else int(bag_size)
        self.sample = sample  # whether to generate formulae in a controlled manner
        if self.sample:
            # rejection threshold on kernel similarity (stricter for boolean)
            self.t = 0.99 if self.kernel.boolean else 0.85
            self.sampler = sampler  # stl formulae generator
        self._compute_gram_matrix()

    def _compute_gram_matrix(self):
        # Fills self.gram (and the robustness caches when requested).
        if self.sample:
            gram = torch.zeros(self.dim, self.dim)
            # robustness cache shape depends on whether time is integrated
            rhos = torch.zeros((self.dim, self.kernel.samples), device=self.kernel.traj_measure.device) if \
                not self.kernel.integrate_time else torch.zeros((self.dim, self.kernel.samples, self.kernel.points),
                                                                device=self.kernel.traj_measure.device)
            lengths = torch.zeros(self.dim) if self.kernel.integrate_time else np.zeros(self.dim)
            kernels = torch.zeros((self.dim, 1), device=self.kernel.traj_measure.device)
            # seed the set with one random formula
            phis = [self.sampler.sample(nvars=self.kernel.varn)]
            gram[0, :1], rhos[0], kernels[0, :], lengths[0] = self.kernel.compute_bag(phis, return_robustness=True)
            while len(phis) < self.dim:
                i = len(phis)
                phi = self.sampler.sample(nvars=self.kernel.varn)
                # kernel row against formulae accepted so far (reuses cache)
                gram[i, :i], rhos[i], kernels[i, :], lengths[i] = self.kernel.compute_one_from_robustness(
                    phi, rhos[:i, :], kernels[:i, :], lengths[:i], return_robustness=True)
                # accept only if similar to fewer than 3 existing formulae
                if torch.sum(gram[i, :i + 1] >= self.t) < 3:
                    phis.append(phi)
                    gram[:i, i] = gram[i, :i]  # mirror: Gram matrix is symmetric
                    gram[i, i] = kernels[i, :]

            self.formulae_list = phis
            self.gram = gram.cpu()
            self.robustness = rhos if self.store_robustness else None
            self.self_kernels = kernels if self.store_robustness else None
            self.robustness_lengths = lengths if self.store_robustness else None
        else:
            if self.store_robustness:
                k_matrix, rhos, selfk, len0 = self.kernel.compute_bag(
                    self.formulae_list, return_robustness=True
                )
                self.gram = k_matrix
                self.robustness = rhos
                self.self_kernels = selfk
                self.robustness_lengths = len0
            else:
                self.gram = self.kernel.compute_bag(
                    self.formulae_list, return_robustness=False
                )
                self.robustness = None
                self.self_kernels = None
                self.robustness_lengths = None

    def compute_kernel_vector(self, phi):
        """Kernel vector of a single formula against this Gram's formulae."""
        if self.store_robustness:
            # fast path: reuse cached robustness of the stored formulae
            return self.kernel.compute_one_from_robustness(
                phi, self.robustness, self.self_kernels, self.robustness_lengths
            )
        else:
            return self.kernel.compute_one_bag(phi, self.formulae_list)

    def compute_bag_kernel_vector(self, phis, generate_phis=False, bag_size=None):
        """Kernel matrix of a bag of formulae against this Gram's formulae.

        With ``generate_phis=True``, ``phis`` is ignored and ``bag_size``
        test formulae are sampled instead, rejecting formulae whose
        robustness has constant sign over all sampled signals (trivially
        satisfied/violated); returns (formulae, kernel matrix).
        """
        if generate_phis:
            gram_test = torch.zeros(bag_size, self.dim)  # self.dim, bag_size
            rhos_test = torch.zeros((bag_size, self.kernel.samples), device=self.kernel.traj_measure.device) if \
                not self.kernel.integrate_time else torch.zeros((bag_size, self.kernel.samples, self.kernel.points),
                                                                device=self.kernel.traj_measure.device)
            lengths_test = torch.zeros(bag_size) if self.kernel.integrate_time else np.zeros(bag_size)
            kernels_test = torch.zeros((bag_size, 1), device=self.kernel.traj_measure.device)
            phi_test = []
            while len(phi_test) < bag_size:
                i = len(phi_test)
                phi = self.sampler.sample(nvars=self.kernel.varn)
                if self.store_robustness:
                    gram_test[i, :], rhos_test[i], kernels_test[i, :], lengths_test[i] = \
                        self.kernel.compute_one_from_robustness(phi, self.robustness, self.self_kernels,
                                                                self.robustness_lengths, return_robustness=True)
                else:
                    gram_test[i, :], rhos_test[i], _, kernels_test[i, :], _, lengths_test[i], _ = \
                        self.kernel.compute_one_bag(phi, self.formulae_list, return_robustness=True)
                # keep only formulae whose robustness changes sign somewhere
                if not ((rhos_test[i] > 0).all() or (rhos_test[i] < 0).all()):
                    phi_test.append(phi)
            return phi_test, gram_test.cpu()
        else:
            if self.store_robustness:
                return self.kernel.compute_bag_from_robustness(
                    phis, self.robustness, self.self_kernels, self.robustness_lengths
                )
            else:
                return self.kernel.compute_bag_bag(phis, self.formulae_list)

    def invert_regularized(self, alpha):
        """Return (G + 10^alpha * I)^-1 — Tikhonov-regularized inverse."""
        regularizer = abs(pow(10, alpha)) * torch.eye(self.dim)
        return torch.inverse(self.gram + regularizer)
700
+
701
+ #### anchor_generation ####
702
+
703
def anchorGeneration(diff_init = False, # to control whether we want formulae to be semantically different by construction
                     embed_dim: int = 30, # embedding dimension, aka number of generated formulae in the anchor set
                     n_vars: int = 3, # dimension of the input signal (3D in this case)
                     leaf_prob: float = 0.4, # complexity of the generated formula
                     cosine_similarity_threshold: float = 0.8 # if two formulae's cosine similarity exceeds this threshold, discard one of the two
                     ) -> str:
    """Generate an anchor set of ``embed_dim`` random STL formulae, pickle it,
    and return the (extension-less) filename it was saved under.

    With ``diff_init=True`` the set is built incrementally, rejecting
    candidates whose robustness vectors are too cosine-similar to already
    accepted anchors; otherwise it is a plain random bag.
    NOTE(review): ``normalize`` below is not defined in this function —
    presumably ``torch.nn.functional.normalize`` imported at module level;
    confirm against the file's imports.
    """

    # initialize STL formula generator
    sampler = StlGenerator(leaf_prob)

    # effective anchor set generation
    if diff_init:

        # initialize the anchor set with a randomly sampled formula
        diff_anchor_set = [sampler.sample(nvars=n_vars)]

        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        mu = BaseMeasure(device=device)

        # generates a set of random signals working as a tester for the formulae testing
        signals = mu.sample(samples=10000, varn=n_vars)

        # computes robustness value for the initial set of formulae in the anchor set
        anchor_rob_vectors = torch.cat([phi.quantitative(signals, normalize=True).unsqueeze(0) for phi in diff_anchor_set], 0)

        while len(diff_anchor_set) < embed_dim:
            # sample the 'remaining' formulae to reach the desired number of `embed_dim` formulae:
            candidate_anchors = sampler.bag_sample(embed_dim - len(diff_anchor_set), nvars = n_vars)

            # compute robustness of candidate anchor formulae on the same signals as previous anchor set
            candidate_robs = torch.cat([phi.quantitative(signals, normalize=True).unsqueeze(0) for phi in candidate_anchors], 0)

            # compute cosine similarity between current anchor set and candidate new formulae
            cos_simil = torch.tril(normalize(candidate_robs) @ normalize(anchor_rob_vectors).t(), diagonal=-1)

            # check which formulae are similar (i.e. greater cosine similarity then threshold) w.r.t. current anchors
            # NOTE (original author's note, translated from Italian): ask Gaia whether
            # negative cosine similarities should be taken in absolute value or not!
            similar_idx = [torch.where(cos_simil[r, :] > cosine_similarity_threshold)[0].tolist() for r in range(cos_simil.shape[0])]

            # keep only those who are semantically distant
            keep_idx = list(set(np.arange(len(candidate_anchors)).tolist()).difference(set([i for sublist in similar_idx for i in sublist])))

            diff_anchor_set += [copy.deepcopy(candidate_anchors[i]) for i in keep_idx]

            # Convert keep_idx to a tensor on the same device as candidate_robs
            keep_idx_tensor = torch.tensor(keep_idx, device=candidate_robs.device)

            # Use index_select to pick the relevant rows
            selected_robs = torch.index_select(candidate_robs, 0, keep_idx_tensor)

            # Concatenate on the same device
            anchor_rob_vectors = torch.cat([anchor_rob_vectors, copy.deepcopy(selected_robs)], dim=0)

        anchor_set = diff_anchor_set[:embed_dim]

    else:
        anchor_set = sampler.bag_sample(bag_size=embed_dim, nvars=n_vars)

    # NOTE(review): the filename says 'no_diff' regardless of diff_init —
    # likely misleading when diff_init=True; confirm downstream consumers
    # before renaming.
    filename = f'anchor_set_no_diff_{embed_dim}_dim'
    dump_pickle(filename, anchor_set)
    return filename

####
764
+
765
+ ####
766
 
767
  class STLTokenizer(PreTrainedTokenizer):
768
  """
 
1099
 
1100
  return attn_output, None, past_key_value
1101
 
1102
+ ####
1103
 
1104
  class STLEncoder():
1105
  def __init__(self,
 
1504
  cross_attentions=all_cross_attentions,
1505
  )
1506
 
1507
+ ####
1508
 
1509
  class STLForCausalLM(STLModel, GenerationMixin):
1510
  _tied_weights_keys = ["lm_head.weight"]