| """A set of utility functions to support outlier detection. |
| """ |
|
|
| from __future__ import division |
| from __future__ import print_function |
| from joblib.parallel import cpu_count |
| import numpy as np |
| from numpy import percentile |
| import numbers |
|
|
| import sklearn |
| from sklearn.metrics import precision_score |
| from sklearn.preprocessing import StandardScaler |
| from sklearn.utils import column_or_1d |
| from sklearn.utils import check_array |
| from sklearn.utils import check_consistent_length |
| from sklearn.utils import check_random_state |
| from sklearn.utils.random import sample_without_replacement |
| import torch.nn as nn |
|
|
| MAX_INT = np.iinfo(np.int32).max |
| MIN_INT = -1 * MAX_INT |
|
|
def zscore(a, axis=0, ddof=0):
    """Compute the z-score of each value in a along the given axis, i.e.
    (a - mean) / std, with NaNs (e.g. from a zero standard deviation)
    replaced by zero.
    """
| a = np.asanyarray(a) |
| mns = a.mean(axis=axis) |
| sstd = a.std(axis=axis, ddof=ddof) |
|
|
| if axis and mns.ndim < a.ndim: |
| res = ((a - np.expand_dims(mns, axis=axis)) / |
| np.expand_dims(sstd, axis=axis)) |
| else: |
| res = (a - mns) / sstd |
|
|
| return np.nan_to_num(res) |
|
|
def pairwise_distances_no_broadcast(X, Y):
    """Utility function to calculate the row-wise Euclidean distance of two
    matrices. Unlike pairwise distance calculation, this function does not
    broadcast: for instance, if X and Y are both (4, 3) matrices, it returns
    a distance vector of shape (4,) instead of (4, 4).
| Parameters |
| ---------- |
| X : array of shape (n_samples, n_features) |
| First input samples |
| Y : array of shape (n_samples, n_features) |
| Second input samples |
| Returns |
| ------- |
| distance : array of shape (n_samples,) |
| Row-wise euclidean distance of X and Y |
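    Examples
    --------
    A small sanity check with hand-picked values (illustrative only;
    tolist is used for a stable doctest):

    >>> import numpy as np
    >>> X = np.array([[0., 0.], [3., 4.]])
    >>> Y = np.zeros((2, 2))
    >>> pairwise_distances_no_broadcast(X, Y).tolist()
    [0.0, 5.0]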
| """ |
| X = check_array(X) |
| Y = check_array(Y) |
|
|
    if X.shape[0] != Y.shape[0] or X.shape[1] != Y.shape[1]:
        raise ValueError("pairwise_distances_no_broadcast function receives "
                         "matrices with different shapes {0} and {1}".format(
                             X.shape, Y.shape))
| |
| euclidean_sq = np.square(Y - X) |
| return np.sqrt(np.sum(euclidean_sq, axis=1)).ravel() |
|
|
| def getSplit(X): |
| """ |
    Randomly select a split value for the scalar data 'X', drawn uniformly
    between X.min() and X.max(). Returns the split value.
| |
| Parameters |
| ---------- |
| X : array |
| Array of scalar values |
| Returns |
| ------- |
| float |
| split value |
| """ |
| xmin = X.min() |
| xmax = X.max() |
| return np.random.uniform(xmin, xmax) |
|
|
| def similarityScore(S, node, alpha): |
| """ |
    Given a set of instances S falling into a node and a value alpha >= 0,
    returns, for each element x in S, the weighted similarity score between x
    and the centroid M of S (node.M).
| |
| Parameters |
| ---------- |
| S : array of instances |
| Array of instances that fall into a node |
| node: a DiFF tree node |
| S is the set of instances "falling" into the node |
| alpha: float |
| alpha is the distance scaling hyper-parameter |
| Returns |
| ------- |
    array
        the array of similarity values between the instances in S and the
        mean of the training instances falling in the node
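    Examples
    --------
    A minimal sketch using types.SimpleNamespace as a stand-in for a DiFF
    tree node; it only assumes the node exposes the M and Mstd attributes
    used above:

    >>> import numpy as np
    >>> from types import SimpleNamespace
    >>> node = SimpleNamespace(M=np.array([1., 1.]), Mstd=np.array([1., 1.]))
    >>> S = np.array([[1., 1.], [2., 2.]])
    >>> similarityScore(S, node, alpha=1.0).tolist()
    [1.0, 0.5]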
| """ |
    if len(S) > 0:
        d = np.shape(S)[1]
        U = (S - node.M) / node.Mstd
        U = 2.0 ** (-alpha * (np.sum(U * U / d, axis=1)))
    else:
        U = 0
|
|
| return U |
|
|
|
|
| def EE(hist): |
| """ |
    Given a list of positive values as a histogram drawn from any information
    source, returns the empirical entropy of its discrete probability
    distribution.
| |
| Parameters |
| ---------- |
| hist: array |
| histogram |
| Returns |
| ------- |
| float |
| empirical entropy estimated from the histogram |
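    Examples
    --------
    Entropy of two small histograms (float is used for a stable doctest):

    >>> float(EE([1, 1, 2]))
    1.5
    >>> float(EE([2, 2, 2, 2]))
    2.0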
| """ |
| h = np.asarray(hist, dtype=np.float64) |
| if h.sum() <= 0 or (h < 0).any(): |
| return 0 |
| h = h/h.sum() |
| return -(h*np.ma.log2(h)).sum() |
|
|
|
|
| def weightFeature(s, nbins): |
| ''' |
    Given a list of values corresponding to a feature dimension, returns a
    weight (in [0, 1]) equal to one minus the normalized empirical entropy,
    which characterizes the importance of the feature dimension.
| |
| Parameters |
| ---------- |
| s: array |
| list of scalar values corresponding to a feature dimension |
| nbins: int |
        the number of bins used to discretize the feature dimension using a
        histogram.
| Returns |
| ------- |
| float |
| the importance weight for feature s. |
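    Examples
    --------
    Illustrative values: a constant feature gets zero weight, while a
    skewed feature gets a weight strictly between 0 and 1:

    >>> import numpy as np
    >>> weightFeature(np.ones(5), nbins=2)
    0
    >>> round(float(weightFeature(np.array([0., 0., 0., 1.]), nbins=2)), 3)
    0.189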
| ''' |
| if s.min() == s.max(): |
| return 0 |
| hist = np.histogram(s, bins=nbins, density=True) |
| ent = EE(hist[0]) |
| ent = ent/np.log2(nbins) |
| return 1-ent |
|
|
|
|
| def check_parameter(param, low=MIN_INT, high=MAX_INT, param_name='', |
| include_left=False, include_right=False): |
| """Check if an input is within the defined range. |
| |
| Parameters |
| ---------- |
| param : int, float |
| The input parameter to check. |
| |
| low : int, float |
| The lower bound of the range. |
| |
| high : int, float |
| The higher bound of the range. |
| |
| param_name : str, optional (default='') |
| The name of the parameter. |
| |
| include_left : bool, optional (default=False) |
| Whether includes the lower bound (lower bound <=). |
| |
| include_right : bool, optional (default=False) |
| Whether includes the higher bound (<= higher bound). |
| |
| Returns |
| ------- |
    within_range : bool
        True if the parameter is within the range; otherwise a TypeError or
        ValueError is raised.
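    Examples
    --------
    A simple usage sketch:

    >>> check_parameter(3, low=0, high=10, param_name='n_bins')
    True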
| |
| """ |
|
|
| |
| if not isinstance(param, (numbers.Integral, np.integer, float)): |
        raise TypeError('{param_name} is set to {param}. Not numerical.'.format(
            param=param, param_name=param_name))
|
|
| if not isinstance(low, (numbers.Integral, np.integer, float)): |
| raise TypeError('low is set to {low}. Not numerical'.format(low=low)) |
|
|
| if not isinstance(high, (numbers.Integral, np.integer, float)): |
| raise TypeError('high is set to {high}. Not numerical'.format( |
| high=high)) |
|
|
| |
| if low is MIN_INT and high is MAX_INT: |
        raise ValueError('Neither low nor high bound is defined')
|
|
| |
| if low > high: |
| raise ValueError( |
| 'Lower bound > Higher bound') |
|
|
| |
| if (include_left and include_right) and (param < low or param > high): |
| raise ValueError( |
| '{param_name} is set to {param}. ' |
| 'Not in the range of [{low}, {high}].'.format( |
| param=param, low=low, high=high, param_name=param_name)) |
|
|
| elif (include_left and not include_right) and ( |
| param < low or param >= high): |
| raise ValueError( |
| '{param_name} is set to {param}. ' |
| 'Not in the range of [{low}, {high}).'.format( |
| param=param, low=low, high=high, param_name=param_name)) |
|
|
| elif (not include_left and include_right) and ( |
| param <= low or param > high): |
| raise ValueError( |
| '{param_name} is set to {param}. ' |
| 'Not in the range of ({low}, {high}].'.format( |
| param=param, low=low, high=high, param_name=param_name)) |
|
|
| elif (not include_left and not include_right) and ( |
| param <= low or param >= high): |
| raise ValueError( |
| '{param_name} is set to {param}. ' |
| 'Not in the range of ({low}, {high}).'.format( |
| param=param, low=low, high=high, param_name=param_name)) |
| else: |
| return True |
|
|
|
|
def check_detector(detector):
    """Checks if fit and decision_function methods exist for the given
    detector.
| Parameters |
| ---------- |
| detector : pyod.models |
| Detector instance for which the check is performed. |
| """ |
|
|
| if not hasattr(detector, 'fit') or not hasattr(detector, |
| 'decision_function'): |
| raise AttributeError("%s is not a detector instance." % (detector)) |
|
|
|
|
def standardizer(X, X_t=None, keep_scalar=False):
    """Conduct Z-normalization on data so that the input samples have zero
    mean and unit variance.
| Parameters |
| ---------- |
| X : numpy array of shape (n_samples, n_features) |
| The training samples |
| X_t : numpy array of shape (n_samples_new, n_features), optional (default=None) |
| The data to be converted |
    keep_scalar : bool, optional (default=False)
        The flag to indicate whether to also return the fitted scaler object
| Returns |
| ------- |
| X_norm : numpy array of shape (n_samples, n_features) |
| X after the Z-score normalization |
    X_t_norm : numpy array of shape (n_samples_new, n_features)
        X_t after the Z-score normalization
    scaler : sklearn StandardScaler object
        The scaler used in the conversion, returned only if keep_scalar is
        True
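    Examples
    --------
    A minimal illustration on a single feature with mean 2 and unit
    standard deviation:

    >>> import numpy as np
    >>> standardizer(np.array([[1.], [3.]])).ravel().tolist()
    [-1.0, 1.0]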
| """ |
| X = check_array(X) |
| scaler = StandardScaler().fit(X) |
|
|
| if X_t is None: |
| if keep_scalar: |
| return scaler.transform(X), scaler |
| else: |
| return scaler.transform(X) |
| else: |
| X_t = check_array(X_t) |
| if X.shape[1] != X_t.shape[1]: |
            raise ValueError(
                "The number of input data features should be consistent. "
                "X has {0} features and X_t has {1} features.".format(
                    X.shape[1], X_t.shape[1]))
| if keep_scalar: |
| return scaler.transform(X), scaler.transform(X_t), scaler |
| else: |
| return scaler.transform(X), scaler.transform(X_t) |
|
|
|
|
def score_to_label(pred_scores, outliers_fraction=0.1):
    """Turn raw outlier scores into binary labels (0 or 1).
| Parameters |
| ---------- |
| pred_scores : list or numpy array of shape (n_samples,) |
        Raw outlier scores. Outliers are assumed to have larger values.
| outliers_fraction : float in (0,1) |
| Percentage of outliers. |
| Returns |
| ------- |
    outlier_labels : numpy array of shape (n_samples,)
        For each observation, indicates whether it should be considered an
        outlier according to the fitted model: 1 for outliers (scores above
        the threshold implied by outliers_fraction) and 0 for inliers.
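    Examples
    --------
    With a contamination of 25%, only the single largest score is labeled
    as an outlier:

    >>> import numpy as np
    >>> score_to_label(np.array([0.1, 0.2, 0.3, 0.9]), outliers_fraction=0.25)
    array([0, 0, 0, 1])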
| """ |
| |
| pred_scores = column_or_1d(pred_scores) |
| check_parameter(outliers_fraction, 0, 1) |
|
|
| threshold = percentile(pred_scores, 100 * (1 - outliers_fraction)) |
| pred_labels = (pred_scores > threshold).astype('int') |
| return pred_labels |
|
|
|
|
| def precision_n_scores(y, y_pred, n=None): |
| """Utility function to calculate precision @ rank n. |
| Parameters |
| ---------- |
| y : list or numpy array of shape (n_samples,) |
| The ground truth. Binary (0: inliers, 1: outliers). |
| y_pred : list or numpy array of shape (n_samples,) |
| The raw outlier scores as returned by a fitted model. |
| n : int, optional (default=None) |
        The number of outliers. If not defined, it is inferred from the
        ground truth.
| Returns |
| ------- |
| precision_at_rank_n : float |
| Precision at rank n score. |
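    Examples
    --------
    A toy example (float is used for a stable doctest):

    >>> float(precision_n_scores([0, 1, 1, 0, 0], [0.1, 0.5, 0.3, 0.2, 0.7]))
    0.5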
| """ |
|
|
| |
| y_pred = get_label_n(y, y_pred, n) |
|
|
| |
| y = column_or_1d(y) |
| y_pred = column_or_1d(y_pred) |
|
|
| return precision_score(y, y_pred) |
|
|
|
|
def get_label_n(y, y_pred, n=None):
    """Function to turn raw outlier scores into binary labels by assigning 1
    to the top n outlier scores.
| Parameters |
| ---------- |
| y : list or numpy array of shape (n_samples,) |
| The ground truth. Binary (0: inliers, 1: outliers). |
| y_pred : list or numpy array of shape (n_samples,) |
| The raw outlier scores as returned by a fitted model. |
| n : int, optional (default=None) |
        The number of outliers. If not defined, it is inferred from the
        ground truth.
| Returns |
| ------- |
| labels : numpy array of shape (n_samples,) |
| binary labels 0: normal points and 1: outliers |
| Examples |
| -------- |
| >>> from pyod.utils.utility import get_label_n |
| >>> y = [0, 1, 1, 0, 0] |
| >>> y_pred = [0.1, 0.5, 0.3, 0.2, 0.7] |
| >>> get_label_n(y, y_pred) |
| array([0, 1, 0, 0, 1]) |
| """ |
|
|
| |
| y = column_or_1d(y) |
| y_pred = column_or_1d(y_pred) |
|
|
| check_consistent_length(y, y_pred) |
| y_len = len(y) |
|
|
| |
| if n is not None: |
| outliers_fraction = n / y_len |
| else: |
| outliers_fraction = np.count_nonzero(y) / y_len |
|
|
| threshold = percentile(y_pred, 100 * (1 - outliers_fraction)) |
| y_pred = (y_pred > threshold).astype('int') |
|
|
| return y_pred |
|
|
def get_intersection(lst1, lst2):
    """Get the intersection (overlap) of two lists.
    Parameters
    ----------
    lst1 : list or numpy array
        Input list 1.
    lst2 : list or numpy array
        Input list 2.
    Returns
    -------
    intersection : list
        The elements common to lst1 and lst2.
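    Examples
    --------
    sorted is used below only because set operations do not preserve order:

    >>> sorted(get_intersection([1, 2, 3], [2, 3, 4]))
    [2, 3]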
| """ |
| return list(set(lst1) & set(lst2)) |
|
|
|
|
def get_list_diff(li1, li2):
    """Get the elements in li1 but not in li2 (the set difference li1 - li2).
| Parameters |
| ---------- |
| li1 : list or numpy array |
| Input list 1. |
| li2 : list or numpy array |
| Input list 2. |
| Returns |
| ------- |
    difference : list
        The elements of li1 that are not in li2.
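    Examples
    --------
    The asymmetric difference li1 - li2 (sorted for a stable doctest):

    >>> sorted(get_list_diff([1, 2, 3], [2, 3, 4]))
    [1]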
| """ |
|
|
| return (list(set(li1) - set(li2))) |
|
|
def get_diff_elements(li1, li2):
    """Get the elements that appear in exactly one of li1 and li2.
| Parameters |
| ---------- |
| li1 : list or numpy array |
| Input list 1. |
| li2 : list or numpy array |
| Input list 2. |
| Returns |
| ------- |
    difference : list
        The symmetric difference of li1 and li2.
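    Examples
    --------
    The symmetric difference of two toy lists (sorted for a stable doctest):

    >>> sorted(get_diff_elements([1, 2, 3], [2, 3, 4]))
    [1, 4]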
| """ |
| |
| return (list(set(li1) - set(li2)) + list(set(li2) - set(li1))) |
|
|
def argmaxn(value_list, n, order='desc'):
    """Return the indices of the top n elements in the list if order is set
    to 'desc', otherwise return the indices of the n smallest elements.
| Parameters |
| ---------- |
| value_list : list, array, numpy array of shape (n_samples,) |
| A list containing all values. |
| n : int |
| The number of elements to select. |
| order : str, optional (default='desc') |
| The order to sort {'desc', 'asc'}: |
| - 'desc': descending |
| - 'asc': ascending |
| Returns |
| ------- |
| index_list : numpy array of shape (n,) |
| The index of the top n elements. |
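    Examples
    --------
    Indices of the two largest and the two smallest values (tolist is used
    for a stable doctest):

    >>> argmaxn([0.1, 0.9, 0.3, 0.7], n=2).tolist()
    [1, 3]
    >>> argmaxn([0.1, 0.9, 0.3, 0.7], n=2, order='asc').tolist()
    [0, 2]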
| """ |
|
|
| value_list = column_or_1d(value_list) |
| length = len(value_list) |
|
|
| |
| check_parameter(n, 1, length, include_left=True, include_right=True, |
| param_name='n') |
|
|
| |
| if order != 'desc': |
| n = length - n |
|
|
| value_sorted = np.partition(value_list, length - n) |
| threshold = value_sorted[int(length - n)] |
|
|
| if order == 'desc': |
| return np.where(np.greater_equal(value_list, threshold))[0] |
| else: |
| return np.where(np.less(value_list, threshold))[0] |
|
|
|
|
| def invert_order(scores, method='multiplication'): |
| """ Invert the order of a list of values. The smallest value becomes |
| the largest in the inverted list. This is useful while combining |
| multiple detectors since their score order could be different. |
| Parameters |
| ---------- |
| scores : list, array or numpy array with shape (n_samples,) |
| The list of values to be inverted |
| method : str, optional (default='multiplication') |
| Methods used for order inversion. Valid methods are: |
| - 'multiplication': multiply by -1 |
| - 'subtraction': max(scores) - scores |
| Returns |
| ------- |
| inverted_scores : numpy array of shape (n_samples,) |
| The inverted list |
| Examples |
| -------- |
| >>> scores1 = [0.1, 0.3, 0.5, 0.7, 0.2, 0.1] |
| >>> invert_order(scores1) |
| array([-0.1, -0.3, -0.5, -0.7, -0.2, -0.1]) |
| >>> invert_order(scores1, method='subtraction') |
| array([0.6, 0.4, 0.2, 0. , 0.5, 0.6]) |
| """ |
|
|
| scores = column_or_1d(scores) |
|
|
| if method == 'multiplication': |
| return scores.ravel() * -1 |
|
|
| if method == 'subtraction': |
| return (scores.max() - scores).ravel() |
|
|
|
|
def _get_sklearn_version():
    """ Utility function to decide the version of sklearn.
    PyOD will result in different behaviors with different sklearn versions;
    only sklearn 0.19 through 0.23 is supported here.
    Returns
    -------
    sklearn minor version : int
| """ |
|
|
| sklearn_version = str(sklearn.__version__) |
| if int(sklearn_version.split(".")[1]) < 19 or int( |
| sklearn_version.split(".")[1]) > 23: |
        raise ValueError("Sklearn version error: only sklearn 0.19 to 0.23 "
                         "is supported, got {0}".format(sklearn_version))
|
|
| return int(sklearn_version.split(".")[1]) |
|
|
|
|
def _sklearn_version_21():
    """ Utility function to decide the version of sklearn.
    In sklearn 0.21.0, LOF is changed. Specifically, _decision_function
    is replaced by _score_samples.
    Returns
    -------
    sklearn_21_flag : bool
        True if sklearn.__version__ is 0.21.0 or newer
| """ |
| sklearn_version = str(sklearn.__version__) |
| if int(sklearn_version.split(".")[1]) > 20: |
| return True |
| else: |
| return False |
|
|
|
|
| def generate_bagging_indices(random_state, bootstrap_features, n_features, |
| min_features, max_features): |
| """ Randomly draw feature indices. Internal use only. |
| Modified from sklearn/ensemble/bagging.py |
| Parameters |
| ---------- |
| random_state : RandomState |
| A random number generator instance to define the state of the random |
| permutations generator. |
    bootstrap_features : bool
        Specifies whether features are drawn with replacement (bootstrap)
| n_features : int |
| Specifies the population size when generating indices |
| min_features : int |
| Lower limit for number of features to randomly sample |
    max_features : int
        Upper limit (exclusive) for the number of features to randomly sample
| Returns |
| ------- |
    feature_indices : numpy array of shape (random_n_features,)
        Indices of the randomly drawn features to bag
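    Examples
    --------
    A usage sketch; note that max_features is an exclusive upper bound
    (numpy RandomState.randint convention), so between 2 and 4 feature
    indices are drawn here:

    >>> idx = generate_bagging_indices(0, False, 10, 2, 5)
    >>> 2 <= len(idx) < 5
    True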
| """ |
|
|
| |
| random_state = check_random_state(random_state) |
|
|
| |
| random_n_features = random_state.randint(min_features, max_features) |
|
|
| |
| feature_indices = generate_indices(random_state, bootstrap_features, |
| n_features, random_n_features) |
|
|
| return feature_indices |
|
|
|
|
| def generate_indices(random_state, bootstrap, n_population, n_samples): |
| """ Draw randomly sampled indices. Internal use only. |
| See sklearn/ensemble/bagging.py |
| Parameters |
| ---------- |
| random_state : RandomState |
| A random number generator instance to define the state of the random |
| permutations generator. |
    bootstrap : bool
        Specifies whether indices are drawn with replacement (bootstrap)
| n_population : int |
| Specifies the population size when generating indices |
| n_samples : int |
| Specifies number of samples to draw |
| Returns |
| ------- |
| indices : numpy array, shape (n_samples,) |
| randomly drawn indices |
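    Examples
    --------
    A usage sketch drawing 5 bootstrap indices from a population of 10:

    >>> import numpy as np
    >>> rng = np.random.RandomState(0)
    >>> generate_indices(rng, True, 10, 5).shape
    (5,)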
| """ |
|
|
| |
| if bootstrap: |
| indices = random_state.randint(0, n_population, n_samples) |
| else: |
| indices = sample_without_replacement(n_population, n_samples, |
| random_state=random_state) |
|
|
| return indices |
|
|
|
|
def EuclideanDist(x, y):
    """Return the Euclidean distance between the two vectors x and y."""
    return np.sqrt(np.sum((x - y) ** 2))


def dist2set(x, X):
    """Return the list of Euclidean distances between the instance x and
    every instance in the set X."""
    return [EuclideanDist(x, X[i]) for i in range(len(X))]
|
|
def c_factor(n):
    """Average path length of an unsuccessful binary-search-tree search on n
    samples (n clamped to a minimum of 2); used to normalize path lengths in
    isolation-forest style models. 0.5772156649 is the Euler-Mascheroni constant.
    """
    if n < 2:
        n = 2
    return 2.0 * (np.log(n - 1) + 0.5772156649) - (2.0 * (n - 1.) / (n * 1.0))
|
|
|
|
def all_branches(node, current=[], branches=None):
    """Recursively collect every root-to-leaf branch of the tree rooted at
    node, each branch encoded as a list of 'L'/'R' moves.
    """
| current = current[:node.e] |
| if branches is None: branches = [] |
| if node.ntype == 'inNode': |
| current.append('L') |
| all_branches(node.left, current=current, branches=branches) |
| current = current[:-1] |
| current.append('R') |
| all_branches(node.right, current=current, branches=branches) |
| else: |
| branches.append(current) |
| return branches |
|
|
|
|
def branch2num(branch, init_root=0):
    """Convert a branch encoded as a list of 'L'/'R' moves into the list of
    visited node ids in the array encoding of a binary tree (left child
    2 * i + 1, right child 2 * i + 2); e.g. ['L', 'R'] maps to [0, 1, 4].
    """
| num = [init_root] |
| for b in branch: |
| if b == 'L': |
| num.append(num[-1] * 2 + 1) |
| if b == 'R': |
| num.append(num[-1] * 2 + 2) |
| return num |
|
|
| def _get_n_jobs(n_jobs): |
| """Get number of jobs for the computation. |
| See sklearn/utils/__init__.py for more information. |
| |
| This function reimplements the logic of joblib to determine the actual |
| number of jobs depending on the cpu count. If -1 all CPUs are used. |
| If 1 is given, no parallel computing code is used at all, which is useful |
| for debugging. For n_jobs below -1, (n_cpus + 1 + n_jobs) are used. |
| Thus for n_jobs = -2, all CPUs but one are used. |
| Parameters |
| ---------- |
| n_jobs : int |
| Number of jobs stated in joblib convention. |
| Returns |
| ------- |
| n_jobs : int |
| The actual number of jobs as positive integer. |
| """ |
| if n_jobs < 0: |
| return max(cpu_count() + 1 + n_jobs, 1) |
| elif n_jobs == 0: |
| raise ValueError('Parameter n_jobs == 0 has no meaning.') |
| else: |
| return n_jobs |
|
|
|
|
| def _partition_estimators(n_estimators, n_jobs): |
| """Private function used to partition estimators between jobs. |
| See sklearn/ensemble/base.py for more information. |
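
    A usage sketch: 7 estimators split across 3 jobs.

    >>> _partition_estimators(7, 3)
    (3, [3, 2, 2], [0, 3, 5, 7])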
| """ |
| |
| n_jobs = min(_get_n_jobs(n_jobs), n_estimators) |
|
|
| |
| n_estimators_per_job = (n_estimators // n_jobs) * np.ones(n_jobs, dtype=int) |
| n_estimators_per_job[:n_estimators % n_jobs] += 1 |
| starts = np.cumsum(n_estimators_per_job) |
|
|
| return n_jobs, n_estimators_per_job.tolist(), [0] + starts.tolist() |
|
|
|
|
| def _pprint(params, offset=0, printer=repr): |
| |
| """Pretty print the dictionary 'params' |
| |
| See http://scikit-learn.org/stable/modules/generated/sklearn.base.BaseEstimator.html |
| and sklearn/base.py for more information. |
| |
| :param params: The dictionary to pretty print |
| :type params: dict |
| |
| :param offset: The offset in characters to add at the begin of each line. |
| :type offset: int |
| |
| :param printer: The function to convert entries to strings, typically |
| the builtin str or repr |
| :type printer: callable |
| |
    :return: The pretty-printed string representation of params.
    :rtype: str
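
    A usage sketch:

    >>> print(_pprint({'alpha': 0.5, 'n_bins': 10}))
    alpha=0.5, n_bins=10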
| """ |
|
|
| |
| options = np.get_printoptions() |
| np.set_printoptions(precision=5, threshold=64, edgeitems=2) |
| params_list = list() |
| this_line_length = offset |
| line_sep = ',\n' + (1 + offset // 2) * ' ' |
| for i, (k, v) in enumerate(sorted(params.items())): |
| if type(v) is float: |
| |
| |
| |
| this_repr = '%s=%s' % (k, str(v)) |
| else: |
| |
| this_repr = '%s=%s' % (k, printer(v)) |
| if len(this_repr) > 500: |
| this_repr = this_repr[:300] + '...' + this_repr[-100:] |
| if i > 0: |
| if this_line_length + len(this_repr) >= 75 or '\n' in this_repr: |
| params_list.append(line_sep) |
| this_line_length = len(line_sep) |
| else: |
| params_list.append(', ') |
| this_line_length += 2 |
| params_list.append(this_repr) |
| this_line_length += len(this_repr) |
|
|
| np.set_printoptions(**options) |
| lines = ''.join(params_list) |
| |
| lines = '\n'.join(l.rstrip(' ') for l in lines.split('\n')) |
| return lines |
|
|
def get_activation_by_name(name):
    """Return a torch.nn activation module given its lower-case name.
    Supported names are 'relu', 'sigmoid', 'tanh' and 'leakyrelu'.
    """
    activations = {
        'relu': nn.ReLU(),
        'sigmoid': nn.Sigmoid(),
        'tanh': nn.Tanh(),
        'leakyrelu': nn.LeakyReLU()
    }

    if name in activations.keys():
        return activations[name]
    else:
        raise ValueError("{0} is not a valid activation function".format(name))
|
|
def get_optimal_n_bins(X, upper_bound=None, epsilon=1):
    """ Determine the optimal number of bins for a histogram using the
    Birge and Rozenholc method (see :cite:`birge2006many` for details).
| |
| See https://doi.org/10.1051/ps:2006001 |
| |
| Parameters |
| ---------- |
| X : array-like of shape (n_samples, n_features) |
| The samples to determine the optimal number of bins for. |
| |
| upper_bound : int, default=None |
| The maximum value of n_bins to be considered. |
| If set to None, np.sqrt(X.shape[0]) will be used as upper bound. |
| |
    epsilon : float, default = 1
        A stabilizing term added inside the logarithm to avoid taking the
        log of zero.
| |
| Returns |
| ------- |
    optimal_n_bins : int
        The optimal value of n_bins according to the Birge and Rozenholc
        method
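    Examples
    --------
    A usage sketch on random data; the exact optimum depends on the sample,
    so only the valid range is checked here:

    >>> import numpy as np
    >>> X = np.random.RandomState(0).randn(100, 1)
    >>> bool(1 <= get_optimal_n_bins(X) <= 10)
    True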
| """ |
| if upper_bound is None: |
| upper_bound = int(np.sqrt(X.shape[0])) |
|
|
| n = X.shape[0] |
| maximum_likelihood = np.zeros((upper_bound - 1, 1)) |
|
|
| for i, b in enumerate(range(1, upper_bound)): |
| histogram, _ = np.histogram(X, bins=b) |
|
|
| maximum_likelihood[i] = np.sum( |
| histogram * np.log(b * histogram / n + epsilon) - ( |
| b - 1 + np.power(np.log(b), 2.5))) |
|
|
| return np.argmax(maximum_likelihood) + 1 |