| import re |
|
|
| import numpy as np |
| from sklearn.preprocessing import MinMaxScaler |
|
|
|
|
class Discretizer:
    """Quantize a 1-D series onto a fixed grid of bin centers.

    Values are rescaled with a ``MinMaxScaler``, shifted down by 0.5 and then
    replaced by the center of the bin they fall into. The bin edges are
    ``n_tokens - 1`` evenly spaced points between ``low_limit`` and
    ``high_limit``; values beyond the outermost edges reuse the center of the
    nearest interior bin.

    NOTE(review): with scikit-learn's default ``MinMaxScaler`` settings the
    fit window is presumably mapped to [0, 1], i.e. [-0.5, 0.5] after the
    shift -- confirm against the scikit-learn documentation.
    """

    def __init__(self, low_limit=-1, high_limit=1, n_tokens=10002):
        """Build the scaler and the bin grid.

        Args:
            low_limit: Position of the first bin edge.
            high_limit: Position of the last bin edge.
            n_tokens: Number of bin centers (one per possible bin id).
        """
        self.scaler = MinMaxScaler()

        edges = np.linspace(low_limit, high_limit, n_tokens - 1)
        self.boundaries = edges

        # Midpoints of consecutive edges; the first and last midpoint are
        # duplicated so the two overflow bins (below the first edge, above
        # the last edge) also have a center to map to.
        midpoints = (edges[1:] + edges[:-1]) / 2
        self.centers = np.concatenate((midpoints[:1], midpoints, midpoints[-1:]))

    def get_centers(self):
        """Return the array of bin centers."""
        return self.centers

    def discretize(self, context, fit_length=None):
        """Fit the scaler and snap every value of ``context`` to a bin center.

        Args:
            context: NumPy array of values; it is flattened to one dimension.
            fit_length: Number of leading entries used to fit the scaler.
                Defaults to the full length of ``context``.

        Returns:
            Array of bin centers in the scaled, shifted space. Positions that
            are NaN in ``context`` are NaN in the result.
        """
        if fit_length is None:
            fit_length = len(context)

        # The scaler is (re)fitted on every call, using only the prefix.
        self.scaler.fit(context[:fit_length].reshape(-1, 1))
        rescaled = self.scaler.transform(context.reshape(-1, 1)).reshape(-1)
        centered = rescaled - 0.5

        bin_ids = np.digitize(x=centered, bins=self.boundaries, right=True)
        snapped = self.centers[bin_ids]

        # Restore missing values, which would otherwise carry a bin center.
        snapped[np.isnan(context)] = np.nan

        return snapped

    def inverse_discretize(self, scaled_context):
        """Map values from the scaled, shifted space back to original units.

        Uses the scaler state left by the most recent ``discretize`` call.

        Args:
            scaled_context: NumPy array of values in the scaled, shifted space.

        Returns:
            Flattened array of values in the original units.
        """
        shifted = scaled_context.reshape(-1, 1) + 0.5
        restored = self.scaler.inverse_transform(shifted)

        return restored.reshape(-1)
|
|
|
|
class Serializer:
    """Convert a numeric series to and from a flag-delimited text form.

    Each value is written as ``<time_flag><value><time_flag>`` and the tokens
    are joined with ``time_sep``. NaN values are written with ``nan_flag`` in
    place of the number.
    """

    def __init__(self, prec=4, time_sep=" ", time_flag="###", nan_flag="Nan"):
        """Store the formatting options.

        Args:
            prec: Number of digits after the decimal point.
            time_sep: String placed between consecutive tokens.
            time_flag: Marker written before and after every value.
            nan_flag: Text written in place of a NaN value.
        """
        self.prec = prec
        self.time_sep = time_sep
        self.time_flag = time_flag
        self.nan_flag = nan_flag

    def serialize(self, context):
        """Render ``context`` as a single delimited string.

        Args:
            context: 1-D array-like of numbers; NaN marks a missing value.

        Returns:
            The serialized string, or ``""`` for empty input.
        """
        values = np.asarray(context, dtype=float)
        nan_token = f"{self.time_flag}{self.nan_flag}{self.time_flag}"

        # Build a plain list of Python strings. A NumPy string array has a
        # fixed item width, so writing the NaN token into it would silently
        # truncate any token longer than the widest formatted number.
        tokens = [
            nan_token
            if is_missing
            else f"{self.time_flag}{value:.{self.prec}f}{self.time_flag}"
            for value, is_missing in zip(values, np.isnan(values))
        ]

        return self.time_sep.join(tokens)

    def inverse_serialize(self, serialized_context):
        """Parse a string produced by ``serialize`` back into numbers.

        Args:
            serialized_context: Text containing flag-delimited values.

        Returns:
            Float array with one entry per delimited token. Tokens equal to
            ``nan_flag`` become NaN. Any other token that cannot be parsed as
            a float is reported with ``print`` and also becomes NaN.
        """
        # Escape the flag so regex metacharacters in it are matched literally.
        flag = re.escape(self.time_flag)
        matches = re.findall(rf"{flag}(.*?){flag}", serialized_context)

        context = []
        for token in matches:
            if token == self.nan_flag:
                # Expected placeholder for a missing value: not an error.
                context.append(np.nan)
                continue
            try:
                context.append(float(token))
            except ValueError as e:
                print(e)
                # np.nan rather than np.NaN: the alias was removed in NumPy 2.0.
                context.append(np.nan)

        return np.array(context, dtype=float)
|
|