# harmonic-analysis/src/methodology.py
# ohollo: Introduce score power (007017f)
from abc import ABC, abstractmethod
import pandas as pd
# Column label of the one-row DataFrame that is passed to a scaler's ``transform``.
_SCALER_X_LABEL = 'score'
class _TransformerProtocol:
    """Stand-in type for objects that expose a ``transform(X)`` method.

    The method defined here is a placeholder: it performs no work and
    returns ``None``.
    """

    def transform(self, X):
        """Placeholder for a transformation of ``X``; returns ``None``."""
        return None
class CountBasedMethodology(ABC):
    """Abstract interface for methodologies exposing a single ``execute`` step."""

    @abstractmethod
    def execute(self, neighbours_df: pd.DataFrame, lengths: pd.Series) -> pd.Series:
        """Compute a result series from ``neighbours_df`` and ``lengths``.

        Subclasses must override this method before they can be
        instantiated. The base body does nothing and returns ``None``.
        """
        return None
class SimpleMethodology(CountBasedMethodology):
    """Methodology returning ``1 - scaled ** score_power`` for every row.

    The unscaled value of a row is the sum of its ``neighbours_df`` entries,
    each multiplied by the 1-based position of its column. That value is then
    passed through the scaler registered for the row's length (or through the
    fallback scaler when the length has no entry).
    """

    def __init__(
        self,
        scalers: dict[int, _TransformerProtocol],
        fallback_scaler: _TransformerProtocol,
        score_power: float = 1.0,
    ):
        """Store the scalers and the exponent.

        Args:
            scalers: Transformers keyed by length; a row's length selects
                the transformer applied to that row.
            fallback_scaler: Transformer used when a row's length is not a
                key of ``scalers``.
            score_power: Exponent applied to the scaled value before it is
                subtracted from 1.
        """
        self._scalers = scalers
        self._fallback_scaler = fallback_scaler
        self._score_power = score_power

    def execute(self, neighbours_df: pd.DataFrame, lengths: pd.Series) -> pd.Series:
        """Return ``1 - scaled ** score_power`` for each row.

        Args:
            neighbours_df: Frame whose columns are weighted by their 1-based
                position and summed row-wise.
            lengths: Series combined with the weighted sums by index; each
                value selects the scaler for its row.

        Returns:
            A float Series indexed like the index-aligned combination of
            ``neighbours_df`` and ``lengths``. Inputs with no rows give an
            empty Series; a frame with no columns gives an unscaled value of
            0 for every row.
        """
        # Seeding the sum with a zero Series keeps the result a Series even
        # when the frame has no columns (a bare ``sum`` would return int 0).
        zeros = pd.Series(0, index=neighbours_df.index)
        unscaled = sum(
            (
                neighbours_df[col] * weight
                for weight, col in enumerate(neighbours_df.columns, start=1)
            ),
            zeros,
        )
        frame = pd.concat(
            [unscaled.rename('unscaled'), lengths.rename('length')],
            axis=1,
        )
        # An explicit loop is used instead of ``DataFrame.apply(axis=1)``:
        # ``apply`` special-cases zero-row frames, so it does not reliably
        # produce an empty Series there.
        scaled_values = [
            self._scale_one(length, value)
            for length, value in zip(frame['length'], frame['unscaled'])
        ]
        scaled = pd.Series(scaled_values, index=frame.index, dtype=float)
        return 1 - scaled ** self._score_power

    def _scale_one(self, length, value):
        """Scale one unscaled value with the scaler selected by ``length``."""
        scaler = self._scalers.get(length, self._fallback_scaler)
        single_row = pd.DataFrame({_SCALER_X_LABEL: value}, index=[0])
        # Assumes ``transform`` returns a 2-D, positionally indexable result
        # (e.g. a NumPy array) — TODO confirm against the scalers in use.
        return scaler.transform(single_row)[0][0]