File size: 1,159 Bytes
a7d861a
 
 
 
 
 
 
 
 
 
 
 
 
 
c5184df
a7d861a
 
 
 
 
007017f
a7d861a
 
007017f
a7d861a
 
007017f
681b241
 
a7d861a
 
 
007017f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
from abc import ABC, abstractmethod
import pandas as pd


# Column name of the single-column DataFrame that SimpleMethodology passes to
# a scaler's ``transform``.
# NOTE(review): presumably the feature name the scalers were fitted on - confirm.
_SCALER_X_LABEL = 'score'


class _TransformerProtocol:
    """Annotation-only stand-in for any object exposing ``transform(X)``.

    This is a plain class rather than a ``typing.Protocol``; it is used to
    annotate the scalers accepted by ``SimpleMethodology``.
    """

    def transform(self, X):
        """Placeholder with no behaviour of its own; always returns ``None``."""
        

class CountBasedMethodology(ABC):
    """Abstract interface for methodologies exposing a single ``execute`` step.

    The class cannot be instantiated directly; concrete subclasses must
    override ``execute``.
    """

    @abstractmethod
    def execute(self, neighbours_df: pd.DataFrame, lengths: pd.Series) -> pd.Series:
        """Turn a neighbours frame and a lengths Series into a Series.

        The base implementation has no behaviour and returns ``None``.

        Args:
            neighbours_df: Input frame; its meaning is defined by the subclass.
            lengths: Input Series; its meaning is defined by the subclass.

        Returns:
            A Series computed by the concrete subclass.
        """



class SimpleMethodology(CountBasedMethodology):
    """Position-weighted column sum, rescaled by a per-length scaler.

    Each column of ``neighbours_df`` is multiplied by its 1-based position,
    the weighted columns are added together, and every row total is passed
    through the scaler registered for that row's length (``fallback_scaler``
    when none is registered). The result is ``1 - scaled ** score_power``.
    """

    def __init__(self, scalers: dict[int, _TransformerProtocol], fallback_scaler: _TransformerProtocol, score_power: float = 1.0):
        """Store the scalers and the exponent used by ``execute``.

        Args:
            scalers: Scaler to use for each length value.
            fallback_scaler: Scaler used for lengths missing from ``scalers``.
            score_power: Exponent applied to the scaled value before it is
                subtracted from 1.
        """
        self._scalers = scalers
        self._fallback_scaler = fallback_scaler
        self._score_power = score_power

    def _scale_row(self, row: pd.Series):
        """Scale one row's ``'unscaled'`` total with the scaler for its ``'length'``."""
        scaler = self._scalers.get(row['length'], self._fallback_scaler)
        # The scaler is given a one-row, one-column frame; only element [0][0]
        # of whatever ``transform`` returns is kept.
        features = pd.DataFrame({_SCALER_X_LABEL: row['unscaled']}, index=[0])
        return scaler.transform(features)[0][0]

    def execute(self, neighbours_df: pd.DataFrame, lengths: pd.Series) -> pd.Series:
        """Compute ``1 - scaled ** score_power`` for every row.

        Args:
            neighbours_df: Frame whose columns are weighted by 1-based position
                and summed row-wise. It needs at least one column: with none,
                the sum is the plain integer 0 and the ``rename`` call fails.
            lengths: Per-row key used to pick a scaler; joined to the row
                totals on the index.

        Returns:
            A Series indexed like the joined data. It is empty (float dtype)
            when there are no rows.
        """
        unscaled = sum(neighbours_df[col] * (i + 1) for i, col in enumerate(neighbours_df.columns))
        concat = pd.concat([unscaled.rename('unscaled'), lengths.rename('length')], axis=1)
        if concat.empty:
            # Row-wise ``apply`` on a frame without rows does not reliably give
            # a Series (it may return an empty DataFrame, or probe the scaler
            # with a synthetic NaN row), so return the empty result directly.
            return pd.Series(index=concat.index, dtype=float)
        scaled = concat.apply(self._scale_row, axis=1)
        # ``**`` binds tighter than ``-``: this is 1 - (scaled ** power).
        return 1 - scaled ** self._score_power