"""SECOM preprocessing.

Common preprocessing applied before the data is fed to the anomaly-detection
models:
- drop columns that are entirely missing or have zero variance (constant)
- impute the remaining missing values with the median
- scale with StandardScaler

Shared by both agents/detection.py and experiments/.
fit is applied to the train data only; transform is applied to both train
and test data.
"""
| import numpy as np |
| import pandas as pd |
| from sklearn.impute import SimpleImputer |
| from sklearn.preprocessing import StandardScaler |
|
|
|
|
class SecomPreprocessor:
    """Preprocess SECOM sensor data with a scikit-learn style fit/transform API.

    Steps learned by ``fit`` and replayed by ``transform``:

    1. keep only the columns that contain at least one observed value and
       whose variance is strictly greater than ``var_threshold``;
    2. fill the remaining missing values with the per-column median;
    3. standardize the result with ``StandardScaler``.
    """

    def __init__(self, var_threshold: float = 0.0):
        """Create an unfitted preprocessor.

        Args:
            var_threshold: A column is kept only if its variance is strictly
                greater than this value. The default of 0.0 drops constant
                columns.
        """
        self.var_threshold = var_threshold
        # Column labels retained by the most recent successful fit().
        self.keep_cols: list[str] = []
        self.imputer = SimpleImputer(strategy="median")
        self.scaler = StandardScaler()

    def fit(self, X: pd.DataFrame) -> "SecomPreprocessor":
        """Learn the column selection, medians and scaling statistics.

        Args:
            X: Training frame with one column per feature.

        Returns:
            This instance, so that calls can be chained.

        Raises:
            ValueError: If no column survives the missing-value and variance
                filter, because there would be nothing to impute or scale.
        """
        # Columns that are entirely NaN carry no information at all.
        non_empty = X.columns[X.notna().any()]
        variances = X[non_empty].var()
        # A NaN variance compares as False here, so such columns are dropped
        # together with the constant ones.
        selected = list(variances[variances > self.var_threshold].index)

        # Fail fast with a clear message instead of handing a zero-column
        # frame to the imputer. State is only overwritten once the selection
        # is known to be usable.
        if not selected:
            raise ValueError(
                "No columns left after dropping all-missing and low-variance "
                f"columns (var_threshold={self.var_threshold}); cannot fit."
            )
        self.keep_cols = selected

        kept = X[self.keep_cols]
        self.imputer.fit(kept)
        # The scaler must see imputed data so its statistics match what
        # transform() feeds it later.
        self.scaler.fit(self.imputer.transform(kept))
        return self

    def transform(self, X: pd.DataFrame) -> np.ndarray:
        """Apply the fitted column selection, imputation and scaling.

        Args:
            X: Frame that contains every column retained by ``fit``; other
                columns are ignored.

        Returns:
            The imputed and scaled values of the retained columns.
        """
        kept = X[self.keep_cols]
        return self.scaler.transform(self.imputer.transform(kept))

    def fit_transform(self, X: pd.DataFrame) -> np.ndarray:
        """Fit on ``X`` and return the transformed ``X``.

        Args:
            X: Training frame with one column per feature.

        Returns:
            The imputed and scaled values of the retained columns.

        Raises:
            ValueError: If no column survives the filter applied by ``fit``.
        """
        return self.fit(X).transform(X)
|
|