File size: 1,558 Bytes
77d44e1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 | """SECOM preprocessing.

Common preprocessing applied before the data is fed to the anomaly-detection
models:
- drop columns that are entirely missing or have zero variance (constant)
- impute the remaining missing values with the median
- scale with StandardScaler

Shared by agents/detection.py and experiments/.
fit is applied to the training data only; transform is applied to both
train and test data.
"""
import numpy as np
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
class SecomPreprocessor:
    """Preprocess SECOM sensor data with an sklearn-style fit/transform API.

    Columns that are entirely missing, or whose variance does not exceed
    ``var_threshold``, are dropped. The remaining columns are passed through
    a median ``SimpleImputer`` and then a ``StandardScaler``.
    """

    def __init__(self, var_threshold: float = 0.0):
        """Create an unfitted preprocessor.

        Args:
            var_threshold: A column is kept only if its variance is strictly
                greater than this value.
        """
        self.var_threshold = var_threshold
        # Labels of the columns retained by ``fit``, in their original order.
        self.keep_cols: list[str] = []
        self.imputer = SimpleImputer(strategy="median")
        self.scaler = StandardScaler()

    def fit(self, X: pd.DataFrame) -> "SecomPreprocessor":
        """Select the usable columns and fit the imputer and scaler on them.

        Args:
            X: Training data, one column per feature.

        Returns:
            This instance, so calls can be chained.

        Raises:
            ValueError: If no column survives the filtering. The previously
                fitted state, if any, is left untouched in that case.
        """
        # Drop all-missing columns first, then columns whose variance is at
        # or below the threshold (nearly constant).
        non_empty = X.columns[X.notna().any()]
        variances = X[non_empty].var()
        # A NaN variance compares False here, so such columns are dropped too.
        keep_cols = list(variances[variances > self.var_threshold].index)
        if not keep_cols:
            # Fail here with an explicit reason instead of handing a
            # zero-column frame to the imputer.
            raise ValueError(
                "No columns left after dropping all-missing columns and "
                f"columns with variance <= {self.var_threshold}."
            )

        self.keep_cols = keep_cols
        kept = X[self.keep_cols]
        self.imputer.fit(kept)
        # The scaler is fitted on the imputed values, matching what
        # ``transform`` feeds it later.
        self.scaler.fit(self.imputer.transform(kept))
        return self

    def transform(self, X: pd.DataFrame) -> np.ndarray:
        """Apply the fitted column selection, imputation and scaling.

        Args:
            X: Data containing at least the columns in ``keep_cols``.

        Returns:
            The output of the scaler for the retained columns.
        """
        kept = X[self.keep_cols]
        return self.scaler.transform(self.imputer.transform(kept))

    def fit_transform(self, X: pd.DataFrame) -> np.ndarray:
        """Fit on ``X`` and return the transformed ``X``."""
        return self.fit(X).transform(X)
|