File size: 1,558 Bytes
77d44e1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
"""SECOM ์ „์ฒ˜๋ฆฌ

์ด์ƒ ํƒ์ง€ ๋ชจ๋ธ์— ๋„ฃ๊ธฐ ์ „ ๊ณตํ†ต ์ „์ฒ˜๋ฆฌ
- ์ „๋ถ€ ๊ฒฐ์ธก์ด๊ฑฐ๋‚˜ ๋ถ„์‚ฐ์ด 0(์ƒ์ˆ˜)์ธ ์ปฌ๋Ÿผ ์ œ๊ฑฐ
- ๋‚จ์€ ๊ฒฐ์ธก์น˜๋Š” ์ค‘์•™๊ฐ’์œผ๋กœ ์ž„ํ“จํ…Œ์ด์…˜
- StandardScaler๋กœ ์Šค์ผ€์ผ๋ง

agents/detection.py์™€ experiments/ ์–‘์ชฝ์—์„œ ๊ณต์šฉ์œผ๋กœ ์‚ฌ์šฉ
fit์€ train ๋ฐ์ดํ„ฐ์—๋งŒ, transform์€ train/test ๊ณตํ†ต์œผ๋กœ ์ ์šฉ
"""
import numpy as np
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler


class SecomPreprocessor:
    """Sklearn-style fit/transform preprocessor for SECOM sensor data.

    Fitting picks the columns worth keeping (at least one observed value and
    a variance strictly greater than ``var_threshold``), then fits a median
    imputer and a standard scaler on those columns. Transforming applies the
    same column selection, imputation and scaling to a frame that contains
    the kept columns.

    Attributes:
        var_threshold: Columns whose variance does not exceed this value are
            dropped during ``fit``.
        keep_cols: Labels of the columns retained by the most recent ``fit``.
        imputer: ``SimpleImputer`` configured with the median strategy.
        scaler: ``StandardScaler`` fitted on the imputed kept columns.
    """

    def __init__(self, var_threshold: float = 0.0):
        self.var_threshold = var_threshold
        # Empty until fit() has chosen the surviving columns.
        self.keep_cols: list[str] = []
        self.imputer = SimpleImputer(strategy="median")
        self.scaler = StandardScaler()

    def fit(self, X: pd.DataFrame) -> "SecomPreprocessor":
        """Learn the column selection, imputation and scaling from ``X``.

        Args:
            X: Frame to fit on; its columns are screened by missingness and
                by variance.

        Returns:
            This instance, so calls can be chained.
        """
        # Step 1: discard columns that hold no observed value at all.
        has_observation = X.notna().any()
        observed_columns = X.columns[has_observation]

        # Step 2: among the rest, keep only columns whose variance exceeds the
        # threshold (near-constant columns are removed; a NaN variance fails
        # the comparison and is removed as well).
        column_variance = X[observed_columns].var()
        exceeds_threshold = column_variance > self.var_threshold
        self.keep_cols = list(column_variance[exceeds_threshold].index)

        # Step 3: fit the imputer first, then fit the scaler on imputed values.
        selected = X[self.keep_cols]
        self.imputer.fit(selected)
        imputed = self.imputer.transform(selected)
        self.scaler.fit(imputed)
        return self

    def transform(self, X: pd.DataFrame) -> np.ndarray:
        """Select the kept columns of ``X``, impute them, then scale them.

        Args:
            X: Frame containing every column listed in ``keep_cols``.

        Returns:
            The output of the scaler applied to the imputed kept columns.
        """
        selected = X[self.keep_cols]
        imputed = self.imputer.transform(selected)
        return self.scaler.transform(imputed)

    def fit_transform(self, X: pd.DataFrame) -> np.ndarray:
        """Fit on ``X`` and return the transformed version of the same frame."""
        fitted = self.fit(X)
        return fitted.transform(X)