"""UCI SECOM dataset loader.

Semiconductor manufacturing process sensor data (1567 rows x 590 features, pass/fail labels).
Source: https://archive.ics.uci.edu/dataset/179/secom
Place secom.data and secom_labels.data in raw/ to make them loadable (see data/README.md).

The Tier 1 anomaly-detection agent uses this data to compute anomaly scores and contributing features.
"""
| from pathlib import Path |
|
|
| import pandas as pd |
|
|
# Directory holding the raw SECOM files; resolved relative to this module so
# the lookup does not depend on the caller's current working directory.
RAW_DIR = Path(__file__).parent / "raw"
|
|
|
|
def load_secom() -> tuple[pd.DataFrame, pd.Series]:
    """Return the SECOM sensor features and their pass/fail labels.

    Both files are read from ``RAW_DIR`` as whitespace-separated text without
    a header row.

    Returns:
        A ``(features, labels)`` tuple.

        * ``features``: one column per sensor, named ``sensor_000``,
          ``sensor_001``, ... by zero-padded position. The canonical dataset
          is 1567 x 590 and contains missing values.
        * ``labels``: the first column of the label file, where ``1`` means
          fail (anomaly) and ``-1`` means pass (normal). Any further columns
          in the label file are ignored.

    Raises:
        FileNotFoundError: If ``secom.data`` or ``secom_labels.data`` is
            missing from ``RAW_DIR``.
    """
    data_path = RAW_DIR / "secom.data"
    label_path = RAW_DIR / "secom_labels.data"
    # Fail early with setup instructions instead of pandas' generic error.
    if not data_path.exists() or not label_path.exists():
        raise FileNotFoundError(
            f"SECOM 데이터가 없음, {RAW_DIR}에 secom.data / secom_labels.data 를 두세요 "
            "(data/README.md 참고)"
        )

    features = pd.read_csv(data_path, sep=r"\s+", header=None)
    # The raw file has no header; give every sensor a stable, sortable name.
    features.columns = [f"sensor_{i:03d}" for i in range(features.shape[1])]
    # Only the first column is the label, so restrict parsing to it.
    labels = pd.read_csv(label_path, sep=r"\s+", header=None, usecols=[0])[0]
    return features, labels
|
|