File size: 1,259 Bytes
"""UCI SECOM dataset loader.

Semiconductor manufacturing process sensor data
(1567 rows x 590 features, pass/fail labels).
Source: https://archive.ics.uci.edu/dataset/179/secom

Place secom.data and secom_labels.data under raw/ and they will be loaded
(see data/README.md).
The Tier 1 anomaly-detection agent uses this data to compute anomaly scores
and contributing features.
"""
from pathlib import Path
import pandas as pd
# Directory expected to hold the raw SECOM files: a "raw" folder that sits
# next to this module.
RAW_DIR = Path(__file__).parent.joinpath("raw")
def load_secom(
    raw_dir: "Path | str | None" = None,
) -> tuple[pd.DataFrame, pd.Series]:
    """Load the SECOM sensor features and their pass/fail labels.

    Args:
        raw_dir: Directory containing ``secom.data`` and
            ``secom_labels.data``. When omitted, the module-level
            ``RAW_DIR`` is used, which is the original behavior.

    Returns:
        A ``(features, labels)`` tuple.

        * ``features``: one row per sample and one column per sensor, named
          ``sensor_000``, ``sensor_001``, ... in file order. Missing values
          are kept as NaN. The published dataset is 1567 x 590.
        * ``labels``: the first column of the label file; any further
          columns on each line are ignored. In the published dataset
          ``1`` means fail (anomaly) and ``-1`` means pass (normal).

    Raises:
        FileNotFoundError: If either of the two data files is missing from
            the directory being read.
    """
    # Only fall back to the module constant when the caller gave no directory.
    base_dir = RAW_DIR if raw_dir is None else Path(raw_dir)
    data_path = base_dir / "secom.data"
    label_path = base_dir / "secom_labels.data"

    # Fail early with an actionable message instead of a bare pandas error.
    if not data_path.exists() or not label_path.exists():
        raise FileNotFoundError(
            f"SECOM 데이터가 없음, {base_dir}에 secom.data / secom_labels.data 를 두세요 "
            "(data/README.md 참고)"
        )

    # Both files are whitespace-delimited and have no header row.
    features = pd.read_csv(data_path, sep=r"\s+", header=None)
    features.columns = [f"sensor_{i:03d}" for i in range(features.shape[1])]

    # Keep only the label column; the rest of each line is not needed.
    labels = pd.read_csv(label_path, sep=r"\s+", header=None, usecols=[0])[0]
    return features, labels
|