File size: 1,259 Bytes
8a48888
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
"""UCI SECOM ๋ฐ์ดํ„ฐ์…‹ ๋กœ๋”

๋ฐ˜๋„์ฒด ์ œ์กฐ ๊ณต์ • ์„ผ์„œ ๋ฐ์ดํ„ฐ (1567 row x 590 feature, pass/fail ๋ผ๋ฒจ)
์ถœ์ฒ˜: https://archive.ics.uci.edu/dataset/179/secom
raw/ ์— secom.data, secom_labels.data ๋ฅผ ๋‘๋ฉด ๋กœ๋“œ๋จ (data/README.md ์ฐธ๊ณ )

Tier 1 ์ด์ƒ ํƒ์ง€ ์—์ด์ „ํŠธ๊ฐ€ ์ด ๋ฐ์ดํ„ฐ๋กœ ์ด์ƒ ์ ์ˆ˜์™€ ๊ธฐ์—ฌ ํ”ผ์ฒ˜๋ฅผ ๊ณ„์‚ฐ
"""
from pathlib import Path

import pandas as pd

# Directory holding the raw SECOM files; it sits next to this module.
RAW_DIR = Path(__file__).parent.joinpath("raw")


def load_secom() -> tuple[pd.DataFrame, pd.Series]:
    """Load the SECOM sensor features together with their pass/fail labels.

    Both files are read from ``RAW_DIR`` as whitespace-delimited text with no
    header row.

    Returns:
        A ``(features, labels)`` pair.

        ``features`` has one column per field of ``secom.data``, named
        ``sensor_000``, ``sensor_001``, ... in file order. For the published
        UCI files the original notes give 1567 x 590, missing values
        included, i.e. ``sensor_000`` through ``sensor_589``.

        ``labels`` is the first column of ``secom_labels.data``; per the
        original notes 1 means fail (anomalous) and -1 means pass (normal).

    Raises:
        FileNotFoundError: If either ``secom.data`` or ``secom_labels.data``
            is absent from ``RAW_DIR``.
    """
    required = {
        "features": RAW_DIR.joinpath("secom.data"),
        "labels": RAW_DIR.joinpath("secom_labels.data"),
    }
    # Fail early with a pointer to the setup instructions rather than letting
    # pandas raise a bare "no such file" error.
    if not all(candidate.exists() for candidate in required.values()):
        message = (
            f"SECOM ๋ฐ์ดํ„ฐ๊ฐ€ ์—†์Œ, {RAW_DIR}์— secom.data / secom_labels.data ๋ฅผ ๋‘์„ธ์š” "
            "(data/README.md ์ฐธ๊ณ )"
        )
        raise FileNotFoundError(message)

    sensor_frame = pd.read_csv(required["features"], sep=r"\s+", header=None)
    n_sensors = sensor_frame.shape[1]
    # Zero-padded names keep the columns in file order when sorted as text.
    sensor_frame.columns = [f"sensor_{idx:03d}" for idx in range(n_sensors)]

    # Only the first field is kept; any further fields on a label line are
    # ignored.
    label_frame = pd.read_csv(
        required["labels"], sep=r"\s+", header=None, usecols=[0]
    )
    return sensor_frame, label_frame[0]