File size: 4,154 Bytes
3e3dc68
 
 
 
 
befeb85
 
3e3dc68
befeb85
 
3e3dc68
befeb85
 
3e3dc68
 
 
 
 
 
 
befeb85
 
3e3dc68
befeb85
 
3e3dc68
befeb85
3e3dc68
 
 
befeb85
 
 
3e3dc68
befeb85
 
3e3dc68
 
 
 
 
befeb85
3e3dc68
befeb85
 
3e3dc68
befeb85
3e3dc68
 
befeb85
3e3dc68
befeb85
3e3dc68
befeb85
 
 
3e3dc68
 
 
 
 
 
 
 
 
befeb85
 
3e3dc68
 
 
 
 
 
 
befeb85
 
3e3dc68
befeb85
 
3e3dc68
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import numpy as np
from typing import Tuple

def generate_sudden_drift(n_samples: int = 1000, drift_point: int = 500) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Build a series whose distribution switches abruptly at ``drift_point``.

    Samples with index below ``drift_point`` follow ``2 + sin(X / 50)``;
    samples from ``drift_point`` onward follow ``5 - sin(X / 50)``, a
    completely different pattern. Both parts receive Gaussian noise
    (mean 0, standard deviation 0.3) drawn from NumPy's global random state.

    Args:
        n_samples: Total number of samples to generate.
        drift_point: Index at which the second concept starts.

    Returns:
        A tuple ``(X, y, drift_points)``: the sample indices ``0..n_samples-1``,
        the generated values, and a one-element array holding ``drift_point``.
    """
    X = np.arange(n_samples)
    head, tail = X[:drift_point], X[drift_point:]

    # Noise for the pre-drift part is drawn first, then for the post-drift
    # part, so a seeded global generator yields a reproducible series.
    before = 2 + np.sin(head / 50) + np.random.normal(0, 0.3, drift_point)
    after = 5 - np.sin(tail / 50) + np.random.normal(0, 0.3, n_samples - drift_point)

    return X, np.concatenate((before, after)), np.array([drift_point])


def generate_gradual_drift(n_samples: int = 1000, drift_start: int = 300, drift_end: int = 700) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Generate a series that moves gradually from one concept to another.

    Before ``drift_start`` the series follows ``2 + sin(X / 50)``; from
    ``drift_end`` onward it follows ``5 - sin(X / 50)``. Inside the window
    ``[drift_start, drift_end)`` each sample is a weighted blend of one noisy
    draw from each concept, with the weight of the new concept rising
    linearly from 0 towards 1. All noise is Gaussian (mean 0, standard
    deviation 0.3) and comes from NumPy's global random state.

    Args:
        n_samples: Total number of samples to generate.
        drift_start: Index of the first blended sample.
        drift_end: Index of the first sample that follows only the new concept.

    Returns:
        A tuple ``(X, y, drift_points)``: the sample indices, the generated
        values, and the array ``[drift_start, drift_end]``.

    Raises:
        ValueError: If the boundaries do not satisfy
            ``0 <= drift_start <= drift_end <= n_samples``.
    """
    # Out-of-order or out-of-range boundaries would otherwise produce a series
    # that contradicts the returned drift points, or fail part-way through.
    if not 0 <= drift_start <= drift_end <= n_samples:
        raise ValueError(
            "expected 0 <= drift_start <= drift_end <= n_samples, got "
            f"drift_start={drift_start}, drift_end={drift_end}, n_samples={n_samples}"
        )

    X = np.arange(n_samples)
    y = np.zeros(n_samples)

    # Before drift: y = 2 + sin(X/50) + noise
    y[:drift_start] = 2 + np.sin(X[:drift_start] / 50) + np.random.normal(0, 0.3, drift_start)

    # Gradual transition: blend the two concepts with a linearly rising weight.
    transition_length = drift_end - drift_start
    if transition_length > 0:
        window = X[drift_start:drift_end]
        weight = (window - drift_start) / transition_length
        wave = np.sin(window / 50)
        # One (length, 2) draw is filled row by row, i.e. old-concept noise
        # then new-concept noise for each sample in turn.
        noise = np.random.normal(0, 0.3, (transition_length, 2))
        old_concept = 2 + wave + noise[:, 0]
        new_concept = 5 - wave + noise[:, 1]
        y[drift_start:drift_end] = (1 - weight) * old_concept + weight * new_concept

    # After drift: y = 5 - sin(X/50) + noise
    y[drift_end:] = 5 - np.sin(X[drift_end:] / 50) + np.random.normal(0, 0.3, n_samples - drift_end)

    drift_points = np.array([drift_start, drift_end])
    return X, y, drift_points


def generate_incremental_drift(n_samples: int = 1000, n_steps: int = 5) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Generate a series whose mean level climbs in small staircase steps.

    The series is cut into ``n_steps + 1`` consecutive segments of
    ``n_samples // (n_steps + 1)`` samples; the last segment also takes any
    remainder. Segment ``k`` follows ``level_k + sin(X / 50)`` plus Gaussian
    noise (mean 0, standard deviation 0.3, NumPy's global random state), where
    ``level_k`` rises linearly from 2 in the first segment to 5 in the last.
    With ``n_steps == 0`` there is a single segment at level 2 and no drift.

    Args:
        n_samples: Total number of samples to generate.
        n_steps: Number of level changes; must be non-negative.

    Returns:
        A tuple ``(X, y, drift_points)``: the sample indices, the generated
        values, and an integer array with the start index of every segment
        after the first.

    Raises:
        ValueError: If ``n_steps`` is negative.
    """
    if n_steps < 0:
        raise ValueError(f"n_steps must be non-negative, got {n_steps}")

    X = np.arange(n_samples)
    y = np.zeros(n_samples)

    n_segments = n_steps + 1
    step_size = n_samples // n_segments
    drift_points = []

    for step in range(n_segments):
        start_idx = step * step_size
        # The final segment runs to the end so no trailing samples are left unset.
        end_idx = (step + 1) * step_size if step < n_steps else n_samples

        # Fraction of the way from the first level to the last; a series
        # without steps stays at the base level instead of dividing by zero.
        progress = step / n_steps if n_steps else 0.0
        mean_shift = 2 + progress * 3  # moves from 2 to 5 across the segments
        y[start_idx:end_idx] = mean_shift + np.sin(X[start_idx:end_idx] / 50) + np.random.normal(0, 0.3, end_idx - start_idx)

        if step > 0:
            drift_points.append(start_idx)

    # An explicit integer dtype keeps the empty (no-drift) result an index array.
    return X, y, np.array(drift_points, dtype=int)


def generate_recurring_drift(n_samples: int = 1000, cycle_length: int = 250) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Generate a series in which an earlier concept periodically returns.

    Every cycle of ``cycle_length`` samples starts with concept A,
    ``2 + sin(X / 50)``, for the first ``cycle_length // 2`` samples and
    continues with concept B, ``5 - sin(X / 50)``, for the rest. Gaussian
    noise (mean 0, standard deviation 0.3) from NumPy's global random state is
    added to every sample, one draw per sample in index order.

    Args:
        n_samples: Total number of samples to generate.
        cycle_length: Number of samples in one A-then-B cycle; must be positive.

    Returns:
        A tuple ``(X, y, drift_points)``: the sample indices, the generated
        values, and an integer array with every index at which the active
        concept differs from the one at the previous index. This covers both
        the A-to-B switch inside a cycle and the B-to-A switch at the start
        of the next cycle.

    Raises:
        ValueError: If ``cycle_length`` is not positive.
    """
    if cycle_length <= 0:
        raise ValueError(f"cycle_length must be positive, got {cycle_length}")

    X = np.arange(n_samples)
    in_concept_a = (X % cycle_length) < (cycle_length // 2)

    wave = np.sin(X / 50)
    noise = np.random.normal(0, 0.3, n_samples)
    # Concept A: y = 2 + sin(X/50) + noise; concept B: y = 5 - sin(X/50) + noise
    y = np.where(in_concept_a, 2 + wave, 5 - wave) + noise

    # Derive change points from the concept mask itself, so returns to
    # concept A are reported and a cycle too short to hold both concepts
    # reports no drift at all.
    drift_points = np.flatnonzero(in_concept_a[1:] != in_concept_a[:-1]) + 1

    return X, y, drift_points


def get_drift_description(drift_type: str) -> str:
    """Return the user-facing (Korean) description of a drift type.

    Args:
        drift_type: One of ``"sudden"``, ``"gradual"``, ``"incremental"`` or
            ``"recurring"``.

    Returns:
        The description registered for ``drift_type``, or a generic
        "unknown drift type" message for any other key.
    """
    # The literals below are the intended UTF-8 Korean text; the previous
    # literals were a mis-decoded (mojibake) form of these same sentences.
    descriptions = {
        "sudden": "급격한 드리프트: 특정 시점에서 데이터 분포가 갑자기 변경됩니다. 예: 팬데믹, 정책 변경 등",
        "gradual": "점진적 드리프트: 이전 분포와 새 분포가 섞이며 천천히 전환됩니다. 전환 기간 동안 두 컨셉이 공존합니다.",
        "incremental": "증분적 드리프트: 작은 단계로 변화가 발생하여 계단식 패턴을 형성합니다.",
        "recurring": "반복적 드리프트: 이전 분포가 주기적으로 다시 나타납니다. 계절성이나 주기적 패턴에서 발생합니다.",
    }
    return descriptions.get(drift_type, "알 수 없는 드리프트 유형")