concept-drift-simulator / drift_simulator.py
Yoon-gu Hwang
Change to continuous data with line graph visualization
befeb85
import numpy as np
from typing import Tuple
def generate_sudden_drift(n_samples: int = 1000, drift_point: int = 500) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Generate a series whose distribution switches abruptly at one index.

    Samples before ``drift_point`` follow ``2 + sin(X / 50)`` and samples
    from ``drift_point`` onward follow ``5 - sin(X / 50)``; both get
    Gaussian noise with standard deviation 0.3 from the global NumPy RNG.

    Returns:
        ``(X, y, drift_points)`` where ``X`` is ``arange(n_samples)``, ``y``
        is the generated signal and ``drift_points`` is ``[drift_point]``.
    """
    X = np.arange(n_samples)
    pre_drift = X[:drift_point]
    post_drift = X[drift_point:]

    # Noise for the "before" segment is drawn first, then the "after"
    # segment, so a seeded run consumes the RNG in a fixed order.
    pre_noise = np.random.normal(0, 0.3, drift_point)
    post_noise = np.random.normal(0, 0.3, n_samples - drift_point)

    y = np.concatenate(
        (
            2 + np.sin(pre_drift / 50) + pre_noise,    # concept before the drift
            5 - np.sin(post_drift / 50) + post_noise,  # completely different pattern
        )
    )
    return X, y, np.array([drift_point])
def generate_gradual_drift(n_samples: int = 1000, drift_start: int = 300, drift_end: int = 700) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Generate a series that blends slowly from one concept into another.

    Before ``drift_start`` the signal is ``2 + sin(X / 50)``; from
    ``drift_end`` onward it is ``5 - sin(X / 50)``. In between, each sample
    is a weighted mix of one noisy draw from each concept, with the weight
    of the new concept rising linearly from 0 at ``drift_start``. All noise
    is Gaussian with standard deviation 0.3 from the global NumPy RNG.

    Returns:
        ``(X, y, drift_points)`` where ``drift_points`` is
        ``[drift_start, drift_end]``.
    """
    noise_sd = 0.3
    X = np.arange(n_samples)
    y = np.zeros(n_samples)

    # Stable old concept before the transition begins.
    head = X[:drift_start]
    y[:drift_start] = 2 + np.sin(head / 50) + np.random.normal(0, noise_sd, drift_start)

    # Transition window: per sample, draw the old-concept value first and the
    # new-concept value second, then interpolate between them.
    span = drift_end - drift_start
    for offset, idx in enumerate(range(drift_start, drift_end)):
        wave = np.sin(X[idx] / 50)
        from_old = 2 + wave + np.random.normal(0, noise_sd)
        from_new = 5 - wave + np.random.normal(0, noise_sd)
        mix = offset / span
        y[idx] = (1 - mix) * from_old + mix * from_new

    # Stable new concept once the transition has finished.
    tail = X[drift_end:]
    y[drift_end:] = 5 - np.sin(tail / 50) + np.random.normal(0, noise_sd, n_samples - drift_end)

    return X, y, np.array([drift_start, drift_end])
def generate_incremental_drift(n_samples: int = 1000, n_steps: int = 5) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Generate a series with incremental (staircase) drift.

    The series is split into ``n_steps + 1`` consecutive segments of
    ``n_samples // (n_steps + 1)`` samples (the last segment absorbs the
    remainder). Segment ``k`` follows ``level_k + sin(X / 50)`` plus Gaussian
    noise (standard deviation 0.3, global NumPy RNG), where ``level_k`` moves
    linearly from 2 in the first segment to 5 in the last.

    Args:
        n_samples: Length of the generated series.
        n_steps: Number of level changes. ``0`` yields a single stationary
            segment at level 2 with no drift points.

    Returns:
        ``(X, y, drift_points)`` where ``drift_points`` is an integer array
        holding the start index of every segment after the first.

    Raises:
        ValueError: If ``n_steps`` is negative.
    """
    if n_steps < 0:
        raise ValueError(f"n_steps must be non-negative, got {n_steps}")

    X = np.arange(n_samples)
    y = np.zeros(n_samples)
    step_size = n_samples // (n_steps + 1)
    drift_points = []

    for step in range(n_steps + 1):
        start_idx = step * step_size
        end_idx = (step + 1) * step_size if step < n_steps else n_samples

        # The mean moves a little each step, from 2 up to 5. With no steps
        # there is nothing to interpolate, so stay at the starting level
        # instead of dividing by zero.
        progress = step / n_steps if n_steps else 0.0
        mean_shift = 2 + progress * 3

        y[start_idx:end_idx] = (
            mean_shift
            + np.sin(X[start_idx:end_idx] / 50)
            + np.random.normal(0, 0.3, end_idx - start_idx)
        )
        if step > 0:
            drift_points.append(start_idx)

    # Explicit integer dtype: an empty list would otherwise become a float64
    # array, which cannot be used to index ``X`` or ``y``.
    return X, y, np.array(drift_points, dtype=int)
def generate_recurring_drift(n_samples: int = 1000, cycle_length: int = 250) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Generate a series in which an earlier concept periodically returns.

    Each cycle of ``cycle_length`` samples starts with concept A,
    ``2 + sin(X / 50)``, for the first ``cycle_length // 2`` samples and then
    switches to concept B, ``5 - sin(X / 50)``, for the rest of the cycle.
    One Gaussian noise value (standard deviation 0.3, global NumPy RNG) is
    drawn per sample, in index order.

    Args:
        n_samples: Length of the generated series.
        cycle_length: Number of samples in one A-then-B cycle; must be
            positive.

    Returns:
        ``(X, y, drift_points)`` where ``drift_points`` is an integer array
        of the indices at which concept B starts in each cycle. The switch
        back to concept A at the start of the next cycle is not recorded.

    Raises:
        ValueError: If ``cycle_length`` is not positive.
    """
    if cycle_length <= 0:
        raise ValueError(f"cycle_length must be positive, got {cycle_length}")

    X = np.arange(n_samples)
    half_cycle = cycle_length // 2
    cycle_pos = X % cycle_length
    in_concept_a = cycle_pos < half_cycle

    wave = np.sin(X / 50)
    noise = np.random.normal(0, 0.3, n_samples)
    # Concept A in the first half of every cycle, concept B in the second.
    y = np.where(in_concept_a, 2 + wave, 5 - wave) + noise

    # flatnonzero always returns an integer array, even when no switch is
    # reached, so the result can be used directly as an index.
    drift_points = np.flatnonzero(cycle_pos == half_cycle)
    return X, y, drift_points
def get_drift_description(drift_type: str) -> str:
    """Return the user-facing Korean description for a drift type.

    Args:
        drift_type: One of ``"sudden"``, ``"gradual"``, ``"incremental"`` or
            ``"recurring"``.

    Returns:
        The description for ``drift_type``, or a generic "unknown drift
        type" message when the key is not recognised.
    """
    # The literals are stored as proper UTF-8 Korean; the previous text was
    # the same sentences mis-decoded into Greek/Latin symbol sequences.
    descriptions = {
        "sudden": "급격한 드리프트: 특정 시점에서 데이터 분포가 갑자기 변경됩니다. 예: 팬데믹, 정책 변경 등",
        "gradual": "점진적 드리프트: 이전 분포와 새 분포가 섞이며 천천히 전환됩니다. 전환 기간 동안 두 컨셉이 공존합니다.",
        "incremental": "증분적 드리프트: 작은 단계로 변화가 발생하여 계단식 패턴을 형성합니다.",
        "recurring": "반복적 드리프트: 이전 분포가 주기적으로 다시 나타납니다. 계절성이나 주기적 패턴에서 발생합니다.",
    }
    return descriptions.get(drift_type, "알 수 없는 드리프트 유형")