File size: 720 Bytes
e057d08
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# Dataset Configuration
# =====================

# Local Datasets (from datasets folder)
# This source is switched on in this file.
local_datasets:
  enabled: true
  # NOTE(review): relative path — presumably resolved against either this
  # file's directory or the process working directory; confirm in the loader.
  path: '../datasets'

# TabZilla Datasets (subset of 20)
# Switched off in this file. Its path is a subdirectory of '../datasets',
# the folder configured for local_datasets.
# NOTE(review): whether the local_datasets scan would also pick up this
# subdirectory once both sources are enabled is not visible here — confirm.
tabzilla:
  enabled: false  # Enable when data is available
  path: '../datasets/tabzilla'

# OpenML-CC18 (Classification subset)
# Switched off in this file. Its path is a subdirectory of '../datasets',
# the folder configured for local_datasets.
# NOTE(review): confirm the local_datasets scan does not also descend into
# this subdirectory when both sources are enabled.
openml_cc18:
  enabled: false
  path: '../datasets/openml_cc18'

# Dataset Filters
# NOTE(review): the key names suggest lower/upper bounds on a dataset's
# sample count and feature count plus a list of accepted task types.
# Whether the bounds are inclusive, and whether the filters apply to every
# enabled source, is not visible here — confirm against the consumer.
filters:
  min_samples: 100
  max_samples: 100000
  min_features: 2
  max_features: 1000
  # Both task types are listed; presumably a dataset whose task is not in
  # this list is skipped — TODO confirm.
  task_types:
    - classification
    - regression

# Preprocessing
# NOTE(review): the order in which these steps run is decided by the
# consumer, not by key order in this mapping — confirm if it matters.
preprocessing:
  # Strategy for missing values; the accepted names are listed inline.
  # NOTE(review): how 'mean' is applied to non-numeric columns is not
  # visible here — confirm, since encode_categoricals is also enabled.
  handle_missing: 'mean'  # mean, median, most_frequent, drop
  encode_categoricals: true
  scale_features: false  # Most models handle scaling internally