"""
Single Experiment Runner
=========================

Run a single model on a single dataset.

Usage:
    python -m runners.run_experiment --dataset adult --model sap-rpt1
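
Optional flags (defaults shown; defined in the argparse block at the bottom of this file):

    python -m runners.run_experiment --dataset adult --model xgboost \
        --config ../config/experiments.yaml --output-dir ../results/raw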

Author: UW MSIM Team
Date: November 2025
"""

import argparse
import json
import logging
import os
import sys
from pathlib import Path

import pandas as pd
import yaml

# Add parent directory to path
sys.path.insert(0, str(Path(__file__).parent.parent))

from models import (
    SAPRPT1Wrapper, SAPRPT1HFWrapper, TabPFNWrapper, TabICLWrapper,
    AutoGluonWrapper, XGBoostWrapper, CatBoostWrapper, LightGBMWrapper
)
from datasets.preprocessors import load_dataset
from datasets.dataset_catalog import DatasetCatalog
from evaluation import run_cross_validation, ComputeTracker

logger = logging.getLogger(__name__)


def get_model(model_name: str, task_type: str, config: dict):
    """
    Initialize model by name.

    Parameters
    ----------
    model_name : str
        Model identifier
    task_type : str
        'classification' or 'regression'
    config : dict
        Model configuration

    Returns
    -------
    model : BaseModelWrapper
        Initialized model
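
    Examples
    --------
    >>> # Illustrative call; 'n_estimators' is a placeholder kwarg, not a project default
    >>> model = get_model('xgboost', 'classification',
    ...                   {'model_params': {'xgboost': {'n_estimators': 200}}})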
    """
    model_map = {
        'sap-rpt1': SAPRPT1Wrapper,
        'sap-rpt1-small': lambda **kwargs: SAPRPT1Wrapper(model_size='small', **kwargs),
        'sap-rpt1-large': lambda **kwargs: SAPRPT1Wrapper(model_size='large', **kwargs),
        'sap-rpt1-hf': SAPRPT1HFWrapper,
        'tabpfn': TabPFNWrapper,
        'tabicl': TabICLWrapper,
        'autogluon': AutoGluonWrapper,
        'xgboost': XGBoostWrapper,
        'catboost': CatBoostWrapper,
        'lightgbm': LightGBMWrapper
    }

    if model_name not in model_map:
        raise ValueError(f"Unknown model: {model_name}. Choose from {list(model_map.keys())}")

    model_class = model_map[model_name]
    
    # Get specific parameters for this model
    model_config_key = model_name.replace('-', '_')
    # Special handling for size variants like sap-rpt1-small -> sap_rpt1
    if model_name.startswith('sap-rpt1-') and model_name not in ['sap-rpt1-hf']:
        model_config_key = 'sap_rpt1'

    model_params = config.get('model_params', {}).get(model_config_key, {})
    
    model = model_class(task_type=task_type, **model_params)

    logger.info(f"Initialized {model_name} for {task_type}")

    return model


def run_single_experiment(
    dataset_name: str,
    model_name: str,
    config: dict,
    output_dir: str = '../results/raw'
) -> dict:
    """
    Run experiment on single dataset with single model.

    Parameters
    ----------
    dataset_name : str
        Dataset name
    model_name : str
        Model name
    config : dict
        Experiment configuration
    output_dir : str
        Where to save results

    Returns
    -------
    summary : dict
        Experiment results
    """
    logger.info(f"\n{'='*60}")
    logger.info(f"Experiment: {model_name} on {dataset_name}")
    logger.info(f"{'='*60}\n")

    # Create output directory
    os.makedirs(output_dir, exist_ok=True)

    # Start compute tracking
    tracker = ComputeTracker(
        cost_per_hour=config.get('cost_per_hour', 0.90),
        gpu_type=config.get('gpu_type', 'H200')
    )
    tracker.start()

    try:
        # Load dataset
        logger.info("Loading dataset...")
        default_dataset_dir = str(Path(__file__).parent.parent.parent / 'datasets')
        dataset_dir = config.get('dataset_dir', default_dataset_dir)
        dataset_path = config.get('dataset_path', None)

        if dataset_path and os.path.exists(dataset_path):
            # Explicit path provided
            X, y, task_type = load_dataset(dataset_path)
        elif os.path.isdir(dataset_dir):
            # Search for dataset files in the download directory
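            # Looks for "<dataset>_X.csv" (features) and "<dataset>_y.csv" (target), matched
            # case-insensitively; a single "<dataset>.csv" via load_dataset is the fallback below.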
            X_file = None
            y_file = None
            for f in os.listdir(dataset_dir):
                fname_lower = f.lower()
                dname_lower = dataset_name.lower()
                if fname_lower == f"{dname_lower}_x.csv" or (fname_lower.endswith('_x.csv') and dname_lower in fname_lower):
                    X_file = os.path.join(dataset_dir, f)
                if fname_lower == f"{dname_lower}_y.csv" or (fname_lower.endswith('_y.csv') and dname_lower in fname_lower):
                    y_file = os.path.join(dataset_dir, f)

            if X_file and y_file:
                X = pd.read_csv(X_file)
                y = pd.read_csv(y_file).iloc[:, 0]
                # Determine task type: non-numeric or low-cardinality (<20 unique values)
                # targets are treated as classification, everything else as regression
                if y.dtype == 'object' or len(y.unique()) < 20:
                    task_type = 'classification'
                else:
                    task_type = 'regression'
                logger.info(f"Loaded {dataset_name}: {X.shape[0]} samples, {X.shape[1]} features, task={task_type}")
            else:
                # Fallback: try as a single CSV file
                csv_path = os.path.join(dataset_dir, f"{dataset_name}.csv")
                if os.path.exists(csv_path):
                    X, y, task_type = load_dataset(csv_path)
                else:
                    raise FileNotFoundError(
                        f"Dataset '{dataset_name}' not found in {dataset_dir}.\n"
                        f"Available files: {os.listdir(dataset_dir)[:10]}..."
                    )
        else:
            raise FileNotFoundError(
                f"Dataset directory not found: {dataset_dir}"
            )

        # Initialize model
        model = get_model(model_name, task_type, config)

        # Run cross-validation
        fold_results = run_cross_validation(
            model=model,
            X=X,
            y=y,
            task_type=task_type,
            n_folds=config.get('n_folds', 10),
            random_state=config.get('random_state', 42)
        )

        # Stop tracking
        compute_summary = tracker.stop()

        # Aggregate per-fold results
        results_df = pd.DataFrame(fold_results)

        summary = {
            'dataset': dataset_name,
            'model': model_name,
            'task_type': task_type,
            'n_samples': len(X),
            'n_features': X.shape[1],
            'n_folds': config.get('n_folds', 10),
            'mean_metrics': results_df.mean().to_dict(),
            'std_metrics': results_df.std().to_dict(),
            'fold_results': fold_results,
            'compute': compute_summary
        }

        # Save results
        output_file = os.path.join(output_dir, f"{dataset_name}_{model_name}.json")
        with open(output_file, 'w') as f:
            json.dump(summary, f, indent=2)

        logger.info(f"\n[SUCCESS] Results saved to {output_file}")

        # Print summary
        primary_metric = 'roc_auc' if task_type == 'classification' else 'r2'
        if primary_metric in summary['mean_metrics']:
            mean_val = summary['mean_metrics'][primary_metric]
            std_val = summary['std_metrics'][primary_metric]
            logger.info(f"\nPrimary Metric ({primary_metric}): {mean_val:.4f} ± {std_val:.4f}")

        return summary

    except Exception as e:
        logger.error(f"Experiment failed: {e}", exc_info=True)
        raise


if __name__ == "__main__":
    # Setup logging
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    )

    # Parse arguments
    parser = argparse.ArgumentParser(description='Run single benchmarking experiment')
    parser.add_argument('--dataset', required=True, help='Dataset name')
    parser.add_argument('--model', required=True, help='Model name')
    parser.add_argument('--config', default='../config/experiments.yaml', help='Config file')
    parser.add_argument('--output-dir', default='../results/raw', help='Output directory')

    args = parser.parse_args()

    # Load config
    if os.path.exists(args.config):
        with open(args.config) as f:
            config = yaml.safe_load(f)
    else:
        config = {
            'n_folds': 10,
            'random_state': 42,
            'cost_per_hour': 0.90,
            'gpu_type': 'H200'
        }
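
    # Illustrative experiments.yaml layout (only these keys are read here; values are examples):
    #   n_folds: 10
    #   random_state: 42
    #   cost_per_hour: 0.90
    #   gpu_type: H200
    #   dataset_dir: /path/to/datasets     # optional override of the default search directory
    #   dataset_path: /path/to/data.csv    # optional explicit file; takes precedence if it exists
    #   model_params:
    #     xgboost:
    #       n_estimators: 200              # forwarded as **kwargs to the model wrapper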

    # Run experiment
    results = run_single_experiment(
        dataset_name=args.dataset,
        model_name=args.model,
        config=config,
        output_dir=args.output_dir
    )

    print("\n[SUCCESS] Experiment complete!")