| """ | |
| Single Experiment Runner | |
| ========================= | |
| Run a single model on a single dataset. | |
| Usage: | |
| python -m runners.run_experiment --dataset adult --model sap-rpt1 | |
| Author: UW MSIM Team | |
| Date: November 2025 | |
| """ | |
import argparse
import json
import logging
import os
import sys
from pathlib import Path

import pandas as pd
import yaml

# Add parent directory to path so sibling packages resolve
sys.path.insert(0, str(Path(__file__).parent.parent))

from models import (
    SAPRPT1Wrapper,
    SAPRPT1HFWrapper,
    TabPFNWrapper,
    TabICLWrapper,
    AutoGluonWrapper,
    XGBoostWrapper,
    CatBoostWrapper,
    LightGBMWrapper,
)
from datasets.preprocessors import load_dataset
from datasets.dataset_catalog import DatasetCatalog
from evaluation import run_cross_validation, ComputeTracker

logger = logging.getLogger(__name__)


def get_model(model_name: str, task_type: str, config: dict):
    """
    Initialize a model by name.

    Parameters
    ----------
    model_name : str
        Model identifier
    task_type : str
        'classification' or 'regression'
    config : dict
        Experiment configuration (per-model params live under 'model_params')

    Returns
    -------
    model : BaseModelWrapper
        Initialized model
    """
    model_map = {
        'sap-rpt1': SAPRPT1Wrapper,
        'sap-rpt1-small': lambda **kwargs: SAPRPT1Wrapper(model_size='small', **kwargs),
        'sap-rpt1-large': lambda **kwargs: SAPRPT1Wrapper(model_size='large', **kwargs),
        'sap-rpt1-hf': SAPRPT1HFWrapper,
        'tabpfn': TabPFNWrapper,
        'tabicl': TabICLWrapper,
        'autogluon': AutoGluonWrapper,
        'xgboost': XGBoostWrapper,
        'catboost': CatBoostWrapper,
        'lightgbm': LightGBMWrapper,
    }

    if model_name not in model_map:
        raise ValueError(f"Unknown model: {model_name}. Choose from {list(model_map.keys())}")

    model_class = model_map[model_name]

    # Look up this model's parameters in the config (config keys use underscores)
    model_config_key = model_name.replace('-', '_')
    # Size variants such as sap-rpt1-small share the sap_rpt1 config block
    if model_name.startswith('sap-rpt1-') and model_name != 'sap-rpt1-hf':
        model_config_key = 'sap_rpt1'
    model_params = config.get('model_params', {}).get(model_config_key, {})

    model = model_class(task_type=task_type, **model_params)
    logger.info(f"Initialized {model_name} for {task_type}")
    return model
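

# Example call (hypothetical values; the 'xgboost' params are read from
# config['model_params']['xgboost'], mirroring the lookup above):
#   model = get_model('xgboost', 'classification',
#                     {'model_params': {'xgboost': {'n_estimators': 500}}})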
def run_single_experiment(
    dataset_name: str,
    model_name: str,
    config: dict,
    output_dir: str = '../results/raw'
) -> dict:
    """
    Run an experiment on a single dataset with a single model.

    Parameters
    ----------
    dataset_name : str
        Dataset name
    model_name : str
        Model name
    config : dict
        Experiment configuration
    output_dir : str
        Where to save results

    Returns
    -------
    summary : dict
        Experiment results
    """
| logger.info(f"\n{'='*60}") | |
| logger.info(f"Experiment: {model_name} on {dataset_name}") | |
| logger.info(f"{'='*60}\n") | |
| # Create output directory | |
| os.makedirs(output_dir, exist_ok=True) | |
| # Start compute tracking | |
| tracker = ComputeTracker( | |
| cost_per_hour=config.get('cost_per_hour', 0.90), | |
| gpu_type=config.get('gpu_type', 'H200') | |
| ) | |
| tracker.start() | |

    try:
        # Load dataset
        logger.info("Loading dataset...")
        default_dataset_dir = str(Path(__file__).parent.parent.parent / 'datasets')
        dataset_dir = config.get('dataset_dir', default_dataset_dir)
        dataset_path = config.get('dataset_path', None)

        if dataset_path and os.path.exists(dataset_path):
            # Explicit path provided
            X, y, task_type = load_dataset(dataset_path)
        elif os.path.isdir(dataset_dir):
            # Search for dataset files in the download directory
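            # Files are expected as '<dataset>_X.csv' (features) and
            # '<dataset>_y.csv' (labels), matched case-insensitively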
            X_file = None
            y_file = None
            for f in os.listdir(dataset_dir):
                fname_lower = f.lower()
                dname_lower = dataset_name.lower()
                if fname_lower == f"{dname_lower}_x.csv" or (fname_lower.endswith('_x.csv') and dname_lower in fname_lower):
                    X_file = os.path.join(dataset_dir, f)
                if fname_lower == f"{dname_lower}_y.csv" or (fname_lower.endswith('_y.csv') and dname_lower in fname_lower):
                    y_file = os.path.join(dataset_dir, f)

            if X_file and y_file:
                X = pd.read_csv(X_file)
                y = pd.read_csv(y_file).iloc[:, 0]
                # Heuristic: an object dtype or fewer than 20 unique values
                # indicates a classification target
                if y.dtype == 'object' or y.nunique() < 20:
                    task_type = 'classification'
                else:
                    task_type = 'regression'
| logger.info(f"Loaded {dataset_name}: {X.shape[0]} samples, {X.shape[1]} features, task={task_type}") | |
| else: | |
| # Fallback: try as a single CSV file | |
| csv_path = os.path.join(dataset_dir, f"{dataset_name}.csv") | |
| if os.path.exists(csv_path): | |
| X, y, task_type = load_dataset(csv_path) | |
| else: | |
| raise FileNotFoundError( | |
| f"Dataset '{dataset_name}' not found in {dataset_dir}.\n" | |
| f"Available files: {os.listdir(dataset_dir)[:10]}..." | |
| ) | |
| else: | |
| raise FileNotFoundError( | |
| f"Dataset directory not found: {dataset_dir}" | |
| ) | |

        # Initialize model
        model = get_model(model_name, task_type, config)

        # Run cross-validation
        fold_results = run_cross_validation(
            model=model,
            X=X,
            y=y,
            task_type=task_type,
            n_folds=config.get('n_folds', 10),
            random_state=config.get('random_state', 42)
        )

        # Stop tracking
        compute_summary = tracker.stop()
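
        # fold_results is assumed to be a list of per-fold metric dicts
        # (one record per fold), so it converts directly into a DataFrame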
        # Aggregate results
        results_df = pd.DataFrame(fold_results)

        summary = {
            'dataset': dataset_name,
            'model': model_name,
            'task_type': task_type,
            'n_samples': len(X),
            'n_features': X.shape[1],
            'n_folds': config.get('n_folds', 10),
            # numeric_only guards the aggregation if a fold dict ever
            # carries a non-numeric field
            'mean_metrics': results_df.mean(numeric_only=True).to_dict(),
            'std_metrics': results_df.std(numeric_only=True).to_dict(),
            'fold_results': fold_results,
            'compute': compute_summary
        }

        # Save results
        output_file = os.path.join(output_dir, f"{dataset_name}_{model_name}.json")
        with open(output_file, 'w') as f:
            json.dump(summary, f, indent=2)

        logger.info(f"\n[SUCCESS] Results saved to {output_file}")

        # Print summary
        primary_metric = 'roc_auc' if task_type == 'classification' else 'r2'
        if primary_metric in summary['mean_metrics']:
            mean_val = summary['mean_metrics'][primary_metric]
            std_val = summary['std_metrics'][primary_metric]
            logger.info(f"\nPrimary Metric ({primary_metric}): {mean_val:.4f} ± {std_val:.4f}")

        return summary

    except Exception as e:
        logger.error(f"Experiment failed: {e}", exc_info=True)
        raise


if __name__ == "__main__":
    # Set up logging
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    )

    # Parse arguments
    parser = argparse.ArgumentParser(description='Run a single benchmarking experiment')
    parser.add_argument('--dataset', required=True, help='Dataset name')
    parser.add_argument('--model', required=True, help='Model name')
    parser.add_argument('--config', default='../config/experiments.yaml', help='Config file')
    parser.add_argument('--output-dir', default='../results/raw', help='Output directory')
    args = parser.parse_args()

    # Load config (fall back to defaults if the file is missing)
    if os.path.exists(args.config):
        with open(args.config) as f:
            config = yaml.safe_load(f)
    else:
        config = {
            'n_folds': 10,
            'random_state': 42,
            'cost_per_hour': 0.90,
            'gpu_type': 'H200'
        }
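
    # A sketch of the keys this script reads from config/experiments.yaml
    # (the first four values are the defaults above; the model_params entry
    # is a hypothetical example):
    #   n_folds: 10
    #   random_state: 42
    #   cost_per_hour: 0.90
    #   gpu_type: H200
    #   dataset_dir: /path/to/datasets   # optional
    #   dataset_path: /path/to/file.csv  # optional, takes precedence
    #   model_params:
    #     xgboost: {n_estimators: 500}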

    # Run experiment
    results = run_single_experiment(
        dataset_name=args.dataset,
        model_name=args.model,
        config=config,
        output_dir=args.output_dir
    )

    print("\n[SUCCESS] Experiment complete!")