"""
Single Experiment Runner
=========================
Run a single model on a single dataset.
Usage:
python -m runners.run_experiment --dataset adult --model sap-rpt1
Author: UW MSIM Team
Date: November 2025
"""
import argparse
import json
import logging
import os
import sys
from pathlib import Path

import pandas as pd
import yaml

# Add parent directory to path so sibling packages resolve when run as a script
sys.path.insert(0, str(Path(__file__).parent.parent))

from models import (
    SAPRPT1Wrapper, SAPRPT1HFWrapper, TabPFNWrapper, TabICLWrapper,
    AutoGluonWrapper, XGBoostWrapper, CatBoostWrapper, LightGBMWrapper,
)
from datasets.preprocessors import load_dataset
from datasets.dataset_catalog import DatasetCatalog
from evaluation import run_cross_validation, ComputeTracker

logger = logging.getLogger(__name__)
def get_model(model_name: str, task_type: str, config: dict):
"""
Initialize model by name.
Parameters
----------
model_name : str
Model identifier
task_type : str
'classification' or 'regression'
config : dict
Model configuration
Returns
-------
model : BaseModelWrapper
Initialized model
"""
model_map = {
'sap-rpt1': SAPRPT1Wrapper,
'sap-rpt1-small': lambda **kwargs: SAPRPT1Wrapper(model_size='small', **kwargs),
'sap-rpt1-large': lambda **kwargs: SAPRPT1Wrapper(model_size='large', **kwargs),
'sap-rpt1-hf': SAPRPT1HFWrapper,
'tabpfn': TabPFNWrapper,
'tabicl': TabICLWrapper,
'autogluon': AutoGluonWrapper,
'xgboost': XGBoostWrapper,
'catboost': CatBoostWrapper,
'lightgbm': LightGBMWrapper
}
if model_name not in model_map:
raise ValueError(f"Unknown model: {model_name}. Choose from {list(model_map.keys())}")
model_class = model_map[model_name]
# Get specific parameters for this model
model_config_key = model_name.replace('-', '_')
# Special handling for size variants like sap-rpt1-small -> sap_rpt1
if model_name.startswith('sap-rpt1-') and model_name not in ['sap-rpt1-hf']:
model_config_key = 'sap_rpt1'
model_params = config.get('model_params', {}).get(model_config_key, {})
model = model_class(task_type=task_type, **model_params)
logger.info(f"Initialized {model_name} for {task_type}")
return model
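# Illustrative usage: model names map to config keys by replacing '-' with '_',
# and every sap-rpt1 size variant shares the 'sap_rpt1' params block, so
#   get_model('sap-rpt1-small', 'classification',
#             {'model_params': {'sap_rpt1': {}}})
# forwards those params to SAPRPT1Wrapper(model_size='small', ...).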
def run_single_experiment(
dataset_name: str,
model_name: str,
config: dict,
output_dir: str = '../results/raw'
) -> dict:
"""
Run experiment on single dataset with single model.
Parameters
----------
dataset_name : str
Dataset name
model_name : str
Model name
config : dict
Experiment configuration
output_dir : str
Where to save results
Returns
-------
summary : dict
Experiment results
"""
logger.info(f"\n{'='*60}")
logger.info(f"Experiment: {model_name} on {dataset_name}")
logger.info(f"{'='*60}\n")
# Create output directory
os.makedirs(output_dir, exist_ok=True)
# Start compute tracking
tracker = ComputeTracker(
cost_per_hour=config.get('cost_per_hour', 0.90),
gpu_type=config.get('gpu_type', 'H200')
)
tracker.start()
try:
# Load dataset
logger.info("Loading dataset...")
default_dataset_dir = str(Path(__file__).parent.parent.parent / 'datasets')
dataset_dir = config.get('dataset_dir', default_dataset_dir)
dataset_path = config.get('dataset_path', None)
if dataset_path and os.path.exists(dataset_path):
# Explicit path provided
X, y, task_type = load_dataset(dataset_path)
elif os.path.isdir(dataset_dir):
            # Search the download directory for "<dataset>_X.csv" / "<dataset>_y.csv" pairs.
            # Exact filename matches take priority; otherwise fall back to a substring match.
            X_file = None
            y_file = None
            dname_lower = dataset_name.lower()
            for f in sorted(os.listdir(dataset_dir)):
                fname_lower = f.lower()
                if fname_lower == f"{dname_lower}_x.csv":
                    X_file = os.path.join(dataset_dir, f)
                elif X_file is None and fname_lower.endswith('_x.csv') and dname_lower in fname_lower:
                    X_file = os.path.join(dataset_dir, f)
                if fname_lower == f"{dname_lower}_y.csv":
                    y_file = os.path.join(dataset_dir, f)
                elif y_file is None and fname_lower.endswith('_y.csv') and dname_lower in fname_lower:
                    y_file = os.path.join(dataset_dir, f)
            if X_file and y_file:
                X = pd.read_csv(X_file)
                y = pd.read_csv(y_file).iloc[:, 0]
                # Heuristic task-type detection: object dtype or low-cardinality targets
                # are treated as classification, everything else as regression.
                if y.dtype == 'object' or len(y.unique()) < 20:
                    task_type = 'classification'
                else:
                    task_type = 'regression'
                logger.info(f"Loaded {dataset_name}: {X.shape[0]} samples, {X.shape[1]} features, task={task_type}")
else:
# Fallback: try as a single CSV file
csv_path = os.path.join(dataset_dir, f"{dataset_name}.csv")
if os.path.exists(csv_path):
X, y, task_type = load_dataset(csv_path)
else:
raise FileNotFoundError(
f"Dataset '{dataset_name}' not found in {dataset_dir}.\n"
f"Available files: {os.listdir(dataset_dir)[:10]}..."
)
else:
raise FileNotFoundError(
f"Dataset directory not found: {dataset_dir}"
)
# Initialize model
model = get_model(model_name, task_type, config)
# Run cross-validation
fold_results = run_cross_validation(
model=model,
X=X,
y=y,
task_type=task_type,
n_folds=config.get('n_folds', 10),
random_state=config.get('random_state', 42)
)
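        # fold_results is expected to be a list with one metrics dict per fold,
        # e.g. [{'roc_auc': 0.91, 'accuracy': 0.85}, ...] for classification
        # (metric names and values are illustrative; they come from run_cross_validation).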
# Stop tracking
compute_summary = tracker.stop()
        # Aggregate per-fold metrics into a summary
        results_df = pd.DataFrame(fold_results)
summary = {
'dataset': dataset_name,
'model': model_name,
'task_type': task_type,
'n_samples': len(X),
'n_features': X.shape[1],
'n_folds': config.get('n_folds', 10),
            'mean_metrics': results_df.mean(numeric_only=True).to_dict(),
            'std_metrics': results_df.std(numeric_only=True).to_dict(),
'fold_results': fold_results,
'compute': compute_summary
}
# Save results
output_file = os.path.join(output_dir, f"{dataset_name}_{model_name}.json")
with open(output_file, 'w') as f:
json.dump(summary, f, indent=2)
logger.info(f"\n[SUCCESS] Results saved to {output_file}")
# Print summary
primary_metric = 'roc_auc' if task_type == 'classification' else 'r2'
if primary_metric in summary['mean_metrics']:
mean_val = summary['mean_metrics'][primary_metric]
std_val = summary['std_metrics'][primary_metric]
logger.info(f"\nPrimary Metric ({primary_metric}): {mean_val:.4f} ± {std_val:.4f}")
return summary
except Exception as e:
logger.error(f"Experiment failed: {e}", exc_info=True)
raise
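# Sketch of the JSON summary written to <output_dir>/<dataset>_<model>.json
# (metric names depend on run_cross_validation; values shown are placeholders):
#
#   {
#     "dataset": "adult", "model": "xgboost", "task_type": "classification",
#     "n_samples": ..., "n_features": ..., "n_folds": 10,
#     "mean_metrics": {"roc_auc": ...}, "std_metrics": {"roc_auc": ...},
#     "fold_results": [...], "compute": {...}
#   }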
if __name__ == "__main__":
# Setup logging
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
# Parse arguments
parser = argparse.ArgumentParser(description='Run single benchmarking experiment')
parser.add_argument('--dataset', required=True, help='Dataset name')
parser.add_argument('--model', required=True, help='Model name')
parser.add_argument('--config', default='../config/experiments.yaml', help='Config file')
parser.add_argument('--output-dir', default='../results/raw', help='Output directory')
args = parser.parse_args()
# Load config
if os.path.exists(args.config):
with open(args.config) as f:
config = yaml.safe_load(f)
else:
config = {
'n_folds': 10,
'random_state': 42,
'cost_per_hour': 0.90,
'gpu_type': 'H200'
}
# Run experiment
results = run_single_experiment(
dataset_name=args.dataset,
model_name=args.model,
config=config,
output_dir=args.output_dir
)
print("\n[SUCCESS] Experiment complete!")