Commit e057d08
Parent(s): cb9e57b

Revert back

Files changed:
- Dockerfile +2 -16
- code/analysis/__init__.py +11 -0
- code/analysis/aggregate_results.py +99 -0
- code/config/datasets.yaml +33 -0
- code/config/experiments.yaml +64 -0
- code/config/models.yaml +84 -0
- code/docker/Dockerfile +102 -0
- code/evaluation/__init__.py +24 -0
- code/evaluation/compute_tracker.py +114 -0
- code/evaluation/cross_validation.py +127 -0
- code/evaluation/metrics.py +116 -0
- code/evaluation/statistical_tests.py +109 -0
- {webapp → code}/models/__init__.py +0 -0
- {webapp → code}/models/autogluon_wrapper.py +0 -0
- {webapp → code}/models/base_wrapper.py +0 -0
- {webapp → code}/models/baseline_wrappers.py +0 -0
- {webapp → code}/models/sap_rpt1_hf_wrapper.py +0 -0
- {webapp → code}/models/sap_rpt1_wrapper.py +0 -0
- {webapp → code}/models/tabicl_wrapper.py +0 -0
- {webapp → code}/models/tabpfn_wrapper.py +24 -43
- code/runners/__init__.py +11 -0
- code/runners/run_baselines.py +50 -0
- code/runners/run_batch.py +289 -0
- code/runners/run_experiment.py +260 -0
- {webapp → code}/sap_rpt1.egg-info/PKG-INFO +0 -0
- code/sap_rpt1.egg-info/SOURCES.txt +28 -0
- {webapp → code}/sap_rpt1.egg-info/dependency_links.txt +0 -0
- {webapp → code}/sap_rpt1.egg-info/requires.txt +0 -0
- code/sap_rpt1.egg-info/top_level.txt +5 -0
- code/utils/__init__.py +11 -0
- code/utils/logging_utils.py +63 -0
- requirements.txt +1 -0
- setup.py +2 -2
- webapp/benchmark.py +23 -76
- webapp/catboost_info/catboost_training.json +200 -200
- webapp/catboost_info/learn/events.out.tfevents +1 -1
- webapp/catboost_info/learn_error.tsv +200 -200
- webapp/catboost_info/time_left.tsv +200 -200
- webapp/ensemble.py +5 -11
- webapp/main.py +20 -48
- webapp/requirements.txt +1 -3
- webapp/sap_rpt1.egg-info/SOURCES.txt +0 -15
- webapp/sap_rpt1.egg-info/top_level.txt +0 -1
- webapp/static/app.js +2 -3
Dockerfile
CHANGED
@@ -17,16 +17,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
     && rm -rf /var/lib/apt/lists/*
 USER user
 
-# ── TabPFN license acceptance ────────────────────────────────────────────────
-# Set ALL known env var names TabPFN v2 checks for, at the container level.
-# This must be done before any Python code runs — setting them inside Python
-# files is too late because TabPFN checks on import.
-ENV TABPFN_ACCEPT_LICENSE=1 \
-    TABPFN_LICENSE=accept \
-    TABPFN_ACCEPT_TERMS=1 \
-    TABPFN_LICENSE_ACCEPTED=1 \
-    AGREE_TABPFN_LICENSE=1
-
 # Copy the entire project
 COPY --chown=user . $HOME/app/
 
@@ -37,12 +27,8 @@ RUN pip install --no-cache-dir -r webapp/requirements.txt
 # Install SAP-RPT-1 OSS directly from GitHub (needed for the real model)
 RUN pip install --no-cache-dir git+https://github.com/SAP-samples/sap-rpt-1-oss.git
 
-# Pre-download Sentence Transformers weights to avoid runtime hangs
-# This specific model is used by the SAP RPT-1 OSS model.
-RUN python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('all-MiniLM-L6-v2')"
-
 # Expose port 7860 (Hugging Face Spaces default port)
 EXPOSE 7860
 
-# Run the FastAPI app
-CMD ["python", "-m", "uvicorn", "webapp.main:app", "--host", "0.0.0.0", "--port", "7860"
+# Run the FastAPI app
+CMD ["python", "-m", "uvicorn", "webapp.main:app", "--host", "0.0.0.0", "--port", "7860"]
code/analysis/__init__.py
ADDED
@@ -0,0 +1,11 @@
+"""
+Analysis Package
+================
+
+Results aggregation, statistical analysis, and visualization.
+
+Author: UW MSIM Team
+Date: November 2025
+"""
+
+__all__ = ['aggregate_results']
code/analysis/aggregate_results.py
ADDED
@@ -0,0 +1,99 @@
+"""
+Results Aggregation
+===================
+
+Aggregate all experiment results into summary tables.
+
+Author: UW MSIM Team
+Date: November 2025
+"""
+
+import glob
+import json
+import pandas as pd
+import os
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+def aggregate_all_results(
+    results_dir: str = '../results/raw',
+    output_file: str = '../results/processed/aggregated_results.csv'
+) -> pd.DataFrame:
+    """
+    Aggregate all experiment results into single DataFrame.
+
+    Parameters
+    ----------
+    results_dir : str
+        Directory containing result JSON files
+    output_file : str
+        Where to save aggregated CSV
+
+    Returns
+    -------
+    df : pd.DataFrame
+        Aggregated results
+    """
+    logger.info(f"Aggregating results from {results_dir}")
+
+    result_files = glob.glob(os.path.join(results_dir, '*.json'))
+    logger.info(f"Found {len(result_files)} result files")
+
+    aggregated = []
+
+    for file in result_files:
+        try:
+            with open(file) as f:
+                data = json.load(f)
+
+            record = {
+                'dataset': data['dataset'],
+                'model': data['model'],
+                'task_type': data['task_type'],
+                'n_samples': data['n_samples'],
+                'n_features': data['n_features'],
+                'n_folds': data['n_folds']
+            }
+
+            # Add mean metrics
+            for metric, value in data['mean_metrics'].items():
+                record[f'mean_{metric}'] = value
+
+            # Add std metrics
+            for metric, value in data['std_metrics'].items():
+                record[f'std_{metric}'] = value
+
+            # Add compute info
+            if 'compute' in data:
+                record['elapsed_hours'] = data['compute'].get('elapsed_hours')
+                record['cost_usd'] = data['compute'].get('cost_usd')
+
+            aggregated.append(record)
+
+        except Exception as e:
+            logger.warning(f"Failed to process {file}: {e}")
+
+    # Create DataFrame
+    df = pd.DataFrame(aggregated)
+
+    # Save
+    os.makedirs(os.path.dirname(output_file), exist_ok=True)
+    df.to_csv(output_file, index=False)
+
+    logger.info(f"Aggregated {len(df)} results to {output_file}")
+
+    return df
+
+
+if __name__ == "__main__":
+    logging.basicConfig(level=logging.INFO)
+
+    df = aggregate_all_results()
+
+    print(f"\nAggregated {len(df)} experiment results")
+    print(f"\nDatasets: {df['dataset'].nunique()}")
+    print(f"Models: {df['model'].nunique()}")
+    print(f"\nSample of results:")
+    print(df.head())
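Usage sketch (not part of the commit): the aggregator can also be driven from a script or notebook instead of the __main__ block above. The paths below simply repeat the module defaults, and the import assumes code/ is on PYTHONPATH as in the Docker images.

# Minimal sketch of calling aggregate_all_results directly (paths are the module defaults).
import logging
from analysis.aggregate_results import aggregate_all_results

logging.basicConfig(level=logging.INFO)

# Collect every results/raw/*.json into one table and write the CSV.
df = aggregate_all_results(
    results_dir='../results/raw',
    output_file='../results/processed/aggregated_results.csv'
)

# Example downstream use: mean ROC-AUC per model across datasets, if present.
if 'mean_roc_auc' in df.columns:
    print(df.groupby('model')['mean_roc_auc'].mean().sort_values(ascending=False))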
code/config/datasets.yaml
ADDED
@@ -0,0 +1,33 @@
+# Dataset Configuration
+# =====================
+
+# Local Datasets (from datasets folder)
+local_datasets:
+  enabled: true
+  path: '../datasets'
+
+# TabZilla Datasets (subset of 20)
+tabzilla:
+  enabled: false  # Enable when data is available
+  path: '../datasets/tabzilla'
+
+# OpenML-CC18 (Classification subset)
+openml_cc18:
+  enabled: false
+  path: '../datasets/openml_cc18'
+
+# Dataset Filters
+filters:
+  min_samples: 100
+  max_samples: 100000
+  min_features: 2
+  max_features: 1000
+  task_types:
+    - classification
+    - regression
+
+# Preprocessing
+preprocessing:
+  handle_missing: 'mean'  # mean, median, most_frequent, drop
+  encode_categoricals: true
+  scale_features: false  # Most models handle scaling internally
code/config/experiments.yaml
ADDED
@@ -0,0 +1,64 @@
+# Experiment Configuration
+# ========================
+
+# Cross-Validation Settings
+n_folds: 10
+random_state: 42
+timeout: 86400  # 24 hours per experiment
+
+# Compute Resources
+cost_per_hour: 0.90  # USD per GPU-hour (H200)
+gpu_type: 'H200'
+gpu_memory_limit: 80  # GB
+checkpoint_interval: 3600  # Save checkpoint every hour
+
+# Model-Specific Parameters
+model_params:
+  sap_rpt1:
+    context_size: 4096
+    bagging_factor: 4
+    model_size: 'small'  # or 'large'
+
+  sap_rpt1_hf:
+    max_context_size: 4096
+    bagging: 4
+
+  tabpfn:
+    n_ensemble: 1
+    device: 'auto'
+
+  autogluon:
+    time_limit: 300  # 5 minutes
+    preset: 'medium_quality'  # best_quality, high_quality, good_quality, medium_quality
+
+  xgboost:
+    n_estimators: 100
+    learning_rate: 0.1
+    max_depth: 6
+
+  catboost:
+    iterations: 100
+    learning_rate: 0.1
+    depth: 6
+
+  lightgbm:
+    n_estimators: 100
+    learning_rate: 0.1
+    max_depth: -1
+
+# Evaluation Metrics
+primary_metric:
+  classification: 'roc_auc'
+  regression: 'r2'
+
+# Statistical Testing
+statistical_tests:
+  friedman_alpha: 0.05
+  nemenyi_alpha: 0.05
+
+# Reproducibility
+reproducibility:
+  save_predictions: true
+  save_models: false  # Models can be large
+  log_hyperparameters: true
+  track_compute: true
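Illustrative note (not part of the commit): this config is plain YAML, so the runners can read it with PyYAML. The loader below only assumes the keys shown above.

# Illustrative loader for config/experiments.yaml.
import yaml

with open('config/experiments.yaml') as f:
    cfg = yaml.safe_load(f)

n_folds = cfg['n_folds']                       # 10
cost_per_hour = cfg['cost_per_hour']           # 0.90 USD per GPU-hour
tabpfn_params = cfg['model_params']['tabpfn']  # {'n_ensemble': 1, 'device': 'auto'}

# Pick the primary metric for a given task type.
task_type = 'classification'
primary_metric = cfg['primary_metric'][task_type]  # 'roc_auc'
print(n_folds, cost_per_hour, tabpfn_params, primary_metric)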
code/config/models.yaml
ADDED
@@ -0,0 +1,84 @@
+# Model Configuration
+# ====================
+
+models:
+  # SAP RPT-1 (Primary Model)
+  - name: 'sap-rpt1-small'
+    enabled: true
+    priority: 'high'
+    docker_image: 'sap-rpt1'
+
+  - name: 'sap-rpt1-large'
+    enabled: true
+    priority: 'high'
+    docker_image: 'sap-rpt1'
+
+  # SAP RPT-1 OSS via Hugging Face (Open Source)
+  - name: 'sap-rpt1-hf'
+    enabled: true
+    priority: 'high'
+    docker_image: 'sap-rpt1'
+    description: 'SAP RPT-1 OSS model via HuggingFace token authentication'
+
+  # Pretrained Competitors
+  - name: 'tabpfn'
+    enabled: true
+    priority: 'high'
+    docker_image: 'tabpfn'
+
+  - name: 'tabicl'
+    enabled: false  # Enable when implementation ready
+    priority: 'medium'
+    docker_image: 'tabicl'
+
+  # AutoML
+  - name: 'autogluon'
+    enabled: true
+    priority: 'medium'
+    docker_image: 'autogluon'
+
+  # Gradient Boosting Baselines
+  - name: 'xgboost'
+    enabled: true
+    priority: 'medium'
+    docker_image: 'baselines'
+
+  - name: 'catboost'
+    enabled: true
+    priority: 'medium'
+    docker_image: 'baselines'
+
+  - name: 'lightgbm'
+    enabled: true
+    priority: 'low'
+    docker_image: 'baselines'
+
+# Model Groups (for batch experiments)
+model_groups:
+  all:
+    - sap-rpt1-small
+    - sap-rpt1-large
+    - sap-rpt1-hf
+    - tabpfn
+    - autogluon
+    - xgboost
+    - catboost
+    - lightgbm
+
+  pretrained_only:
+    - sap-rpt1-small
+    - sap-rpt1-large
+    - sap-rpt1-hf
+    - tabpfn
+
+  baselines_only:
+    - xgboost
+    - catboost
+    - lightgbm
+
+  high_priority:
+    - sap-rpt1-small
+    - sap-rpt1-large
+    - sap-rpt1-hf
+    - tabpfn
+
code/docker/Dockerfile
ADDED
@@ -0,0 +1,102 @@
+# =============================================================================
+# SAP RPT-1 Benchmarking - Multi-stage Dockerfile
+# =============================================================================
+# Builds two targets:
+#   - sap-rpt1:  Python 3.11 with SAP RPT-1 OSS + all dependencies
+#   - baselines: Python 3.11 with XGBoost, CatBoost, LightGBM
+#
+# Usage:
+#   docker-compose build
+#   docker-compose run sap-rpt1
+#   docker-compose run baselines
+# =============================================================================
+
+# ---------- Base stage (shared by all targets) ----------
+FROM python:3.11-slim AS base
+
+# System dependencies
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    git \
+    build-essential \
+    && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /app
+
+# Copy requirements first (for Docker layer caching)
+COPY requirements.txt /app/requirements.txt
+
+# ---------- SAP RPT-1 target ----------
+FROM base AS sap-rpt1
+
+# Install core scientific stack first (heavy packages)
+RUN pip install --default-timeout=1000 --retries 5 --no-cache-dir \
+    numpy==1.26.4 \
+    pandas==2.2.3 \
+    scikit-learn==1.6.1 \
+    scipy==1.14.1 \
+    matplotlib==3.9.2 \
+    seaborn==0.13.2
+
+# Install Hugging Face and PyTorch stack
+RUN pip install --default-timeout=1000 --retries 5 --no-cache-dir \
+    --extra-index-url https://download.pytorch.org/whl/cpu \
+    torch==2.7.0+cpu \
+    transformers==4.52.4 \
+    accelerate==1.6.0 \
+    huggingface-hub==0.30.2 \
+    datasets==3.5.0 \
+    pyarrow==20.0.0 \
+    torcheval==0.0.7
+
+# Install SAP RPT-1 and remaining requirements
+RUN pip install --default-timeout=1000 --retries 5 --no-cache-dir -r requirements.txt
+
+# Copy project code
+COPY . /app
+
+# Set Python path
+ENV PYTHONPATH=/app/code
+
+WORKDIR /app/code
+
+# Set entrypoint so you can run via arguments natively
+ENTRYPOINT ["python"]
+CMD ["-m", "runners.run_experiment", "--dataset", "adult", "--model", "sap-rpt1-hf"]
+
+# ---------- Baselines target ----------
+FROM base AS baselines
+
+# Install core scientific stack (heavy packages)
+RUN pip install --default-timeout=1000 --retries 5 --no-cache-dir \
+    numpy==1.26.4 \
+    pandas==2.2.3 \
+    scikit-learn==1.6.1 \
+    scipy==1.14.1
+
+# Install visualization and utilities
+RUN pip install --default-timeout=1000 --retries 5 --no-cache-dir \
+    matplotlib==3.9.2 \
+    seaborn==0.13.2 \
+    pyyaml==6.0.2 \
+    tqdm==4.67.1 \
+    joblib==1.4.2 \
+    python-dotenv==1.0.1
+
+# Install ML frameworks and OpenML
+RUN pip install --default-timeout=1000 --retries 5 --no-cache-dir \
+    openml==0.14.2 \
+    xgboost \
+    catboost \
+    lightgbm
+
+# Copy project code
+COPY . /app
+
+# Set Python path
+ENV PYTHONPATH=/app/code
+
+WORKDIR /app/code
+
+# Set entrypoint so you can run via arguments natively
+ENTRYPOINT ["python"]
+CMD ["-m", "runners.run_batch", "--datasets", "config/datasets.yaml", "--models", "config/models.yaml"]
code/evaluation/__init__.py
ADDED
@@ -0,0 +1,24 @@
+"""
+Evaluation Package
+==================
+
+Tools for model evaluation, statistical testing, and benchmarking.
+
+Author: UW MSIM Team
+Date: November 2025
+"""
+
+from .metrics import calculate_classification_metrics, calculate_regression_metrics
+from .cross_validation import run_cross_validation
+from .statistical_tests import friedman_test, nemenyi_post_hoc, critical_difference
+from .compute_tracker import ComputeTracker
+
+__all__ = [
+    'calculate_classification_metrics',
+    'calculate_regression_metrics',
+    'run_cross_validation',
+    'friedman_test',
+    'nemenyi_post_hoc',
+    'critical_difference',
+    'ComputeTracker'
+]
code/evaluation/compute_tracker.py
ADDED
@@ -0,0 +1,114 @@
+"""
+Compute Resource Tracker
+=========================
+
+Track GPU hours, costs, and resource usage for experiments.
+
+Author: UW MSIM Team
+Date: November 2025
+"""
+
+import time
+import numpy as np
+from typing import Dict, Optional, List
+
+try:
+    import psutil
+    HAS_PSUTIL = True
+except ImportError:
+    HAS_PSUTIL = False
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+class ComputeTracker:
+    """
+    Track compute resources and costs.
+
+    Parameters
+    ----------
+    cost_per_hour : float
+        Cost per GPU-hour in USD
+    gpu_type : str
+        GPU type (e.g., 'H200', 'A100', 'L40S')
+    """
+
+    def __init__(self, cost_per_hour: float = 0.90, gpu_type: str = 'H200'):
+        self.cost_per_hour = cost_per_hour
+        self.gpu_type = gpu_type
+        self.start_time: Optional[float] = None
+        self.end_time: Optional[float] = None
+        self.gpu_usage_log: List[Dict] = []
+
+    def start(self):
+        """Start tracking."""
+        self.start_time = time.time()
+        self.gpu_usage_log = []
+        logger.info(f"Compute tracking started (GPU: {self.gpu_type}, ${self.cost_per_hour}/hr)")
+
+    def log_gpu_usage(self):
+        """Log current GPU usage."""
+        try:
+            import GPUtil
+            gpus = GPUtil.getGPUs()
+
+            for gpu in gpus:
+                self.gpu_usage_log.append({
+                    'timestamp': time.time(),
+                    'gpu_id': gpu.id,
+                    'gpu_load': gpu.load * 100,
+                    'memory_used_mb': gpu.memoryUsed,
+                    'memory_total_mb': gpu.memoryTotal,
+                    'memory_util': (gpu.memoryUsed / gpu.memoryTotal) * 100,
+                    'temperature': getattr(gpu, 'temperature', None)
+                })
+        except ImportError:
+            logger.warning("GPUtil not installed, GPU tracking unavailable")
+        except Exception as e:
+            logger.warning(f"GPU logging failed: {e}")
+
+    def stop(self) -> Dict:
+        """
+        Stop tracking and calculate costs.
+
+        Returns
+        -------
+        summary : dict
+            Elapsed time, costs, and GPU usage summary
+        """
+        self.end_time = time.time()
+
+        elapsed_hours = (self.end_time - self.start_time) / 3600
+        total_cost = elapsed_hours * self.cost_per_hour
+
+        # CPU usage
+        if HAS_PSUTIL:
+            cpu_percent = psutil.cpu_percent(interval=1)
+            memory_info = psutil.virtual_memory()
+            memory_percent = memory_info.percent
+            memory_used_gb = memory_info.used / (1024 ** 3)
+        else:
+            cpu_percent = 0.0
+            memory_percent = 0.0
+            memory_used_gb = 0.0
+
+        summary = {
+            'elapsed_hours': elapsed_hours,
+            'cost_usd': total_cost,
+            'cost_per_hour': self.cost_per_hour,
+            'gpu_type': self.gpu_type,
+            'cpu_percent': cpu_percent,
+            'memory_percent': memory_percent,
+            'memory_used_gb': memory_used_gb,
+            'gpu_logs_count': len(self.gpu_usage_log)
+        }
+
+        # Average GPU utilization
+        if self.gpu_usage_log:
+            summary['avg_gpu_load'] = np.mean([log['gpu_load'] for log in self.gpu_usage_log])
+            summary['avg_gpu_memory_util'] = np.mean([log['memory_util'] for log in self.gpu_usage_log])
+
+        logger.info(f"Compute tracking stopped: {elapsed_hours:.2f} hours, ${total_cost:.2f}")
+
+        return summary
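Usage sketch (not part of the commit): ComputeTracker wraps an experiment with start/stop calls; only the API above is assumed, plus code/ on PYTHONPATH. GPUtil sampling is optional.

# Illustrative ComputeTracker usage around a single experiment.
from evaluation.compute_tracker import ComputeTracker

tracker = ComputeTracker(cost_per_hour=0.90, gpu_type='H200')
tracker.start()

# ... run a fold or a whole experiment here ...
tracker.log_gpu_usage()  # optional: sample GPU load if GPUtil is installed

summary = tracker.stop()
print(f"{summary['elapsed_hours']:.2f} h, ${summary['cost_usd']:.2f} on {summary['gpu_type']}")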
code/evaluation/cross_validation.py
ADDED
@@ -0,0 +1,127 @@
+"""
+Cross-Validation
+================
+
+10-fold stratified cross-validation for model evaluation.
+
+Author: UW MSIM Team
+Date: November 2025
+"""
+
+import numpy as np
+import pandas as pd
+from sklearn.model_selection import StratifiedKFold, KFold
+from sklearn.preprocessing import LabelEncoder
+from typing import List, Dict
+import logging
+
+from .metrics import calculate_classification_metrics, calculate_regression_metrics
+
+logger = logging.getLogger(__name__)
+
+
+def _encode_categorical_columns(X_train, X_val):
+    """
+    Label-encode object/categorical columns. Fitted on X_train,
+    applied to both X_train and X_val. Unknown categories in X_val
+    are mapped to -1.
+    """
+    X_train = X_train.copy()
+    X_val = X_val.copy()
+
+    cat_cols = X_train.select_dtypes(include=['object', 'category']).columns
+    if len(cat_cols) == 0:
+        return X_train, X_val
+
+    logger.info(f" Encoding {len(cat_cols)} categorical columns: {list(cat_cols[:5])}{'...' if len(cat_cols) > 5 else ''}")
+
+    for col in cat_cols:
+        le = LabelEncoder()
+        # Fit on combined unique values from train (+ handle unseen in val)
+        combined = pd.concat([X_train[col], X_val[col]], axis=0).astype(str)
+        le.fit(combined)
+        X_train[col] = le.transform(X_train[col].astype(str))
+        X_val[col] = le.transform(X_val[col].astype(str))
+
+    return X_train, X_val
+
+
+def run_cross_validation(
+    model,
+    X: pd.DataFrame,
+    y: pd.Series,
+    task_type: str = 'classification',
+    n_folds: int = 10,
+    random_state: int = 42
+) -> List[Dict]:
+    """
+    Run k-fold cross-validation.
+
+    Parameters
+    ----------
+    model : BaseModelWrapper
+        Model to evaluate (must have fit/predict methods)
+    X : pd.DataFrame
+        Features
+    y : pd.Series
+        Target
+    task_type : str
+        'classification' or 'regression'
+    n_folds : int
+        Number of folds
+    random_state : int
+        Random seed
+
+    Returns
+    -------
+    fold_results : list of dict
+        Results for each fold
+    """
+    logger.info(f"Running {n_folds}-fold CV for {model.__class__.__name__}")
+
+    # Choose CV splitter
+    if task_type == 'classification':
+        cv = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=random_state)
+    else:
+        cv = KFold(n_splits=n_folds, shuffle=True, random_state=random_state)
+
+    fold_results = []
+
+    for fold_idx, (train_idx, val_idx) in enumerate(cv.split(X, y)):
+        logger.info(f" Fold {fold_idx + 1}/{n_folds}")
+
+        # Split data
+        X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
+        y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]
+
+        # Auto-encode categorical columns so tree models can handle them
+        X_train, X_val = _encode_categorical_columns(X_train, X_val)
+
+        # Fit model
+        model.fit(X_train, y_train)
+
+        # Predict
+        y_pred = model.predict(X_val)
+        y_proba = None
+        if task_type == 'classification':
+            try:
+                y_proba = model.predict_proba(X_val)
+            except:
+                pass
+
+        # Calculate metrics
+        if task_type == 'classification':
+            metrics = calculate_classification_metrics(y_val, y_pred, y_proba)
+        else:
+            metrics = calculate_regression_metrics(y_val, y_pred)
+
+        # Add timing info
+        metrics.update({
+            'fold': fold_idx,
+            'fit_time': model.fit_time,
+            'predict_time': model.predict_time
+        })
+
+        fold_results.append(metrics)
+
+    return fold_results
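Usage sketch (not part of the commit): run_cross_validation only needs an object with fit/predict/predict_proba plus fit_time and predict_time attributes. The DummyWrapper below is hypothetical, standing in for the BaseModelWrapper subclasses under code/models/.

# Hypothetical wrapper + CV call; only run_cross_validation itself comes from this commit.
import time
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from evaluation.cross_validation import run_cross_validation

class DummyWrapper:
    def __init__(self):
        self.clf = LogisticRegression(max_iter=1000)
        self.fit_time = 0.0
        self.predict_time = 0.0

    def fit(self, X, y):
        t0 = time.time()
        self.clf.fit(X, y)
        self.fit_time = time.time() - t0
        return self

    def predict(self, X):
        t0 = time.time()
        out = self.clf.predict(X)
        self.predict_time = time.time() - t0
        return out

    def predict_proba(self, X):
        return self.clf.predict_proba(X)

data = load_breast_cancer(as_frame=True)
folds = run_cross_validation(DummyWrapper(), data.data, data.target,
                             task_type='classification', n_folds=10)
print(pd.DataFrame(folds)[['fold', 'accuracy', 'roc_auc']].head())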
code/evaluation/metrics.py
ADDED
@@ -0,0 +1,116 @@
+"""
+Evaluation Metrics
+==================
+
+Comprehensive metrics for classification and regression tasks.
+
+Author: UW MSIM Team
+Date: November 2025
+"""
+
+import numpy as np
+from sklearn.metrics import (
+    roc_auc_score, accuracy_score, f1_score, precision_score, recall_score,
+    r2_score, mean_squared_error, mean_absolute_error, log_loss
+)
+from typing import Dict, Optional
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+def calculate_classification_metrics(
+    y_true: np.ndarray,
+    y_pred: np.ndarray,
+    y_proba: Optional[np.ndarray] = None
+) -> Dict[str, float]:
+    """
+    Calculate all classification metrics.
+
+    Parameters
+    ----------
+    y_true : np.ndarray
+        True labels
+    y_pred : np.ndarray
+        Predicted labels
+    y_proba : np.ndarray, optional
+        Predicted probabilities (n_samples, n_classes)
+
+    Returns
+    -------
+    metrics : dict
+        Dictionary of metric names and values
+    """
+    metrics = {
+        'accuracy': accuracy_score(y_true, y_pred),
+        'f1_macro': f1_score(y_true, y_pred, average='macro', zero_division=0),
+        'f1_weighted': f1_score(y_true, y_pred, average='weighted', zero_division=0),
+        'precision_macro': precision_score(y_true, y_pred, average='macro', zero_division=0),
+        'recall_macro': recall_score(y_true, y_pred, average='macro', zero_division=0)
+    }
+
+    # ROC-AUC (if probabilities available)
+    if y_proba is not None:
+        try:
+            n_classes = len(np.unique(y_true))
+
+            if n_classes == 2:
+                # Binary classification
+                metrics['roc_auc'] = roc_auc_score(y_true, y_proba[:, 1])
+            else:
+                # Multi-class classification
+                metrics['roc_auc'] = roc_auc_score(
+                    y_true, y_proba,
+                    multi_class='ovr',
+                    average='macro'
+                )
+
+            # Log loss
+            metrics['log_loss'] = log_loss(y_true, y_proba)
+
+        except Exception as e:
+            logger.warning(f"ROC-AUC calculation failed: {e}")
+            metrics['roc_auc'] = np.nan
+            metrics['log_loss'] = np.nan
+
+    return metrics
+
+
+def calculate_regression_metrics(
+    y_true: np.ndarray,
+    y_pred: np.ndarray
+) -> Dict[str, float]:
+    """
+    Calculate all regression metrics.
+
+    Parameters
+    ----------
+    y_true : np.ndarray
+        True values
+    y_pred : np.ndarray
+        Predicted values
+
+    Returns
+    -------
+    metrics : dict
+        Dictionary of metric names and values
+    """
+    metrics = {
+        'r2': r2_score(y_true, y_pred),
+        'rmse': np.sqrt(mean_squared_error(y_true, y_pred)),
+        'mae': mean_absolute_error(y_true, y_pred),
+        'mse': mean_squared_error(y_true, y_pred)
+    }
+
+    # MAPE (avoid division by zero)
+    try:
+        non_zero_mask = y_true != 0
+        if np.any(non_zero_mask):
+            mape = np.mean(np.abs((y_true[non_zero_mask] - y_pred[non_zero_mask]) / y_true[non_zero_mask])) * 100
+            metrics['mape'] = mape
+        else:
+            metrics['mape'] = np.nan
+    except:
+        metrics['mape'] = np.nan
+
+    return metrics
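Usage sketch (not part of the commit): a toy call showing what the two metric helpers return; the arrays below are made up for illustration.

# Toy example for the metric helpers (values are illustrative only).
import numpy as np
from evaluation.metrics import calculate_classification_metrics, calculate_regression_metrics

y_true = np.array([0, 0, 1, 1, 1, 0])
y_pred = np.array([0, 1, 1, 1, 0, 0])
# Two-column probability matrix, as produced by predict_proba.
y_proba = np.array([[0.8, 0.2], [0.4, 0.6], [0.3, 0.7],
                    [0.1, 0.9], [0.6, 0.4], [0.7, 0.3]])

clf_metrics = calculate_classification_metrics(y_true, y_pred, y_proba)
print({k: round(v, 3) for k, v in clf_metrics.items()})
# Keys: accuracy, f1_macro, f1_weighted, precision_macro, recall_macro, roc_auc, log_loss

reg_metrics = calculate_regression_metrics(np.array([1.0, 2.0, 3.0]),
                                            np.array([1.1, 1.9, 3.2]))
print(reg_metrics['rmse'], reg_metrics['mape'])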
code/evaluation/statistical_tests.py
ADDED
@@ -0,0 +1,109 @@
+"""
+Statistical Tests
+=================
+
+Statistical significance testing for model comparisons.
+
+Implements:
+- Friedman test (non-parametric ANOVA)
+- Nemenyi post-hoc test
+- Critical difference calculation
+
+Author: UW MSIM Team
+Date: November 2025
+"""
+
+import numpy as np
+import pandas as pd
+from scipy import stats
+from typing import Dict, Tuple
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+def friedman_test(results_df: pd.DataFrame) -> Dict:
+    """
+    Friedman test for comparing multiple models.
+
+    Parameters
+    ----------
+    results_df : pd.DataFrame
+        Rows = datasets, columns = models, values = metric scores
+
+    Returns
+    -------
+    results : dict
+        Test statistic, p-value, and significance
+    """
+    # Rank models for each dataset (higher is better)
+    ranks = results_df.rank(axis=1, ascending=False)
+
+    # Friedman test
+    stat, p_value = stats.friedmanchisquare(*[ranks[col] for col in ranks.columns])
+
+    logger.info(f"Friedman Test: statistic={stat:.4f}, p-value={p_value:.4e}")
+
+    return {
+        'statistic': stat,
+        'p_value': p_value,
+        'significant': p_value < 0.05,
+        'avg_ranks': ranks.mean().to_dict()
+    }
+
+
+def nemenyi_post_hoc(results_df: pd.DataFrame) -> pd.DataFrame:
+    """
+    Nemenyi post-hoc test (pairwise comparisons).
+
+    Parameters
+    ----------
+    results_df : pd.DataFrame
+        Rows = datasets, columns = models, values = metric scores
+
+    Returns
+    -------
+    p_values : pd.DataFrame
+        Pairwise p-values
+    """
+    try:
+        import scikit_posthocs as sp
+        ranks = results_df.rank(axis=1, ascending=False)
+        p_values = sp.posthoc_nemenyi_friedman(ranks.T)
+        return p_values
+    except ImportError:
+        logger.error("scikit-posthocs not installed. Install with: pip install scikit-posthocs")
+        raise
+
+
+def critical_difference(
+    n_datasets: int,
+    n_models: int,
+    alpha: float = 0.05
+) -> float:
+    """
+    Calculate critical difference for CD diagrams.
+
+    Parameters
+    ----------
+    n_datasets : int
+        Number of datasets
+    n_models : int
+        Number of models
+    alpha : float
+        Significance level
+
+    Returns
+    -------
+    cd : float
+        Critical difference value
+    """
+    # Critical value from Nemenyi distribution
+    # Approximation using normal distribution
+    q_alpha = stats.norm.ppf(1 - alpha / 2)
+
+    cd = q_alpha * np.sqrt((n_models * (n_models + 1)) / (6 * n_datasets))
+
+    logger.info(f"Critical Difference: {cd:.4f} (alpha={alpha})")
+
+    return cd
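Usage sketch (not part of the commit): the intended comparison workflow on a tiny, made-up score table. The dataset names and scores below are illustrative; nemenyi_post_hoc additionally requires scikit-posthocs.

# Illustrative Friedman / critical-difference workflow (scores are made up).
import pandas as pd
from evaluation.statistical_tests import friedman_test, critical_difference

# Rows = datasets, columns = models, values = e.g. mean ROC-AUC per dataset.
scores = pd.DataFrame(
    {
        'sap-rpt1-hf': [0.91, 0.88, 0.95, 0.83, 0.90],
        'tabpfn':      [0.90, 0.87, 0.94, 0.85, 0.89],
        'xgboost':     [0.88, 0.86, 0.93, 0.82, 0.88],
    },
    index=['adult', 'diabetes', 'authorship', 'credit-g', 'vehicle'],
)

friedman = friedman_test(scores)
print(friedman['p_value'], friedman['avg_ranks'])

# Critical difference for a CD diagram over these datasets and models.
cd = critical_difference(n_datasets=len(scores), n_models=scores.shape[1])
print(cd)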
{webapp → code}/models/__init__.py
RENAMED
File without changes

{webapp → code}/models/autogluon_wrapper.py
RENAMED
File without changes

{webapp → code}/models/base_wrapper.py
RENAMED
File without changes

{webapp → code}/models/baseline_wrappers.py
RENAMED
File without changes

{webapp → code}/models/sap_rpt1_hf_wrapper.py
RENAMED
File without changes

{webapp → code}/models/sap_rpt1_wrapper.py
RENAMED
File without changes

{webapp → code}/models/tabicl_wrapper.py
RENAMED
File without changes
{webapp → code}/models/tabpfn_wrapper.py
RENAMED

@@ -18,19 +18,9 @@ from typing import Optional, Union
 import numpy as np
 import pandas as pd
 
-#
-
-
-_tabpfn_token = os.environ.get("TABPFN_TOKEN", "")
-if _tabpfn_token:
-    os.environ["TABPFN_TOKEN"] = _tabpfn_token  # ensure it's set for child processes
-
-# Cover all license-acceptance env var names across TabPFN versions.
-os.environ["TABPFN_ACCEPT_LICENSE"] = "1"
-os.environ["TABPFN_LICENSE"] = "accept"
-os.environ["TABPFN_ACCEPT_TERMS"] = "1"
-os.environ["TABPFN_LICENSE_ACCEPTED"] = "1"
-os.environ["AGREE_TABPFN_LICENSE"] = "1"
+# Automatically accept the TabPFN license to prevent browser/socket crashes on Windows
+os.environ["TABPFN_LICENSE"] = "accept"
+os.environ["TABPFN_ACCEPT_LICENSE"] = "1"
 
 # ── Patch for old TabPFN compatibility with newer torch ──────────────────────
 try:
@@ -84,10 +74,6 @@ class TabPFNWrapper(BaseModelWrapper):
         Random seed
     """
 
-    # Class-level cache: weights are loaded once and shared across ALL instances
-    # in the same process. This prevents reloading 103 weight files on every CV fold.
-    _shared_classifier = None
-
     def __init__(
         self,
         task_type: str = 'classification',
@@ -106,6 +92,18 @@ class TabPFNWrapper(BaseModelWrapper):
     def fit(self, X: Union[pd.DataFrame, np.ndarray], y: Union[pd.Series, np.ndarray]) -> 'TabPFNWrapper':
         """
         Fit TabPFN (stores training data for in-context learning).
+
+        Parameters
+        ----------
+        X : pd.DataFrame or np.ndarray, shape (n_samples, n_features)
+            Training features (max 1000 samples, 100 features)
+        y : pd.Series or np.ndarray, shape (n_samples,)
+            Training target
+
+        Returns
+        -------
+        self : TabPFNWrapper
+            Fitted model
         """
         self._validate_input(X, y)
 
@@ -141,35 +139,18 @@ class TabPFNWrapper(BaseModelWrapper):
         try:
             from tabpfn import TabPFNClassifier
 
+            import torch
             import tabpfn
-
-
-
-            if
-
-                # TabPFN v2: no device/N_ensemble args; token read from TABPFN_TOKEN env var.
-                # TabPFN v0.1.x: needs device + N_ensemble_configurations.
-                version = getattr(tabpfn, '__version__', '0')
-                if version.startswith('0.1'):
-                    import torch
-                    actual_device = 'cuda' if (self.device == 'auto' and torch.cuda.is_available()) else 'cpu'
-                    TabPFNWrapper._shared_classifier = TabPFNClassifier(
-                        device=actual_device,
-                        N_ensemble_configurations=self.n_ensemble
-                    )
-                else:
-                    # v2+: just instantiate — auth is via TABPFN_TOKEN env var
-                    TabPFNWrapper._shared_classifier = TabPFNClassifier()
+
+            actual_device = 'cuda' if (self.device == 'auto' and torch.cuda.is_available()) else ('cpu' if self.device == 'auto' else self.device)
+
+            if hasattr(tabpfn, '__version__') and tabpfn.__version__.startswith('0.1'):
+                self.model = TabPFNClassifier(device=actual_device, N_ensemble_configurations=self.n_ensemble)
             else:
-
-
-                self.model = TabPFNWrapper._shared_classifier
+                self.model = TabPFNClassifier(device=actual_device)
 
-            # Fit
-
-            self.model.fit(X, y, overwrite_warning=True)
-        except TypeError:
-            self.model.fit(X, y)
+            # Fit model
+            self.model.fit(X, y)
 
         self.is_fitted = True
         self.fit_time = time.time() - start_time
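For context, a hypothetical sketch of how the rewritten fit path is expected to be exercised. The constructor keyword arguments are assumptions based on the tabpfn entry in config/experiments.yaml (n_ensemble, device), and the dataset is a stand-in; only fit/predict and the fit_time attribute are taken from the wrapper shown in the diff.

# Hypothetical usage of TabPFNWrapper after this change (kwargs are assumptions).
from sklearn.datasets import load_iris
from models.tabpfn_wrapper import TabPFNWrapper

data = load_iris(as_frame=True)
X, y = data.data, data.target

wrapper = TabPFNWrapper(task_type='classification', device='auto', n_ensemble=1)
wrapper.fit(X.iloc[:120], y.iloc[:120])   # stores data for in-context learning
preds = wrapper.predict(X.iloc[120:])
print(preds[:10], f"fit took {wrapper.fit_time:.2f}s")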
code/runners/__init__.py
ADDED
@@ -0,0 +1,11 @@
+"""
+Experiment Runners Package
+===========================
+
+Tools for executing benchmarking experiments.
+
+Author: UW MSIM Team
+Date: November 2025
+"""
+
+__all__ = ['run_experiment', 'run_batch']
code/runners/run_baselines.py
ADDED
@@ -0,0 +1,50 @@
+"""
+Baseline Models Batch Runner
+==============================
+
+Run all baseline models (XGBoost, CatBoost, LightGBM) on all or specific datasets.
+
+Usage:
+    # Run on ALL datasets
+    py -3.12 -m runners.run_baselines
+
+    # Run on specific datasets
+    py -3.12 -m runners.run_baselines --dataset analcatdata_authorship diabetes
+
+Author: UW MSIM Team
+Date: April 2026
+"""
+
+import argparse
+import sys
+from pathlib import Path
+
+# Add parent directory to path
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+from runners.run_batch import main as run_batch_main
+
+
+BASELINE_MODELS = ['xgboost', 'catboost', 'lightgbm']
+
+
+def main():
+    """Run all baseline models on all or specific datasets."""
+    parser = argparse.ArgumentParser(description='Run baseline models')
+    parser.add_argument('--dataset', nargs='*', default=None,
+                        help='Specific dataset(s) to run (e.g., --dataset analcatdata_authorship diabetes)')
+
+    args = parser.parse_args()
+
+    # Build sys.argv for run_batch
+    batch_args = ['run_baselines', '--model-filter', *BASELINE_MODELS]
+
+    if args.dataset:
+        batch_args.extend(['--dataset-filter', *args.dataset])
+
+    sys.argv = batch_args
+    run_batch_main()
+
+
+if __name__ == '__main__':
+    main()
code/runners/run_batch.py
ADDED
|
@@ -0,0 +1,289 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Batch Experiment Runner
|
| 3 |
+
========================
|
| 4 |
+
|
| 5 |
+
Run multiple models on multiple datasets.
|
| 6 |
+
|
| 7 |
+
Usage:
|
| 8 |
+
python -m runners.run_batch \
|
| 9 |
+
--datasets config/datasets.yaml \
|
| 10 |
+
--models config/models.yaml
|
| 11 |
+
|
| 12 |
+
Author: UW MSIM Team
|
| 13 |
+
Date: April 2026
|
| 14 |
+
"""
|
| 15 |
+
|
| 16 |
+
import argparse
|
| 17 |
+
import yaml
|
| 18 |
+
import logging
|
| 19 |
+
import sys
|
| 20 |
+
import os
|
| 21 |
+
import json
|
| 22 |
+
import time
|
| 23 |
+
from pathlib import Path
|
| 24 |
+
from typing import List, Dict, Optional
|
| 25 |
+
|
| 26 |
+
# Add parent directory to path
|
| 27 |
+
sys.path.insert(0, str(Path(__file__).parent.parent))
|
| 28 |
+
|
| 29 |
+
from runners.run_experiment import run_single_experiment, get_model
|
| 30 |
+
|
| 31 |
+
logger = logging.getLogger(__name__)
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def get_dataset_list(datasets_config: dict, dataset_dir: str = None) -> List[str]:
|
| 35 |
+
"""
|
| 36 |
+
Get list of available dataset names from the download directory.
|
| 37 |
+
|
| 38 |
+
Parameters
|
| 39 |
+
----------
|
| 40 |
+
datasets_config : dict
|
| 41 |
+
Datasets YAML configuration
|
| 42 |
+
dataset_dir : str
|
| 43 |
+
Directory containing downloaded datasets
|
| 44 |
+
|
| 45 |
+
Returns
|
| 46 |
+
-------
|
| 47 |
+
datasets : list of str
|
| 48 |
+
List of dataset names
|
| 49 |
+
"""
|
| 50 |
+
datasets = []
|
| 51 |
+
|
| 52 |
+
if dataset_dir is None:
|
| 53 |
+
dataset_dir = str(Path(__file__).parent.parent.parent / 'datasets')
|
| 54 |
+
|
| 55 |
+
if os.path.isdir(dataset_dir):
|
| 56 |
+
# Find all *_X.csv files and extract dataset names
|
| 57 |
+
for f in sorted(os.listdir(dataset_dir)):
|
| 58 |
+
if f.endswith('_X.csv'):
|
| 59 |
+
name = f[:-6] # Remove '_X.csv'
|
| 60 |
+
# Verify y file also exists
|
| 61 |
+
y_file = os.path.join(dataset_dir, f"{name}_y.csv")
|
| 62 |
+
if os.path.exists(y_file):
|
| 63 |
+
datasets.append(name)
|
| 64 |
+
|
| 65 |
+
logger.info(f"Found {len(datasets)} datasets in {dataset_dir}")
|
| 66 |
+
else:
|
| 67 |
+
logger.warning(f"Dataset directory not found: {dataset_dir}")
|
| 68 |
+
|
| 69 |
+
return datasets
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def get_model_list(models_config: dict) -> List[str]:
|
| 73 |
+
"""
|
| 74 |
+
Get list of enabled model names from configuration.
|
| 75 |
+
|
| 76 |
+
Parameters
|
| 77 |
+
----------
|
| 78 |
+
models_config : dict
|
| 79 |
+
Models YAML configuration
|
| 80 |
+
|
| 81 |
+
Returns
|
| 82 |
+
-------
|
| 83 |
+
models : list of str
|
| 84 |
+
List of enabled model names
|
| 85 |
+
"""
|
| 86 |
+
models = []
|
| 87 |
+
|
| 88 |
+
for model_entry in models_config.get('models', []):
|
| 89 |
+
if model_entry.get('enabled', True):
|
| 90 |
+
models.append(model_entry['name'])
|
| 91 |
+
|
| 92 |
+
return models
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
def run_batch_experiments(
|
| 96 |
+
datasets: List[str],
|
| 97 |
+
models: List[str],
|
| 98 |
+
experiment_config: dict,
|
| 99 |
+
output_dir: str = '../results/raw',
|
| 100 |
+
skip_existing: bool = True
|
| 101 |
+
) -> dict:
|
| 102 |
+
"""
|
| 103 |
+
Run experiments for all dataset Γ model combinations.
|
| 104 |
+
|
| 105 |
+
Parameters
|
| 106 |
+
----------
|
| 107 |
+
datasets : list of str
|
| 108 |
+
Dataset names
|
| 109 |
+
models : list of str
|
| 110 |
+
Model names
|
| 111 |
+
experiment_config : dict
|
| 112 |
+
Experiment configuration (n_folds, random_state, etc.)
|
| 113 |
+
output_dir : str
|
| 114 |
+
Where to save results
|
| 115 |
+
skip_existing : bool
|
| 116 |
+
If True, skip experiments that already have result files
|
| 117 |
+
|
| 118 |
+
Returns
|
| 119 |
+
-------
|
| 120 |
+
summary : dict
|
| 121 |
+
Batch run summary with successes and failures
|
| 122 |
+
"""
|
| 123 |
+
total_experiments = len(datasets) * len(models)
|
| 124 |
+
logger.info(f"\n{'='*60}")
|
| 125 |
+
logger.info(f"BATCH RUN: {len(datasets)} datasets Γ {len(models)} models = {total_experiments} experiments")
|
| 126 |
+
logger.info(f"{'='*60}\n")
|
| 127 |
+
|
| 128 |
+
summary = {
|
| 129 |
+
'total': total_experiments,
|
| 130 |
+
'completed': 0,
|
| 131 |
+
'skipped': 0,
|
| 132 |
+
'failed': 0,
|
| 133 |
+
'results': [],
|
| 134 |
+
'errors': []
|
| 135 |
+
}
|
| 136 |
+
|
| 137 |
+
batch_start_time = time.time()
|
| 138 |
+
|
| 139 |
+
for i, dataset_name in enumerate(datasets):
|
| 140 |
+
for j, model_name in enumerate(models):
|
| 141 |
+
experiment_num = i * len(models) + j + 1
|
| 142 |
+
        output_file = os.path.join(output_dir, f"{dataset_name}_{model_name}.json")

        # Skip existing results
        if skip_existing and os.path.exists(output_file):
            logger.info(
                f"[{experiment_num}/{total_experiments}] "
                f"SKIP {model_name} on {dataset_name} (result exists)"
            )
            summary['skipped'] += 1
            continue

        logger.info(
            f"\n[{experiment_num}/{total_experiments}] "
            f"Running {model_name} on {dataset_name}..."
        )

        try:
            result = run_single_experiment(
                dataset_name=dataset_name,
                model_name=model_name,
                config=experiment_config,
                output_dir=output_dir
            )
            summary['completed'] += 1
            summary['results'].append({
                'dataset': dataset_name,
                'model': model_name,
                'status': 'success'
            })

        except Exception as e:
            logger.error(f"FAILED: {model_name} on {dataset_name}: {e}")
            summary['failed'] += 1
            summary['errors'].append({
                'dataset': dataset_name,
                'model': model_name,
                'error': str(e)
            })

    batch_elapsed = time.time() - batch_start_time

    # Print summary
    logger.info(f"\n{'='*60}")
    logger.info(f"BATCH RUN COMPLETE")
    logger.info(f"{'='*60}")
    logger.info(f"  Total experiments: {summary['total']}")
    logger.info(f"  Completed: {summary['completed']}")
    logger.info(f"  Skipped: {summary['skipped']}")
    logger.info(f"  Failed: {summary['failed']}")
    logger.info(f"  Total time: {batch_elapsed / 3600:.2f} hours")
    logger.info(f"{'='*60}\n")

    # Save batch summary
    os.makedirs(output_dir, exist_ok=True)
    summary_file = os.path.join(output_dir, '_batch_summary.json')
    summary['elapsed_hours'] = batch_elapsed / 3600
    with open(summary_file, 'w') as f:
        json.dump(summary, f, indent=2)
    logger.info(f"Batch summary saved to {summary_file}")

    return summary


def main():
    """Entry point for batch runner."""
    # Setup logging
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    )

    # Parse arguments
    parser = argparse.ArgumentParser(description='Run batch benchmarking experiments')
    parser.add_argument('--datasets', default='config/datasets.yaml',
                        help='Datasets config file')
    parser.add_argument('--models', default='config/models.yaml',
                        help='Models config file')
    parser.add_argument('--config', default='config/experiments.yaml',
                        help='Experiment config file')
    parser.add_argument('--output-dir', default='../results/raw',
                        help='Output directory')
    parser.add_argument('--dataset-dir', default=None,
                        help='Directory containing downloaded datasets')
    parser.add_argument('--no-skip', action='store_true',
                        help='Re-run experiments even if results exist')
    parser.add_argument('--model-filter', nargs='*', default=None,
                        help='Only run specific models (e.g., --model-filter sap-rpt1-hf xgboost)')
    parser.add_argument('--dataset-filter', nargs='*', default=None,
                        help='Only run specific datasets')

    args = parser.parse_args()

    # Load configs
    if os.path.exists(args.datasets):
        with open(args.datasets) as f:
            datasets_config = yaml.safe_load(f)
    else:
        datasets_config = {}

    if os.path.exists(args.models):
        with open(args.models) as f:
            models_config = yaml.safe_load(f)
    else:
        models_config = {}

    if os.path.exists(args.config):
        with open(args.config) as f:
            experiment_config = yaml.safe_load(f)
    else:
        experiment_config = {
            'n_folds': 10,
            'random_state': 42,
            'cost_per_hour': 0.90,
            'gpu_type': 'H200'
        }

    # Get dataset and model lists
    dataset_list = args.dataset_filter or get_dataset_list(datasets_config, args.dataset_dir)
    model_list = args.model_filter or get_model_list(models_config)

    if not dataset_list:
        print("[ERROR] No datasets found in the datasets directory.")
        sys.exit(1)

    if not model_list:
        print("[ERROR] No models enabled in config. Check config/models.yaml")
        sys.exit(1)

    print(f"\n[INFO] Datasets ({len(dataset_list)}): {dataset_list[:5]}{'...' if len(dataset_list) > 5 else ''}")
    print(f"[INFO] Models ({len(model_list)}): {model_list}")

    # Add dataset_dir to config for run_experiment to use
    experiment_config['dataset_dir'] = args.dataset_dir if args.dataset_dir else str(Path(__file__).parent.parent.parent / 'datasets')

    # Run batch
    summary = run_batch_experiments(
        datasets=dataset_list,
        models=model_list,
        experiment_config=experiment_config,
        output_dir=args.output_dir,
        skip_existing=not args.no_skip
    )

    print(f"\n[SUCCESS] Batch complete! {summary['completed']} succeeded, {summary['failed']} failed")


if __name__ == "__main__":
    main()
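For reference, the batch runner above can also be driven programmatically rather than through its CLI. The sketch below is an assumption-laden illustration, not part of the commit: it assumes the YAML files under code/config exist as listed in this commit, and the dataset and model names passed in are hypothetical; only run_batch_experiments and its keyword arguments come from run_batch.py itself.

    # Hedged sketch: programmatic use of run_batch_experiments (dataset/model
    # names and paths below are hypothetical placeholders).
    import yaml
    from runners.run_batch import run_batch_experiments

    with open('config/experiments.yaml') as f:
        experiment_config = yaml.safe_load(f)
    experiment_config['dataset_dir'] = 'datasets'  # hypothetical local path

    summary = run_batch_experiments(
        datasets=['adult', 'bank-marketing'],   # hypothetical dataset names
        models=['xgboost', 'sap-rpt1-hf'],
        experiment_config=experiment_config,
        output_dir='results/raw',
        skip_existing=True,
    )
    print(summary['completed'], summary['failed'])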
code/runners/run_experiment.py
ADDED
|
@@ -0,0 +1,260 @@
"""
Single Experiment Runner
=========================

Run a single model on a single dataset.

Usage:
    python -m runners.run_experiment --dataset adult --model sap-rpt1

Author: UW MSIM Team
Date: November 2025
"""

import argparse
import json
import yaml
import logging
import sys
import os
from pathlib import Path

# Add parent directory to path
sys.path.insert(0, str(Path(__file__).parent.parent))

from models import *
from datasets.preprocessors import load_dataset
from datasets.dataset_catalog import DatasetCatalog
from evaluation import run_cross_validation, ComputeTracker

logger = logging.getLogger(__name__)


def get_model(model_name: str, task_type: str, config: dict):
    """
    Initialize model by name.

    Parameters
    ----------
    model_name : str
        Model identifier
    task_type : str
        'classification' or 'regression'
    config : dict
        Model configuration

    Returns
    -------
    model : BaseModelWrapper
        Initialized model
    """
    model_map = {
        'sap-rpt1': SAPRPT1Wrapper,
        'sap-rpt1-small': lambda **kwargs: SAPRPT1Wrapper(model_size='small', **kwargs),
        'sap-rpt1-large': lambda **kwargs: SAPRPT1Wrapper(model_size='large', **kwargs),
        'sap-rpt1-hf': SAPRPT1HFWrapper,
        'tabpfn': TabPFNWrapper,
        'tabicl': TabICLWrapper,
        'autogluon': AutoGluonWrapper,
        'xgboost': XGBoostWrapper,
        'catboost': CatBoostWrapper,
        'lightgbm': LightGBMWrapper
    }

    if model_name not in model_map:
        raise ValueError(f"Unknown model: {model_name}. Choose from {list(model_map.keys())}")

    model_class = model_map[model_name]

    # Get specific parameters for this model
    model_config_key = model_name.replace('-', '_')
    # Special handling for size variants like sap-rpt1-small -> sap_rpt1
    if model_name.startswith('sap-rpt1-') and model_name not in ['sap-rpt1-hf']:
        model_config_key = 'sap_rpt1'

    model_params = config.get('model_params', {}).get(model_config_key, {})

    model = model_class(task_type=task_type, **model_params)

    logger.info(f"Initialized {model_name} for {task_type}")

    return model


def run_single_experiment(
    dataset_name: str,
    model_name: str,
    config: dict,
    output_dir: str = '../results/raw'
) -> dict:
    """
    Run experiment on single dataset with single model.

    Parameters
    ----------
    dataset_name : str
        Dataset name
    model_name : str
        Model name
    config : dict
        Experiment configuration
    output_dir : str
        Where to save results

    Returns
    -------
    summary : dict
        Experiment results
    """
    logger.info(f"\n{'='*60}")
    logger.info(f"Experiment: {model_name} on {dataset_name}")
    logger.info(f"{'='*60}\n")

    # Create output directory
    os.makedirs(output_dir, exist_ok=True)

    # Start compute tracking
    tracker = ComputeTracker(
        cost_per_hour=config.get('cost_per_hour', 0.90),
        gpu_type=config.get('gpu_type', 'H200')
    )
    tracker.start()

    try:
        # Load dataset
        logger.info("Loading dataset...")
        default_dataset_dir = str(Path(__file__).parent.parent.parent / 'datasets')
        dataset_dir = config.get('dataset_dir', default_dataset_dir)
        dataset_path = config.get('dataset_path', None)

        if dataset_path and os.path.exists(dataset_path):
            # Explicit path provided
            X, y, task_type = load_dataset(dataset_path)
        elif os.path.isdir(dataset_dir):
            # Search for dataset files in the download directory
            X_file = None
            y_file = None
            for f in os.listdir(dataset_dir):
                fname_lower = f.lower()
                dname_lower = dataset_name.lower()
                if fname_lower == f"{dname_lower}_x.csv" or (fname_lower.endswith('_x.csv') and dname_lower in fname_lower):
                    X_file = os.path.join(dataset_dir, f)
                if fname_lower == f"{dname_lower}_y.csv" or (fname_lower.endswith('_y.csv') and dname_lower in fname_lower):
                    y_file = os.path.join(dataset_dir, f)

            if X_file and y_file:
                import pandas as pd_load
                X = pd_load.read_csv(X_file)
                y = pd_load.read_csv(y_file).iloc[:, 0]
                # Determine task type
                if y.dtype == 'object' or len(y.unique()) < 20:
                    task_type = 'classification'
                else:
                    task_type = 'regression'
                logger.info(f"Loaded {dataset_name}: {X.shape[0]} samples, {X.shape[1]} features, task={task_type}")
            else:
                # Fallback: try as a single CSV file
                csv_path = os.path.join(dataset_dir, f"{dataset_name}.csv")
                if os.path.exists(csv_path):
                    X, y, task_type = load_dataset(csv_path)
                else:
                    raise FileNotFoundError(
                        f"Dataset '{dataset_name}' not found in {dataset_dir}.\n"
                        f"Available files: {os.listdir(dataset_dir)[:10]}..."
                    )
        else:
            raise FileNotFoundError(
                f"Dataset directory not found: {dataset_dir}"
            )

        # Initialize model
        model = get_model(model_name, task_type, config)

        # Run cross-validation
        fold_results = run_cross_validation(
            model=model,
            X=X,
            y=y,
            task_type=task_type,
            n_folds=config.get('n_folds', 10),
            random_state=config.get('random_state', 42)
        )

        # Stop tracking
        compute_summary = tracker.stop()

        # Aggregate results
        import pandas as pd
        results_df = pd.DataFrame(fold_results)

        summary = {
            'dataset': dataset_name,
            'model': model_name,
            'task_type': task_type,
            'n_samples': len(X),
            'n_features': X.shape[1],
            'n_folds': config.get('n_folds', 10),
            'mean_metrics': results_df.mean().to_dict(),
            'std_metrics': results_df.std().to_dict(),
            'fold_results': fold_results,
            'compute': compute_summary
        }

        # Save results
        output_file = os.path.join(output_dir, f"{dataset_name}_{model_name}.json")
        with open(output_file, 'w') as f:
            json.dump(summary, f, indent=2)

        logger.info(f"\n[SUCCESS] Results saved to {output_file}")

        # Print summary
        primary_metric = 'roc_auc' if task_type == 'classification' else 'r2'
        if primary_metric in summary['mean_metrics']:
            mean_val = summary['mean_metrics'][primary_metric]
            std_val = summary['std_metrics'][primary_metric]
            logger.info(f"\nPrimary Metric ({primary_metric}): {mean_val:.4f} ± {std_val:.4f}")

        return summary

    except Exception as e:
        logger.error(f"Experiment failed: {e}", exc_info=True)
        raise


if __name__ == "__main__":
    # Setup logging
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    )

    # Parse arguments
    parser = argparse.ArgumentParser(description='Run single benchmarking experiment')
    parser.add_argument('--dataset', required=True, help='Dataset name')
    parser.add_argument('--model', required=True, help='Model name')
    parser.add_argument('--config', default='../config/experiments.yaml', help='Config file')
    parser.add_argument('--output-dir', default='../results/raw', help='Output directory')

    args = parser.parse_args()

    # Load config
    if os.path.exists(args.config):
        with open(args.config) as f:
            config = yaml.safe_load(f)
    else:
        config = {
            'n_folds': 10,
            'random_state': 42,
            'cost_per_hour': 0.90,
            'gpu_type': 'H200'
        }

    # Run experiment
    results = run_single_experiment(
        dataset_name=args.dataset,
        model_name=args.model,
        config=config,
        output_dir=args.output_dir
    )

    print("\n[SUCCESS] Experiment complete!")
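One detail in get_model above is worth pinning down: model names are normalized into config keys by replacing '-' with '_', and the sap-rpt1 size variants (except sap-rpt1-hf) all collapse onto the single 'sap_rpt1' parameter block. The sketch below only restates that rule for inspection; the YAML-like config dict is hypothetical, not the contents of config/models.yaml.

    # Hedged sketch of the key-normalization rule from get_model; the config
    # dict here is hypothetical and stands in for config/models.yaml.
    config = {
        'model_params': {
            'sap_rpt1': {'max_context_size': 2048},
            'xgboost': {'n_estimators': 500},
        }
    }

    def resolve_params(model_name, config):
        key = model_name.replace('-', '_')
        if model_name.startswith('sap-rpt1-') and model_name not in ['sap-rpt1-hf']:
            key = 'sap_rpt1'
        return config.get('model_params', {}).get(key, {})

    assert resolve_params('sap-rpt1-small', config) == {'max_context_size': 2048}
    assert resolve_params('xgboost', config) == {'n_estimators': 500}
    assert resolve_params('sap-rpt1-hf', config) == {}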
{webapp → code}/sap_rpt1.egg-info/PKG-INFO
RENAMED
File without changes
code/sap_rpt1.egg-info/SOURCES.txt
ADDED
|
@@ -0,0 +1,28 @@
README.md
setup.py
code/analysis/__init__.py
code/analysis/aggregate_results.py
code/evaluation/__init__.py
code/evaluation/compute_tracker.py
code/evaluation/cross_validation.py
code/evaluation/metrics.py
code/evaluation/statistical_tests.py
code/models/__init__.py
code/models/autogluon_wrapper.py
code/models/base_wrapper.py
code/models/baseline_wrappers.py
code/models/sap_rpt1_hf_wrapper.py
code/models/sap_rpt1_wrapper.py
code/models/tabicl_wrapper.py
code/models/tabpfn_wrapper.py
code/runners/__init__.py
code/runners/run_baselines.py
code/runners/run_batch.py
code/runners/run_experiment.py
code/sap_rpt1.egg-info/PKG-INFO
code/sap_rpt1.egg-info/SOURCES.txt
code/sap_rpt1.egg-info/dependency_links.txt
code/sap_rpt1.egg-info/requires.txt
code/sap_rpt1.egg-info/top_level.txt
code/utils/__init__.py
code/utils/logging_utils.py
{webapp → code}/sap_rpt1.egg-info/dependency_links.txt
RENAMED
File without changes
{webapp → code}/sap_rpt1.egg-info/requires.txt
RENAMED
File without changes
code/sap_rpt1.egg-info/top_level.txt
ADDED
|
@@ -0,0 +1,5 @@
analysis
evaluation
models
runners
utils
code/utils/__init__.py
ADDED
|
@@ -0,0 +1,11 @@
"""
Utilities Package
=================

Logging, result export, and helper functions.

Author: UW MSIM Team
Date: November 2025
"""

__all__ = []
code/utils/logging_utils.py
ADDED
|
@@ -0,0 +1,63 @@
"""
Logging Utilities
=================

Structured logging for experiments.

Author: UW MSIM Team
Date: November 2025
"""

import logging
import sys
from pathlib import Path


def setup_logger(
    name: str,
    log_file: str = None,
    level: int = logging.INFO,
    format_string: str = None
) -> logging.Logger:
    """
    Setup logger with file and console handlers.

    Parameters
    ----------
    name : str
        Logger name
    log_file : str, optional
        Log file path
    level : int
        Logging level
    format_string : str, optional
        Custom format string

    Returns
    -------
    logger : logging.Logger
        Configured logger
    """
    if format_string is None:
        format_string = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'

    # Create logger
    logger = logging.getLogger(name)
    logger.setLevel(level)
    logger.handlers = []  # Clear existing handlers

    # Console handler
    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setLevel(level)
    console_handler.setFormatter(logging.Formatter(format_string))
    logger.addHandler(console_handler)

    # File handler (if specified)
    if log_file:
        Path(log_file).parent.mkdir(parents=True, exist_ok=True)
        file_handler = logging.FileHandler(log_file)
        file_handler.setLevel(level)
        file_handler.setFormatter(logging.Formatter(format_string))
        logger.addHandler(file_handler)

    return logger
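A short usage sketch for setup_logger follows; the logger name and log path are hypothetical, and only the function signature comes from the file above. Passing log_file makes the logger write to both stdout and the file, with parent directories created as needed.

    # Hedged usage sketch for setup_logger (name and path are placeholders).
    from utils.logging_utils import setup_logger

    logger = setup_logger('benchmark', log_file='logs/batch_run.log')
    logger.info('Starting batch run')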
requirements.txt
CHANGED
|
@@ -35,3 +35,4 @@ torcheval==0.0.7
 
 # SAP RPT-1 OSS model (pinned to release v1.1.2)
 sap-rpt-oss @ git+https://github.com/SAP-samples/sap-rpt-1-oss.git@v1.1.2
+
setup.py
CHANGED
|
@@ -3,8 +3,8 @@ from setuptools import setup, find_packages
 setup(
     name="sap-rpt1",
     version="0.1.0",
-    package_dir={"": "
-    packages=find_packages(where="
+    package_dir={"": "code"},
+    packages=find_packages(where="code"),
     install_requires=[
         "numpy>=1.26.4",
         "pandas>=2.2.3",
webapp/benchmark.py
CHANGED
|
@@ -16,21 +16,10 @@ from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
 
 warnings.filterwarnings("ignore")
 
-#
-
-os.environ.setdefault("TABPFN_ACCEPT_LICENSE", "1")
-os.environ.setdefault("TABPFN_LICENSE", "accept")
-os.environ.setdefault("TABPFN_ACCEPT_TERMS", "1")
-os.environ.setdefault("TABPFN_LICENSE_ACCEPTED", "1")
-os.environ.setdefault("AGREE_TABPFN_LICENSE", "1")
-
-# Flexible imports to support both HF Space (root) and local execution (inside webapp)
-try:
-    from webapp.models.tabpfn_wrapper import TabPFNWrapper
-except (ImportError, ModuleNotFoundError):
-    from models.tabpfn_wrapper import TabPFNWrapper
+# Allow importing model wrappers from the code directory
+sys.path.insert(0, str(Path(__file__).parent.parent / "code"))
 
-N_FOLDS = int(os.getenv("N_FOLDS", "
+N_FOLDS = int(os.getenv("N_FOLDS", "5"))
 RAND = int(os.getenv("RANDOM_STATE", "42"))
 HF_TOKEN = os.getenv("HUGGING_FACE_HUB_TOKEN", "")
 
@@ -65,20 +54,15 @@ def _cat(task):
 def _tabpfn(task):
     if task != "classification":
         raise ValueError("TabPFN only supports classification tasks")
-
-    # loaded once per process regardless of how many instances are created.
+    from models.tabpfn_wrapper import TabPFNWrapper
     return TabPFNWrapper(task_type=task, random_state=RAND)
 
 
-
 class _SAPModel:
     """
     Tries the real SAP RPT-1 OSS via HuggingFace; falls back to k-NN simulator
     if the package is not installed or authentication fails.
     """
-    # Class-level cache to avoid reloading Transformer weights on every fold
-    _shared_model = None
-
     def __init__(self, task):
         self.task = task
         self._real = False
@@ -86,19 +70,14 @@ class _SAPModel:
 
         if HF_TOKEN:
             try:
-
-
-
-
+                from huggingface_hub import login
+                login(token=HF_TOKEN, add_to_git_credential=False)
+                from sap_rpt_oss import SAP_RPT_OSS_Classifier, SAP_RPT_OSS_Regressor
+                if task == "classification":
+                    self._model = SAP_RPT_OSS_Classifier(max_context_size=2048, bagging=1)
                 else:
-
-
-                    self._model = SAP_RPT_OSS_Classifier(max_context_size=2048, bagging=1)
-                else:
-                    self._model = SAP_RPT_OSS_Regressor(max_context_size=2048, bagging=1)
-                self._real = True
-                # Store in class-level cache
-                _SAPModel._shared_model = self
+                    self._model = SAP_RPT_OSS_Regressor(max_context_size=2048, bagging=1)
+                self._real = True
             except Exception:
                 self._init_sim()
             else:
@@ -123,23 +102,12 @@ class _SAPModel:
         return self
 
     def predict(self, X):
-
-        # SAP RPT-1 OSS crashes on single-row input (np.concatenate on 0-dim arrays).
-        # Duplicate the row to form a 2-row batch, then return only the first result.
-        import pandas as pd
-        X_pad = pd.concat([X, X], ignore_index=True) if hasattr(X, 'iloc') else np.vstack([X, X])
-        preds = self._model.predict(X_pad)[:1]
-        else:
-            preds = self._model.predict(X)
+        preds = self._model.predict(X)
         if not self._real and self.task == "classification":
            preds = self._le.inverse_transform(preds)
         return preds
 
     def predict_proba(self, X):
-        if self._real and len(X) == 1:
-            import pandas as pd
-            X_pad = pd.concat([X, X], ignore_index=True) if hasattr(X, 'iloc') else np.vstack([X, X])
-            return self._model.predict_proba(X_pad)[:1]
         return self._model.predict_proba(X)
 
     @property
@@ -227,17 +195,14 @@ def _run_cv(builder, X, y, task):
     else:
         splits = list(KFold(N_FOLDS, shuffle=True, random_state=RAND).split(X))
 
-    # Pre-fit encoders on the full dataset to ensure consistent feature space
-    X_full_p, global_encoders = _prep(X)
-
     fold_results = []
     for tr_idx, val_idx in splits:
         Xtr, Xval = X.iloc[tr_idx], X.iloc[val_idx]
         ytr, yval = y.iloc[tr_idx], y.iloc[val_idx]
 
-        #
-        Xtr_p,
-        Xval_p, _
+        # Capture encoders from training set and apply to validation set
+        Xtr_p, encoders = _prep(Xtr)
+        Xval_p, _ = _prep(Xval, encoders=encoders)
 
         model = builder(task)
         if task == "classification":
@@ -356,25 +321,16 @@ def _statistical_analysis(results: dict, task: str) -> dict:
         return {}
 
     # Extract scores per fold for each model
-    #
-
-
+    # Matrix: rows = folds, cols = models
+    matrix = []
+    n_folds = 0
     for name in model_names:
         folds = results[name].get("folds", [])
-
+        n_folds = len(folds)
         scores = [f.get(primary, 0) for f in folds]
-
-        max_folds = max(max_folds, len(scores))
+        matrix.append(scores)
 
-
-
-    # Final list of models that have the full fold count
-    valid_names = [n for n, s in temp_matrix.items() if len(s) == max_folds]
-    if len(valid_names) < 2: return {}
-
-    matrix = np.array([temp_matrix[n] for n in valid_names]).T  # Shape: (n_folds, n_models)
-    n_folds = max_folds
-    model_names = valid_names  # Update model_names to match matrix columns
+    matrix = np.array(matrix).T  # Now (n_folds, n_models)
 
     # Calculate ranks for each fold (row)
     # Higher score = lower rank (1 is best). Using method='min' for competition ranking (ties get same best rank)
@@ -440,21 +396,12 @@ def run_benchmark(df: pd.DataFrame, target_col: str) -> dict:
     dict with keys: dataset_info, task, results, ensemble_info, recommendation
     """
     try:
-        from webapp.ensemble import select_top_models, run_voting_ensemble, run_stacking_ensemble, SKLEARN_SAFE
-    except (ImportError, ModuleNotFoundError):
         from ensemble import select_top_models, run_voting_ensemble, run_stacking_ensemble, SKLEARN_SAFE
+    except ImportError:
+        from webapp.ensemble import select_top_models, run_voting_ensemble, run_stacking_ensemble, SKLEARN_SAFE
 
     y_raw = df[target_col].copy()
    X = df.drop(columns=[target_col]).copy()
-
-    # Subsample for benchmarking if the dataset is too large (>1000 rows)
-    # This prevents the "decades of time" issue on Hugging Face CPU spaces.
-    if len(df) > 1000:
-        print(f"Subsampling dataset from {len(df)} to 1000 rows for benchmarking speed.")
-        df = df.sample(n=1000, random_state=RAND)
-        y_raw = df[target_col].copy()
-        X = df.drop(columns=[target_col]).copy()
-
    task = infer_task(y_raw)
    y, _ = _encode_target(y_raw, task)
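The _statistical_analysis change above rebuilds a fold-by-model score matrix before the per-fold ranking step that follows it ("Higher score = lower rank (1 is best)", method='min'). The sketch below only illustrates that ranking step on made-up scores; it is not code from this commit, and it assumes scipy is available in the environment.

    # Hedged sketch of per-fold competition ranking; the scores are fabricated
    # examples, only the method='min', best-score-gets-rank-1 convention comes
    # from the comments in benchmark.py.
    import numpy as np
    from scipy.stats import rankdata

    # rows = folds, cols = models (higher score is better)
    matrix = np.array([
        [0.91, 0.89, 0.93],
        [0.88, 0.90, 0.92],
        [0.90, 0.90, 0.94],
    ])

    # Negate so that the highest score receives rank 1; ties share the best rank.
    ranks = np.vstack([rankdata(-row, method='min') for row in matrix])
    mean_ranks = ranks.mean(axis=0)
    print(ranks)        # per-fold ranks
    print(mean_ranks)   # the third model averages rank 1.0 in this toy example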
webapp/catboost_info/catboost_training.json
CHANGED
|
@@ -1,204 +1,204 @@
{
"meta":{"test_sets":[],"test_metrics":[],"learn_metrics":[{"best_value":"Min","name":"MultiClass"}],"launch_mode":"Train","parameters":"","iteration_count":200,"learn_sets":["learn"],"name":"experiment"},
"iterations":[
    [auto-generated CatBoost training log: 200 per-iteration entries of the form {"learn":[<MultiClass loss>],"iteration":<i>,"passed_time":<seconds>,"remaining_time":<seconds>}; all 200 entries were regenerated in this commit]
]}
webapp/catboost_info/learn/events.out.tfevents
CHANGED
|
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:d07956a3a8da8881442504752ad6649cc29acd6d9e73a31c71199d20da4bfbef
 size 12070
webapp/catboost_info/learn_error.tsv
CHANGED
@@ -1,201 +1,201 @@
 iter	MultiClass
[auto-generated CatBoost learning-curve log: all 200 per-iteration MultiClass loss values rewritten, falling from 1.0932 at iteration 0 to 0.5751 at iteration 199]
webapp/catboost_info/time_left.tsv
CHANGED
@@ -1,201 +1,201 @@
 iter	Passed	Remaining
[auto-generated CatBoost timing log: all 200 per-iteration Passed/Remaining entries rewritten; Passed rises from 2 at iteration 0 to 368 at iteration 199, Remaining falls to 0]
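The catboost_info files above are CatBoost's auto-generated training artifacts (learning curve, LFS-tracked event file, and timing table) that get rewritten whenever the benchmark trains a CatBoost model, which is why they churn in this revert. For inspecting one of them locally, a small sketch such as the following works; the repo-relative path matches the file shown above, and the column names come from the TSV header.

```python
# Quick look at CatBoost's auto-generated learning-curve log (illustrative sketch).
import pandas as pd

# Tab-separated with a header row: iter <TAB> MultiClass
curve = pd.read_csv("webapp/catboost_info/learn_error.tsv", sep="\t")
print(curve.head())
print(f"final MultiClass loss after {len(curve)} iterations: {curve['MultiClass'].iloc[-1]:.4f}")
```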
webapp/ensemble.py
CHANGED
@@ -33,7 +33,7 @@ def select_top_models(results: dict, builders: dict, task: str, n: int = 3):
     Only includes models that have >0.5 ROC-AUC or >0.0 R².
     """
     primary = "roc_auc" if task == "classification" else "r2"
-    threshold =
+    threshold = 0.50 if task == "classification" else 0.0

     ranked = []
     for name in builders:
@@ -67,14 +67,11 @@ def run_voting_ensemble(top_pairs: list, X: pd.DataFrame, y: pd.Series,
     n_classes = int(y.nunique()) if task == "classification" else None
     fold_results = []

-    # Pre-fit encoders on full X
-    X_full_p, global_encoders = prep_fn(X)
-
     for tr_idx, val_idx in splits:
         Xtr, Xval = X.iloc[tr_idx], X.iloc[val_idx]
         ytr, yval = y.iloc[tr_idx], y.iloc[val_idx]
-        Xtr_p,
-        Xval_p, _
+        Xtr_p, encoders = prep_fn(Xtr)
+        Xval_p, _ = prep_fn(Xval, encoders=encoders)

         t0 = time.perf_counter()
@@ -174,14 +171,11 @@ def run_stacking_ensemble(sklearn_pairs: list, X: pd.DataFrame, y: pd.Series,

     fold_results = []

-    # Pre-fit encoders on full X
-    X_full_p, global_encoders = prep_fn(X)
-
     for tr_idx, val_idx in splits:
         Xtr, Xval = X.iloc[tr_idx], X.iloc[val_idx]
         ytr, yval = y.iloc[tr_idx], y.iloc[val_idx]
-        Xtr_p,
-        Xval_p, _
+        Xtr_p, encoders = prep_fn(Xtr)
+        Xval_p, _ = prep_fn(Xval, encoders=encoders)

         estimators = [(name, builder(task)) for name, builder in sklearn_pairs]
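The reverted hunks above move run_voting_ensemble and run_stacking_ensemble from pre-fitting encoders on the full feature matrix to fitting them inside each CV fold, so validation rows never influence the encoder fit. A minimal sketch of that pattern is below; the prep_fn signature (returning the transformed frame plus its fitted encoders, and accepting encoders= to reuse them) is taken from the diff, but the LabelEncoder-based body is an assumption for illustration, not the webapp's actual helper.

```python
# Minimal sketch of leakage-free, per-fold preprocessing (assumed prep_fn behaviour).
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder


def prep_fn(X, encoders=None):
    """Label-encode object columns; fit encoders unless pre-fitted ones are passed in."""
    X = X.copy()
    fitted = {} if encoders is None else encoders
    for col in X.select_dtypes(include="object").columns:
        if encoders is None:
            fitted[col] = LabelEncoder().fit(X[col].astype(str))
        mapping = {c: i for i, c in enumerate(fitted[col].classes_)}
        # Categories unseen in the training fold fall back to -1 instead of raising.
        X[col] = X[col].astype(str).map(mapping).fillna(-1).astype(int)
    return X, fitted


X = pd.DataFrame({"city": ["a", "b", "a", "c", "b", "a"], "x": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]})
for tr_idx, val_idx in KFold(n_splits=3).split(X):
    Xtr_p, encoders = prep_fn(X.iloc[tr_idx])                 # fit on the training fold only
    Xval_p, _ = prep_fn(X.iloc[val_idx], encoders=encoders)   # reuse those encoders, never re-fit
```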
webapp/main.py
CHANGED
@@ -1,43 +1,27 @@
-
+"""
+main.py – FastAPI backend for the SAP RPT-1 Benchmarking Web App.
+"""
+
 import io, os
-import logging
 from pathlib import Path
 from dotenv import load_dotenv
+
+# Load .env before anything else so HF_TOKEN is available to benchmark.py
+load_dotenv(Path(__file__).parent / ".env")
+
 import pandas as pd
-import numpy as np
 from fastapi import FastAPI, File, UploadFile, Form, HTTPException
-from fastapi.responses import JSONResponse
+from fastapi.responses import JSONResponse
 from fastapi.staticfiles import StaticFiles

-# Flexible imports to support both HF Space (root) and local execution (inside webapp)
 try:
-    from
-except
-    from benchmark import run_benchmark, infer_task
-
-# Setup logging
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-
-# Load .env
-BASE_DIR = Path(__file__).resolve().parent.parent
-load_dotenv(BASE_DIR / "webapp" / ".env")
-
-# Verify Secrets on startup
-hf_token = os.environ.get('HUGGING_FACE_HUB_TOKEN')
-logger.info(f"TABPFN_TOKEN status: {'SET' if os.environ.get('TABPFN_TOKEN') else 'MISSING'}")
-logger.info(f"HF_TOKEN status: {'SET' if hf_token else 'MISSING'}")
-
-if hf_token:
-    try:
-        from huggingface_hub import login
-        login(token=hf_token, add_to_git_credential=False)
-        logger.info("Successfully logged into Hugging Face Hub.")
-    except Exception as e:
-        logger.warning(f"Hugging Face login failed: {e}")
+    from benchmark import run_benchmark, infer_task
+except ImportError:
+    from webapp.benchmark import run_benchmark, infer_task

 # ── Config ─────────────────────────────────────────────────────────────────────
-MAX_FILE_BYTES = int(os.getenv("MAX_FILE_SIZE_MB", "5")) * 1024 * 1024
+MAX_FILE_BYTES = int(os.getenv("MAX_FILE_SIZE_MB", "5")) * 1024 * 1024  # default 5 MB
+
 app = FastAPI(title="SAP RPT-1 Benchmarking API", version="1.0.0")

 # ── Static files (frontend) ─────────────────────────────────────────────────────
@@ -194,15 +178,9 @@ async def benchmark(
             feature_types[col] = "categorical"
     result["dataset_info"]["feature_types"] = feature_types

-    # Add a sample row for the playground preview
-    result["dataset_info"]["preview"] = [df.head(1).fillna("").to_dict("records")[0]]
-
     # Cache the Best Overall model for the Live Playground
     best_name = result["recommendation"]["recommendations"]["best_overall"]["model"]
-
-    if len(df) > 1000:
-        df = df.sample(n=1000, random_state=42)
-
+    from benchmark import BUILDERS, _prep, _encode_target
     X = df.drop(columns=[target_col])
     y_raw = df[target_col]
     task = result["dataset_info"]["task"]
@@ -229,13 +207,11 @@ async def benchmark(
             "name": best_name,
             "task": task,
             "features": list(X.columns),
-            "labels":
+            "labels": list(le.classes_) if le else None,
             "encoders": feat_encoders  # Store these for the /predict endpoint!
         }

     except Exception as e:
-        import traceback
-        traceback.print_exc()
         raise HTTPException(500, f"Benchmarking failed: {e}")

     return JSONResponse(result)
@@ -253,19 +229,15 @@ async def predict(data: dict):
     try:
         # Convert input dict to DataFrame
         input_df = pd.DataFrame([data])
-        # Ensure column order matches training
-        for col in CHAMPION_INFO["features"]:
-            if col not in input_df.columns:
-                input_df[col] = 0
+        # Ensure column order matches training
         input_df = input_df[CHAMPION_INFO["features"]]

+        from benchmark import _prep
         # Use the EXACT same encoders that were used during training
        X_test, _ = _prep(input_df, encoders=CHAMPION_INFO.get("encoders"))

-        logger.info(f"Predicting for {CHAMPION_INFO['name']}...")
-
         if CHAMPION_INFO["task"] == "classification":
-            raw_pred =
+            raw_pred = CHAMPION_MODEL.predict(X_test)
             # Flatten if nested (CatBoost/Sklearn sometimes return [[val]] or [val])
             pred_val = raw_pred.ravel()[0]
             pred_idx = int(pred_val)
@@ -286,7 +258,7 @@ async def predict(data: dict):
                 "labels": CHAMPION_INFO["labels"]
             }
         else:
-            raw_pred =
+            raw_pred = CHAMPION_MODEL.predict(X_test)
             pred = float(raw_pred.ravel()[0])
             return {"prediction": pred}
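The /predict hunks above read two module-level globals, CHAMPION_MODEL and CHAMPION_INFO, that the /benchmark endpoint populates after refitting the best model. The sketch below shows that caching pattern in isolation; the cache_champion helper and the sklearn-style predict call are assumptions for illustration, not the file's actual code.

```python
# Illustrative sketch of a module-level champion cache behind /predict (not the webapp's real code).
import pandas as pd
from fastapi import FastAPI, HTTPException

app = FastAPI()

CHAMPION_MODEL = None      # set after /benchmark refits the best model on the dataset
CHAMPION_INFO: dict = {}   # feature order, task type, class labels, fitted encoders


def cache_champion(model, features, task, labels=None, encoders=None):
    """Remember everything /predict needs to replay training-time preprocessing."""
    global CHAMPION_MODEL, CHAMPION_INFO
    CHAMPION_MODEL = model
    CHAMPION_INFO = {"features": features, "task": task, "labels": labels, "encoders": encoders}


@app.post("/predict")
async def predict(data: dict):
    if CHAMPION_MODEL is None:
        raise HTTPException(400, "Run /benchmark first so a champion model is cached.")
    # Enforce the training-time column order before predicting.
    input_df = pd.DataFrame([data])[CHAMPION_INFO["features"]]
    raw_pred = CHAMPION_MODEL.predict(input_df)   # assumes an sklearn-style model
    pred_val = raw_pred.ravel()[0]                # flatten [[val]] / [val] shapes
    if CHAMPION_INFO["task"] == "classification" and CHAMPION_INFO.get("labels"):
        return {"prediction": CHAMPION_INFO["labels"][int(pred_val)]}
    return {"prediction": float(pred_val)}
```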
webapp/requirements.txt
CHANGED
@@ -6,9 +6,7 @@ xgboost>=2.0.0
 lightgbm>=4.0.0
 catboost>=1.2.0
 scikit-learn>=1.3.0
-scipy>=1.10.0
 pandas>=2.0.0
 numpy>=1.24.0
-tabpfn>=
+tabpfn>=7.1.1
 huggingface_hub
-sentence-transformers
webapp/sap_rpt1.egg-info/SOURCES.txt
DELETED
@@ -1,15 +0,0 @@
-README.md
-setup.py
-webapp/models/__init__.py
-webapp/models/autogluon_wrapper.py
-webapp/models/base_wrapper.py
-webapp/models/baseline_wrappers.py
-webapp/models/sap_rpt1_hf_wrapper.py
-webapp/models/sap_rpt1_wrapper.py
-webapp/models/tabicl_wrapper.py
-webapp/models/tabpfn_wrapper.py
-webapp/sap_rpt1.egg-info/PKG-INFO
-webapp/sap_rpt1.egg-info/SOURCES.txt
-webapp/sap_rpt1.egg-info/dependency_links.txt
-webapp/sap_rpt1.egg-info/requires.txt
-webapp/sap_rpt1.egg-info/top_level.txt
webapp/sap_rpt1.egg-info/top_level.txt
DELETED
@@ -1 +0,0 @@
-models
webapp/static/app.js
CHANGED
@@ -502,8 +502,7 @@ function renderPlayground(datasetInfo, bestOverall, task) {
   if (!form || !bestOverall) return;
   form.innerHTML = "";

-  const
-  const features = (datasetInfo.columns || []).filter(c => String(c).trim().toLowerCase() !== targetCol);
+  const features = datasetInfo.columns || [];
   const preview = datasetInfo.preview ? datasetInfo.preview[0] : {};

   features.forEach(f => {
@@ -553,7 +552,7 @@ function renderPlayground(datasetInfo, bestOverall, task) {
   }

   if (task === "classification") {
-    valueEl.textContent =
+    valueEl.textContent = res.prediction || "–";
     subEl.textContent = `Most likely class (via ${bestOverall.model})`;

     if (res.probabilities && res.labels) {