Spaces:

Akshay4506
/

ModelMatrix

Running

App Files Files Community

ModelMatrix / matrix /code /models /autogluon_wrapper.py

Akshay4506

Fix deployment entry point and merge requirements

c4ff02d 4 days ago

raw

history blame contribute delete

6.47 kB

	"""
	AutoGluon Wrapper
	=================

	Sklearn-compatible wrapper for AutoGluon Tabular.

	AutoGluon is an AutoML framework that automatically
	trains and ensembles multiple models.

	Author: UW MSIM Team
	Date: November 2025
	"""

	import time
	import logging
	from typing import Optional, Union
	import numpy as np
	import pandas as pd
	import tempfile
	import shutil

	from .base_wrapper import BaseModelWrapper

	# Module-level logger, named after this module via __name__; every log
	# message emitted by the wrapper below goes through it.
	logger = logging.getLogger(__name__)


	class AutoGluonWrapper(BaseModelWrapper):
	    """
	    AutoGluon Tabular wrapper.

	    Trains an ``autogluon.tabular.TabularPredictor`` inside a private
	    temporary directory and exposes it through the ``BaseModelWrapper``
	    fit / predict interface. The directory is removed when the wrapper is
	    refitted successfully or garbage-collected.

	    Parameters
	    ----------
	    task_type : str, default='classification'
	        Task type: 'classification' or 'regression'
	    time_limit : int, default=300
	        Time limit for training in seconds
	    preset : str, default='medium_quality'
	        Preset: 'best_quality', 'high_quality', 'good_quality', 'medium_quality'
	    eval_metric : str, optional
	        Evaluation metric (auto-detected if None)
	    random_state : int, default=42
	        Random seed. Forwarded to ``BaseModelWrapper``; this class does not
	        pass it to AutoGluon itself.
	    """

	    def __init__(
	        self,
	        task_type: str = 'classification',
	        time_limit: int = 300,
	        preset: str = 'medium_quality',
	        eval_metric: Optional[str] = None,
	        random_state: int = 42
	    ):
	        super().__init__(task_type=task_type, random_state=random_state)
	        self.time_limit = time_limit
	        self.preset = preset
	        self.eval_metric = eval_metric
	        # Directory holding the fitted predictor's artifacts (None until fit).
	        self._temp_dir = None
	        # Column names seen during fit; reused to label ndarray inputs later.
	        self._feature_names = None

	    # ------------------------------------------------------------------
	    # Private helpers
	    # ------------------------------------------------------------------

	    def _as_frame(
	        self,
	        X: Union[pd.DataFrame, np.ndarray],
	        use_fitted_names: bool = True
	    ) -> pd.DataFrame:
	        """Return ``X`` as a DataFrame, naming the columns of an ndarray.

	        Non-ndarray inputs are returned unchanged. For an ndarray, the column
	        names recorded by ``fit`` are reused when ``use_fitted_names`` is true
	        and their count matches; otherwise ``feature_0 .. feature_{n-1}`` is
	        used.
	        """
	        if not isinstance(X, np.ndarray):
	            return X

	        n_columns = X.shape[1]
	        fitted_names = getattr(self, '_feature_names', None) if use_fitted_names else None
	        if fitted_names is not None and len(fitted_names) == n_columns:
	            # Keeps ndarray prediction working after fitting on a DataFrame.
	            names = list(fitted_names)
	        else:
	            names = [f'feature_{i}' for i in range(n_columns)]
	        return pd.DataFrame(X, columns=names)

	    def _detect_problem_type(self, y: pd.Series) -> Optional[str]:
	        """Map ``task_type`` and the label cardinality to an AutoGluon problem type.

	        Returns 'regression', 'binary', 'multiclass', or None (which leaves the
	        decision to AutoGluon) when the task type is unknown or fewer than two
	        distinct labels are present.
	        """
	        if self.task_type == 'regression':
	            return 'regression'
	        if self.task_type != 'classification':
	            return None

	        # nunique() ignores NaN and does not need the labels to be sortable.
	        n_classes = y.nunique()
	        if n_classes == 2:
	            return 'binary'
	        if n_classes > 2:
	            return 'multiclass'
	        return None

	    @staticmethod
	    def _remove_dir(path: Optional[str]) -> None:
	        """Best-effort removal of a model directory created by this wrapper.

	        Only paths located under the system temporary directory are removed,
	        so a stray value can never delete an unrelated directory tree.
	        """
	        if path and path.startswith(tempfile.gettempdir()):
	            shutil.rmtree(path, ignore_errors=True)

	    # ------------------------------------------------------------------
	    # Public API
	    # ------------------------------------------------------------------

	    def fit(self, X: Union[pd.DataFrame, np.ndarray], y: Union[pd.Series, np.ndarray]) -> 'AutoGluonWrapper':
	        """
	        Fit AutoGluon model.

	        Parameters
	        ----------
	        X : pd.DataFrame or np.ndarray, shape (n_samples, n_features)
	            Training features
	        y : pd.Series or np.ndarray, shape (n_samples,)
	            Training target

	        Returns
	        -------
	        self : AutoGluonWrapper
	            Fitted model

	        Raises
	        ------
	        ImportError
	            If ``autogluon.tabular`` cannot be imported.
	        Exception
	            Any error raised while preparing the data or training is logged
	            and re-raised unchanged. A previously fitted model, if any, is
	            left in place in that case.
	        """
	        self._validate_input(X, y)

	        logger.info(f"Fitting AutoGluon ({self.preset}) on {X.shape[0]} samples...")
	        start_time = time.time()

	        # Guard only the import itself: an ImportError raised later, during
	        # training, must not be reported as "AutoGluon not installed".
	        try:
	            from autogluon.tabular import TabularPredictor
	        except ImportError as err:
	            logger.error("AutoGluon not installed")
	            raise ImportError("Install AutoGluon with: pip install autogluon.tabular[all]") from err

	        try:
	            # Fresh column names here: names from an earlier fit may not apply.
	            X = self._as_frame(X, use_fitted_names=False)
	            if not isinstance(y, pd.Series):
	                y = pd.Series(y, name='target')

	            # Pick a label column name that cannot clobber a feature column.
	            label = 'target'
	            while label in X.columns:
	                label = f'_{label}'

	            # Combine X and y for AutoGluon; .values ignores y's index so rows
	            # are paired by position.
	            train_data = X.copy()
	            train_data[label] = y.values

	            problem_type = self._detect_problem_type(y)

	            # Train into a new directory and only swap it in on success, so a
	            # failed refit neither leaks a directory nor breaks the old model.
	            new_dir = tempfile.mkdtemp(prefix='autogluon_')
	            try:
	                predictor = TabularPredictor(
	                    label=label,
	                    problem_type=problem_type,
	                    eval_metric=self.eval_metric,
	                    path=new_dir,
	                    verbosity=2
	                )
	                predictor.fit(
	                    train_data=train_data,
	                    time_limit=self.time_limit,
	                    presets=self.preset
	                )
	            except BaseException:
	                # Cleanup-and-reraise, including on KeyboardInterrupt.
	                shutil.rmtree(new_dir, ignore_errors=True)
	                raise

	            previous_dir = getattr(self, '_temp_dir', None)
	            self.model = predictor
	            self._temp_dir = new_dir
	            self._feature_names = list(X.columns)
	            self.is_fitted = True
	            self.fit_time = time.time() - start_time
	            self._remove_dir(previous_dir)

	            # Log leaderboard; an empty leaderboard must not fail the fit.
	            leaderboard = self.model.leaderboard(silent=True)
	            best_model = leaderboard.iloc[0]['model'] if len(leaderboard) else 'n/a'
	            logger.info(f"AutoGluon fitted in {self.fit_time:.2f} seconds. Best model: {best_model}")

	        except Exception as e:
	            logger.error(f"Error fitting AutoGluon: {e}")
	            raise

	        return self

	    def predict(self, X: Union[pd.DataFrame, np.ndarray]) -> np.ndarray:
	        """
	        Make predictions with AutoGluon.

	        Parameters
	        ----------
	        X : pd.DataFrame or np.ndarray, shape (n_samples, n_features)
	            Test features

	        Returns
	        -------
	        predictions : np.ndarray, shape (n_samples,)
	            Predicted values or class labels

	        Raises
	        ------
	        ValueError
	            If the model has not been fitted.
	        """
	        if not self.is_fitted:
	            raise ValueError("Model not fitted. Call fit() first.")

	        self._validate_input(X)

	        logger.info(f"Predicting on {X.shape[0]} samples with AutoGluon...")
	        start_time = time.time()

	        try:
	            predictions = self.model.predict(self._as_frame(X)).values
	        except Exception as e:
	            logger.error(f"Error during prediction: {e}")
	            raise

	        self.predict_time = time.time() - start_time
	        logger.info(f"Predictions complete in {self.predict_time:.2f} seconds")
	        return predictions

	    def _predict_proba_impl(self, X: Union[pd.DataFrame, np.ndarray]) -> np.ndarray:
	        """
	        Predict class probabilities with AutoGluon.

	        Parameters
	        ----------
	        X : pd.DataFrame or np.ndarray, shape (n_samples, n_features)
	            Test features

	        Returns
	        -------
	        probabilities : np.ndarray, shape (n_samples, n_classes)
	            Class probabilities
	        """
	        return self.model.predict_proba(self._as_frame(X)).values

	    def get_params(self, deep: bool = True) -> dict:
	        """Get parameters for this estimator."""
	        params = super().get_params(deep)
	        params.update({
	            'time_limit': self.time_limit,
	            'preset': self.preset,
	            'eval_metric': self.eval_metric
	        })
	        return params

	    def __del__(self):
	        """Clean up temporary directory on deletion."""
	        try:
	            # getattr: __init__ may have failed before _temp_dir was assigned.
	            self._remove_dir(getattr(self, '_temp_dir', None))
	        except Exception:
	            # Deliberate best effort: during interpreter shutdown the modules
	            # used for cleanup may already be torn down, and a finalizer must
	            # never raise.
	            pass