Spaces:

amitlal
/

sap-finance-dashboard-RPT-1-OSS

Running

sap-finance-dashboard-RPT-1-OSS / models /rpt_model.py

amitlals

Add models directory with RPT model wrapper

974a628 5 months ago

7.45 kB

	"""
	SAP-RPT-1-OSS Model Wrapper

	Provides a wrapper for SAP-RPT-OSS-Classifier and Regressor with
	authentication handling and CPU fallback options.
	"""

	import os
	import logging
	from typing import Optional, Union
	import pandas as pd
	import numpy as np
	from huggingface_hub import login as hf_login
	from dotenv import load_dotenv

	# Configure logging BEFORE anything emits a record. Calling a module-level
	# logging function (e.g. logging.warning) on an unconfigured root logger
	# implicitly runs basicConfig() with defaults, after which a later
	# basicConfig(level=logging.INFO) is a no-op and INFO records are dropped.
	logging.basicConfig(level=logging.INFO)
	logger = logging.getLogger(__name__)

	# Try to import SAP-RPT-OSS models. The package is optional at import time;
	# SAP_RPT_AVAILABLE records whether the model classes can be used.
	try:
	    from sap_rpt_oss import SAP_RPT_OSS_Classifier, SAP_RPT_OSS_Regressor
	    SAP_RPT_AVAILABLE = True
	except ImportError:
	    SAP_RPT_AVAILABLE = False
	    logger.warning("sap-rpt-oss package not installed. Install with: pip install git+https://github.com/SAP-samples/sap-rpt-1-oss")

	# Load variables from a .env file into the process environment so that
	# os.getenv() lookups elsewhere in this module can see them.
	load_dotenv()


	class RPTModelWrapper:
	    """Wrapper for SAP-RPT-1-OSS models with authentication and resource management.

	    Construction attempts a Hugging Face login (token from the environment,
	    otherwise an interactive login) and then instantiates either the
	    classifier or the regressor. ``fit``, ``predict`` and ``predict_proba``
	    delegate to that model after converting NumPy inputs to pandas objects.
	    """

	    # Accepted ``model_type`` values (compared after lower-casing).
	    _VALID_MODEL_TYPES = ("classifier", "regressor")

	    def __init__(self, model_type: str = "classifier", max_context_size: int = 2048, bagging: int = 1):
	        """
	        Initialize the RPT model wrapper.

	        Args:
	            model_type: "classifier" or "regressor" (case-insensitive)
	            max_context_size: Maximum context size (8192 for best performance, 2048 for CPU)
	            bagging: Bagging factor (8 for best performance, 1 for lightweight)

	        Raises:
	            ImportError: If the sap-rpt-oss package could not be imported.
	            ValueError: If ``model_type`` is not one of the accepted values.
	        """
	        if not SAP_RPT_AVAILABLE:
	            raise ImportError("sap-rpt-oss package is not installed. Please install it first.")

	        # Fail fast: reject a bad model_type before the Hugging Face login,
	        # which may hit the network or prompt interactively.
	        self.model_type = self._normalize_model_type(model_type)
	        self.max_context_size = max_context_size
	        self.bagging = bagging
	        self.model = None
	        self.is_fitted = False

	        # Check for Hugging Face token
	        self._check_hf_authentication()

	        # Initialize model
	        self._initialize_model()

	    @staticmethod
	    def _normalize_model_type(model_type: str) -> str:
	        """Return ``model_type`` lower-cased, or raise ValueError if unsupported."""
	        normalized = model_type.lower()
	        if normalized not in RPTModelWrapper._VALID_MODEL_TYPES:
	            raise ValueError(f"Invalid model_type: {normalized}. Must be 'classifier' or 'regressor'")
	        return normalized

	    @staticmethod
	    def _as_frame(X: Union[pd.DataFrame, np.ndarray]):
	        """Wrap a NumPy array in a DataFrame; return any other input unchanged."""
	        if isinstance(X, np.ndarray):
	            return pd.DataFrame(X)
	        return X

	    def _check_hf_authentication(self) -> None:
	        """Check and handle Hugging Face authentication.

	        Reads the token from ``HUGGINGFACE_TOKEN``, falling back to ``HF_TOKEN``.
	        Login failures are logged, never raised: authentication is best-effort
	        and model construction is still attempted afterwards.
	        """
	        hf_token = os.getenv("HUGGINGFACE_TOKEN") or os.getenv("HF_TOKEN")

	        if hf_token:
	            try:
	                hf_login(token=hf_token)
	                logger.info("Hugging Face authentication successful using token from environment.")
	            except Exception as e:
	                logger.warning(f"Failed to login with token: {e}. Trying interactive login...")
	                try:
	                    hf_login()
	                except Exception as e2:
	                    logger.error(f"Hugging Face authentication failed: {e2}")
	        else:
	            logger.warning("HUGGINGFACE_TOKEN not found in environment. Attempting interactive login...")
	            try:
	                hf_login()
	            except Exception as e:
	                logger.error(f"Hugging Face authentication failed: {e}")
	                logger.info("Please set HUGGINGFACE_TOKEN in .env file or run: huggingface-cli login")

	    def _initialize_model(self) -> None:
	        """Initialize the appropriate model based on ``self.model_type``.

	        Raises:
	            ValueError: If ``self.model_type`` is not a supported value.
	            Exception: Any error raised by the model constructor is logged
	                and re-raised unchanged.
	        """
	        try:
	            if self.model_type == "classifier":
	                self.model = SAP_RPT_OSS_Classifier(
	                    max_context_size=self.max_context_size,
	                    bagging=self.bagging,
	                )
	                logger.info(f"Initialized SAP-RPT-OSS-Classifier with context_size={self.max_context_size}, bagging={self.bagging}")
	            elif self.model_type == "regressor":
	                self.model = SAP_RPT_OSS_Regressor(
	                    max_context_size=self.max_context_size,
	                    bagging=self.bagging,
	                )
	                logger.info(f"Initialized SAP-RPT-OSS-Regressor with context_size={self.max_context_size}, bagging={self.bagging}")
	            else:
	                # Defensive: reachable only if model_type was reassigned after __init__.
	                raise ValueError(f"Invalid model_type: {self.model_type}. Must be 'classifier' or 'regressor'")
	        except Exception as e:
	            logger.error(f"Failed to initialize model: {e}")
	            raise

	    def fit(self, X: Union[pd.DataFrame, np.ndarray], y: Union[pd.Series, np.ndarray]):
	        """
	        Fit the model on training data.

	        Args:
	            X: Feature data (DataFrame or array)
	            y: Target data (Series or array)

	        Raises:
	            Exception: Any error from the underlying model's ``fit`` is logged
	                and re-raised; ``is_fitted`` is only set on success.
	        """
	        try:
	            # Convert NumPy inputs to pandas objects before delegating.
	            X = self._as_frame(X)
	            if isinstance(y, np.ndarray):
	                y = pd.Series(y)

	            logger.info(f"Fitting model on {len(X)} samples...")
	            self.model.fit(X, y)
	            self.is_fitted = True
	            logger.info("Model fitting completed successfully.")
	        except Exception as e:
	            logger.error(f"Error during model fitting: {e}")
	            raise

	    def predict(self, X: Union[pd.DataFrame, np.ndarray]):
	        """
	        Make predictions.

	        Args:
	            X: Feature data (DataFrame or array)

	        Returns:
	            Predictions as returned by the underlying model's ``predict``.

	        Raises:
	            ValueError: If called before a successful ``fit``.
	        """
	        if not self.is_fitted:
	            raise ValueError("Model must be fitted before making predictions. Call fit() first.")

	        try:
	            X = self._as_frame(X)

	            logger.info(f"Making predictions on {len(X)} samples...")
	            return self.model.predict(X)
	        except Exception as e:
	            logger.error(f"Error during prediction: {e}")
	            raise

	    def predict_proba(self, X: Union[pd.DataFrame, np.ndarray]):
	        """
	        Predict class probabilities (classification only).

	        Args:
	            X: Feature data (DataFrame or array)

	        Returns:
	            Probabilities as returned by the underlying model's ``predict_proba``.

	        Raises:
	            ValueError: If the wrapper is not a classifier, or if called
	                before a successful ``fit``.
	        """
	        if self.model_type != "classifier":
	            raise ValueError("predict_proba() is only available for classifiers.")

	        if not self.is_fitted:
	            raise ValueError("Model must be fitted before making predictions. Call fit() first.")

	        try:
	            X = self._as_frame(X)

	            logger.info(f"Predicting probabilities on {len(X)} samples...")
	            return self.model.predict_proba(X)
	        except Exception as e:
	            logger.error(f"Error during probability prediction: {e}")
	            raise

	    def get_model_info(self) -> dict:
	        """Get information about the current model configuration."""
	        return {
	            "model_type": self.model_type,
	            "max_context_size": self.max_context_size,
	            "bagging": self.bagging,
	            "is_fitted": self.is_fitted,
	            "sap_rpt_available": SAP_RPT_AVAILABLE,
	        }


	def create_model(model_type: str = "classifier", use_gpu: bool = True):
	    """
	    Build an RPTModelWrapper preconfigured for GPU or CPU use.

	    Args:
	        model_type: "classifier" or "regressor"
	        use_gpu: When truthy, use the best-performance settings
	            (context size 8192, bagging 8; requires 80GB GPU memory).
	            Otherwise use the CPU-friendly settings (context size 2048,
	            bagging 1).

	    Returns:
	        RPTModelWrapper instance
	    """
	    # Pick the (context size, bagging factor) pair for the requested profile.
	    if use_gpu:
	        context_size, bagging_factor = 8192, 8
	    else:
	        context_size, bagging_factor = 2048, 1

	    return RPTModelWrapper(
	        model_type=model_type,
	        max_context_size=context_size,
	        bagging=bagging_factor,
	    )