Spaces:

amitgpt
/

sap-finance-dashboard-RPT-1-OSS

Running

App Files Files Community

amitlals committed 25 days ago

Commit

974a628

1 Parent(s): fc8c40e

Add models directory with RPT model wrapper

Browse files

Files changed (3) hide show

.gitignore +9 -5
models/__init__.py +2 -0
models/rpt_model.py +210 -0

.gitignore CHANGED Viewed

@@ -37,12 +37,16 @@ ENV/
 .env
 .env.local
-# Model cache
 .cache/
-models/
-*.pth
-*.pt
-*.ckpt
 # Data files (optional - uncomment if you don't want to track data)
 # data/*.csv

 .env
 .env.local
+# Model cache and downloaded models
 .cache/
+models/*.pth
+models/*.pt
+models/*.ckpt
+models/*.bin
+models/*.safetensors
+# But keep Python source files in models/
+!models/*.py
+!models/__init__.py
 # Data files (optional - uncomment if you don't want to track data)
 # data/*.csv

models/__init__.py ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ # Models package
2	+

models/rpt_model.py ADDED Viewed

	@@ -0,0 +1,210 @@

+"""
+SAP-RPT-1-OSS Model Wrapper
+Provides a wrapper for SAP-RPT-OSS-Classifier and Regressor with
+authentication handling and CPU fallback options.
+"""
+import os
+import logging
+from typing import Optional, Union
+import pandas as pd
+import numpy as np
+from huggingface_hub import login as hf_login
+from dotenv import load_dotenv
+# Try to import SAP-RPT-OSS models
+try:
+    from sap_rpt_oss import SAP_RPT_OSS_Classifier, SAP_RPT_OSS_Regressor
+    SAP_RPT_AVAILABLE = True
+except ImportError:
+    SAP_RPT_AVAILABLE = False
+    logging.warning("sap-rpt-oss package not installed. Install with: pip install git+https://github.com/SAP-samples/sap-rpt-1-oss")
+load_dotenv()
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+class RPTModelWrapper:
+    """Wrapper for SAP-RPT-1-OSS models with authentication and resource management."""
+    def __init__(self, model_type: str = "classifier", max_context_size: int = 2048, bagging: int = 1):
+        """
+        Initialize the RPT model wrapper.
+        Args:
+            model_type: "classifier" or "regressor"
+            max_context_size: Maximum context size (8192 for best performance, 2048 for CPU)
+            bagging: Bagging factor (8 for best performance, 1 for lightweight)
+        """
+        if not SAP_RPT_AVAILABLE:
+            raise ImportError("sap-rpt-oss package is not installed. Please install it first.")
+        self.model_type = model_type.lower()
+        self.max_context_size = max_context_size
+        self.bagging = bagging
+        self.model = None
+        self.is_fitted = False
+        # Check for Hugging Face token
+        self._check_hf_authentication()
+        # Initialize model
+        self._initialize_model()
+    def _check_hf_authentication(self):
+        """Check and handle Hugging Face authentication."""
+        hf_token = os.getenv("HUGGINGFACE_TOKEN")
+        if hf_token:
+            try:
+                hf_login(token=hf_token)
+                logger.info("Hugging Face authentication successful using token from environment.")
+            except Exception as e:
+                logger.warning(f"Failed to login with token: {e}. Trying interactive login...")
+                try:
+                    hf_login()
+                except Exception as e2:
+                    logger.error(f"Hugging Face authentication failed: {e2}")
+        else:
+            logger.warning("HUGGINGFACE_TOKEN not found in environment. Attempting interactive login...")
+            try:
+                hf_login()
+            except Exception as e:
+                logger.error(f"Hugging Face authentication failed: {e}")
+                logger.info("Please set HUGGINGFACE_TOKEN in .env file or run: huggingface-cli login")
+    def _initialize_model(self):
+        """Initialize the appropriate model based on type."""
+        try:
+            if self.model_type == "classifier":
+                self.model = SAP_RPT_OSS_Classifier(
+                    max_context_size=self.max_context_size,
+                    bagging=self.bagging
+                )
+                logger.info(f"Initialized SAP-RPT-OSS-Classifier with context_size={self.max_context_size}, bagging={self.bagging}")
+            elif self.model_type == "regressor":
+                self.model = SAP_RPT_OSS_Regressor(
+                    max_context_size=self.max_context_size,
+                    bagging=self.bagging
+                )
+                logger.info(f"Initialized SAP-RPT-OSS-Regressor with context_size={self.max_context_size}, bagging={self.bagging}")
+            else:
+                raise ValueError(f"Invalid model_type: {self.model_type}. Must be 'classifier' or 'regressor'")
+        except Exception as e:
+            logger.error(f"Failed to initialize model: {e}")
+            raise
+    def fit(self, X: Union[pd.DataFrame, np.ndarray], y: Union[pd.Series, np.ndarray]):
+        """
+        Fit the model on training data.
+        Args:
+            X: Feature data (DataFrame or array)
+            y: Target data (Series or array)
+        """
+        try:
+            if isinstance(X, np.ndarray):
+                # Convert to DataFrame if needed
+                X = pd.DataFrame(X)
+            if isinstance(y, np.ndarray):
+                y = pd.Series(y)
+            logger.info(f"Fitting model on {len(X)} samples...")
+            self.model.fit(X, y)
+            self.is_fitted = True
+            logger.info("Model fitting completed successfully.")
+        except Exception as e:
+            logger.error(f"Error during model fitting: {e}")
+            raise
+    def predict(self, X: Union[pd.DataFrame, np.ndarray]):
+        """
+        Make predictions.
+        Args:
+            X: Feature data (DataFrame or array)
+        Returns:
+            Predictions (array)
+        """
+        if not self.is_fitted:
+            raise ValueError("Model must be fitted before making predictions. Call fit() first.")
+        try:
+            if isinstance(X, np.ndarray):
+                X = pd.DataFrame(X)
+            logger.info(f"Making predictions on {len(X)} samples...")
+            predictions = self.model.predict(X)
+            return predictions
+        except Exception as e:
+            logger.error(f"Error during prediction: {e}")
+            raise
+    def predict_proba(self, X: Union[pd.DataFrame, np.ndarray]):
+        """
+        Predict class probabilities (classification only).
+        Args:
+            X: Feature data (DataFrame or array)
+        Returns:
+            Probability predictions (array)
+        """
+        if self.model_type != "classifier":
+            raise ValueError("predict_proba() is only available for classifiers.")
+        if not self.is_fitted:
+            raise ValueError("Model must be fitted before making predictions. Call fit() first.")
+        try:
+            if isinstance(X, np.ndarray):
+                X = pd.DataFrame(X)
+            logger.info(f"Predicting probabilities on {len(X)} samples...")
+            probabilities = self.model.predict_proba(X)
+            return probabilities
+        except Exception as e:
+            logger.error(f"Error during probability prediction: {e}")
+            raise
+    def get_model_info(self):
+        """Get information about the current model configuration."""
+        return {
+            "model_type": self.model_type,
+            "max_context_size": self.max_context_size,
+            "bagging": self.bagging,
+            "is_fitted": self.is_fitted,
+            "sap_rpt_available": SAP_RPT_AVAILABLE
+        }
+def create_model(model_type: str = "classifier", use_gpu: bool = True):
+    """
+    Factory function to create a model with appropriate settings.
+    Args:
+        model_type: "classifier" or "regressor"
+        use_gpu: Whether to use GPU-optimized settings (requires 80GB GPU memory)
+    Returns:
+        RPTModelWrapper instance
+    """
+    if use_gpu:
+        # Best performance settings (requires 80GB GPU)
+        return RPTModelWrapper(
+            model_type=model_type,
+            max_context_size=8192,
+            bagging=8
+        )
+    else:
+        # CPU-friendly settings
+        return RPTModelWrapper(
+            model_type=model_type,
+            max_context_size=2048,
+            bagging=1
+        )