Spaces:

PD03
/

RICA-AIRevenueIntelligenceAgent

Sleeping

App Files Community

PD03 committed on Aug 31, 2025

Commit

9794e0d

verified ·

1 Parent(s): e7df474

Update utils/model_trainer.py

Browse files

Files changed (1) hide show

utils/model_trainer.py +10 -7

utils/model_trainer.py CHANGED Viewed

@@ -22,6 +22,9 @@ class EmbeddedChurnTrainer:
     def __init__(self):
         self.model_path = Path('models/churn_model_v1.pkl')
         self.metadata_path = Path('models/model_metadata.json')
     def model_exists(self):
         """Check if trained model exists"""
@@ -33,18 +36,18 @@ class EmbeddedChurnTrainer:
         try:
             conn = duckdb.connect(':memory:')
-            # Load SAP datasets
             conn.execute("""
                 CREATE TABLE customers AS
                 SELECT * FROM 'hf://datasets/SAP/SALT/I_Customer.parquet'
                 LIMIT 5000
-            """)  # Limit for HF Spaces performance
             conn.execute("""
                 CREATE TABLE sales_docs AS
                 SELECT * FROM 'hf://datasets/SAP/SALT/I_SalesDocument.parquet'
                 LIMIT 10000
-            """)  # Limit for HF Spaces performance
             # Join data
             training_data = conn.execute("""
@@ -71,7 +74,7 @@ class EmbeddedChurnTrainer:
             return pd.DataFrame()
     def train_model_if_needed(self):
-        """Train model if it doesn't exist, with progress bar"""
         if self.model_exists():
             return self.load_existing_metadata()
@@ -115,7 +118,7 @@ class EmbeddedChurnTrainer:
             return None
     def engineer_features(self, data):
-        """Streamlined feature engineering for HF Spaces"""
         # Customer-level aggregation
         customer_features = data.groupby('Customer').agg({
             'CustomerName': 'first',
@@ -178,9 +181,9 @@ class EmbeddedChurnTrainer:
             X, y, test_size=0.2, random_state=42, stratify=y
         )
-        # Train model
         self.model = RandomForestClassifier(
-            n_estimators=50,  # Reduced for HF Spaces performance
             max_depth=8,
             min_samples_split=20,
             class_weight='balanced',

     def __init__(self):
         self.model_path = Path('models/churn_model_v1.pkl')
         self.metadata_path = Path('models/model_metadata.json')
+        self.model = None
+        self.label_encoders = {}
+        self.feature_columns = []
     def model_exists(self):
         """Check if trained model exists"""
         try:
             conn = duckdb.connect(':memory:')
+            # Load SAP datasets with limits for HF Spaces performance
             conn.execute("""
                 CREATE TABLE customers AS
                 SELECT * FROM 'hf://datasets/SAP/SALT/I_Customer.parquet'
                 LIMIT 5000
+            """)
             conn.execute("""
                 CREATE TABLE sales_docs AS
                 SELECT * FROM 'hf://datasets/SAP/SALT/I_SalesDocument.parquet'
                 LIMIT 10000
+            """)
             # Join data
             training_data = conn.execute("""
             return pd.DataFrame()
     def train_model_if_needed(self):
+        """Train model if it doesn't exist, with progress updates"""
         if self.model_exists():
             return self.load_existing_metadata()
             return None
     def engineer_features(self, data):
+        """Feature engineering for churn prediction"""
         # Customer-level aggregation
         customer_features = data.groupby('Customer').agg({
             'CustomerName': 'first',
             X, y, test_size=0.2, random_state=42, stratify=y
         )
+        # Train model (optimized for HF Spaces)
         self.model = RandomForestClassifier(
+            n_estimators=50,  # Reduced for performance
             max_depth=8,
             min_samples_split=20,
             class_weight='balanced',