Spaces:

Pulastya0
/

Data-Science-Agent

Running

App Files Files Community

Pulastya B committed on Jan 29

Commit

95da84f

1 Parent(s): d8b38c5

Fixed JSON Serializability error

Browse files

Files changed (2) hide show

src/session_store.py +5 -1
src/tools/data_cleaning.py +8 -8

src/session_store.py CHANGED Viewed

@@ -98,7 +98,7 @@ class SessionStore:
     def _make_json_serializable(self, obj: Any) -> Any:
         """
         Convert objects to JSON-serializable format.
-        Handles matplotlib Figures, plotly Figures, numpy arrays, and other non-serializable types.
         """
         try:
             import numpy as np
@@ -113,6 +113,10 @@ class SessionStore:
         elif isinstance(obj, (list, tuple)):
             return [self._make_json_serializable(item) for item in obj]
         # Handle matplotlib Figure objects
         elif hasattr(obj, '__class__') and 'Figure' in obj.__class__.__name__:
             return f"<{obj.__class__.__name__} object: {id(obj)}>"

     def _make_json_serializable(self, obj: Any) -> Any:
         """
         Convert objects to JSON-serializable format.
+        Handles matplotlib Figures, plotly Figures, numpy arrays, datetime objects, and other non-serializable types.
         """
         try:
             import numpy as np
         elif isinstance(obj, (list, tuple)):
             return [self._make_json_serializable(item) for item in obj]
+        # Handle datetime objects
+        elif isinstance(obj, (datetime, timedelta)):
+            return obj.isoformat()
         # Handle matplotlib Figure objects
         elif hasattr(obj, '__class__') and 'Figure' in obj.__class__.__name__:
             return f"<{obj.__class__.__name__} object: {id(obj)}>"

src/tools/data_cleaning.py CHANGED Viewed

@@ -250,14 +250,14 @@ def clean_missing_values(file_path: str, strategy,
     return report
-def handle_outliers(file_path: str, method: str, columns: List[str],
                    output_path: str) -> Dict[str, Any]:
     """
     Detect and handle outliers in numeric columns.
     Args:
         file_path: Path to CSV or Parquet file
-        method: Method to handle outliers ('clip', 'winsorize', 'remove')
         columns: List of columns to check, or ['all'] for all numeric columns
         output_path: Path to save cleaned dataset
@@ -299,7 +299,7 @@ def handle_outliers(file_path: str, method: str, columns: List[str],
     report = {
         "original_rows": len(df),
-        "method": method,
         "columns_processed": {}
     }
@@ -333,14 +333,14 @@ def handle_outliers(file_path: str, method: str, columns: List[str],
             }
             continue
-        # Apply method
-        if method == "clip":
-            # Clip values to bounds
             df = df.with_columns(
                 pl.col(col).clip(lower_bound, upper_bound).alias(col)
             )
-        elif method == "winsorize":
             # Winsorize: cap at 1st and 99th percentiles
             p1 = col_data.quantile(0.01)
             p99 = col_data.quantile(0.99)
@@ -348,7 +348,7 @@ def handle_outliers(file_path: str, method: str, columns: List[str],
                 pl.col(col).clip(p1, p99).alias(col)
             )
-        elif method == "remove":
             # Remove rows with outliers
             df = df.filter(~outliers_mask)

     return report
+def handle_outliers(file_path: str, strategy: str, columns: List[str],
                    output_path: str) -> Dict[str, Any]:
     """
     Detect and handle outliers in numeric columns.
     Args:
         file_path: Path to CSV or Parquet file
+        strategy: Method to handle outliers ('clip', 'cap', 'winsorize', 'remove')
         columns: List of columns to check, or ['all'] for all numeric columns
         output_path: Path to save cleaned dataset
     report = {
         "original_rows": len(df),
+        "strategy": strategy,
         "columns_processed": {}
     }
             }
             continue
+        # Apply strategy
+        if strategy == "clip" or strategy == "cap":
+            # Clip/cap values to bounds
             df = df.with_columns(
                 pl.col(col).clip(lower_bound, upper_bound).alias(col)
             )
+        elif strategy == "winsorize":
             # Winsorize: cap at 1st and 99th percentiles
             p1 = col_data.quantile(0.01)
             p99 = col_data.quantile(0.99)
                 pl.col(col).clip(p1, p99).alias(col)
             )
+        elif strategy == "remove":
             # Remove rows with outliers
             df = df.filter(~outliers_mask)