Pulastya B committed on
Commit
95da84f
·
1 Parent(s): d8b38c5

Fixed JSON Serializability error

Browse files
src/session_store.py CHANGED
@@ -98,7 +98,7 @@ class SessionStore:
98
  def _make_json_serializable(self, obj: Any) -> Any:
99
  """
100
  Convert objects to JSON-serializable format.
101
- Handles matplotlib Figures, plotly Figures, numpy arrays, and other non-serializable types.
102
  """
103
  try:
104
  import numpy as np
@@ -113,6 +113,10 @@ class SessionStore:
113
  elif isinstance(obj, (list, tuple)):
114
  return [self._make_json_serializable(item) for item in obj]
115
 
 
 
 
 
116
  # Handle matplotlib Figure objects
117
  elif hasattr(obj, '__class__') and 'Figure' in obj.__class__.__name__:
118
  return f"<{obj.__class__.__name__} object: {id(obj)}>"
 
98
  def _make_json_serializable(self, obj: Any) -> Any:
99
  """
100
  Convert objects to JSON-serializable format.
101
+ Handles matplotlib Figures, plotly Figures, numpy arrays, datetime objects, and other non-serializable types.
102
  """
103
  try:
104
  import numpy as np
 
113
  elif isinstance(obj, (list, tuple)):
114
  return [self._make_json_serializable(item) for item in obj]
115
 
116
+ # Handle datetime objects
117
+ elif isinstance(obj, (datetime, timedelta)):
118
+ return obj.isoformat()
119
+
120
  # Handle matplotlib Figure objects
121
  elif hasattr(obj, '__class__') and 'Figure' in obj.__class__.__name__:
122
  return f"<{obj.__class__.__name__} object: {id(obj)}>"
src/tools/data_cleaning.py CHANGED
@@ -250,14 +250,14 @@ def clean_missing_values(file_path: str, strategy,
250
  return report
251
 
252
 
253
- def handle_outliers(file_path: str, method: str, columns: List[str],
254
  output_path: str) -> Dict[str, Any]:
255
  """
256
  Detect and handle outliers in numeric columns.
257
 
258
  Args:
259
  file_path: Path to CSV or Parquet file
260
- method: Method to handle outliers ('clip', 'winsorize', 'remove')
261
  columns: List of columns to check, or ['all'] for all numeric columns
262
  output_path: Path to save cleaned dataset
263
 
@@ -299,7 +299,7 @@ def handle_outliers(file_path: str, method: str, columns: List[str],
299
 
300
  report = {
301
  "original_rows": len(df),
302
- "method": method,
303
  "columns_processed": {}
304
  }
305
 
@@ -333,14 +333,14 @@ def handle_outliers(file_path: str, method: str, columns: List[str],
333
  }
334
  continue
335
 
336
- # Apply method
337
- if method == "clip":
338
- # Clip values to bounds
339
  df = df.with_columns(
340
  pl.col(col).clip(lower_bound, upper_bound).alias(col)
341
  )
342
 
343
- elif method == "winsorize":
344
  # Winsorize: cap at 1st and 99th percentiles
345
  p1 = col_data.quantile(0.01)
346
  p99 = col_data.quantile(0.99)
@@ -348,7 +348,7 @@ def handle_outliers(file_path: str, method: str, columns: List[str],
348
  pl.col(col).clip(p1, p99).alias(col)
349
  )
350
 
351
- elif method == "remove":
352
  # Remove rows with outliers
353
  df = df.filter(~outliers_mask)
354
 
 
250
  return report
251
 
252
 
253
+ def handle_outliers(file_path: str, strategy: str, columns: List[str],
254
  output_path: str) -> Dict[str, Any]:
255
  """
256
  Detect and handle outliers in numeric columns.
257
 
258
  Args:
259
  file_path: Path to CSV or Parquet file
260
+ strategy: Method to handle outliers ('clip', 'cap', 'winsorize', 'remove')
261
  columns: List of columns to check, or ['all'] for all numeric columns
262
  output_path: Path to save cleaned dataset
263
 
 
299
 
300
  report = {
301
  "original_rows": len(df),
302
+ "strategy": strategy,
303
  "columns_processed": {}
304
  }
305
 
 
333
  }
334
  continue
335
 
336
+ # Apply strategy
337
+ if strategy == "clip" or strategy == "cap":
338
+ # Clip/cap values to bounds
339
  df = df.with_columns(
340
  pl.col(col).clip(lower_bound, upper_bound).alias(col)
341
  )
342
 
343
+ elif strategy == "winsorize":
344
  # Winsorize: cap at 1st and 99th percentiles
345
  p1 = col_data.quantile(0.01)
346
  p99 = col_data.quantile(0.99)
 
348
  pl.col(col).clip(p1, p99).alias(col)
349
  )
350
 
351
+ elif strategy == "remove":
352
  # Remove rows with outliers
353
  df = df.filter(~outliers_mask)
354