Spaces:
Running
Running
Pulastya B committed on
Commit ·
95da84f
1
Parent(s): d8b38c5
Fixed JSON Serializability error
Browse files
- src/session_store.py +5 -1
- src/tools/data_cleaning.py +8 -8
src/session_store.py
CHANGED
|
@@ -98,7 +98,7 @@ class SessionStore:
|
|
| 98 |
def _make_json_serializable(self, obj: Any) -> Any:
|
| 99 |
"""
|
| 100 |
Convert objects to JSON-serializable format.
|
| 101 |
-
Handles matplotlib Figures, plotly Figures, numpy arrays, and other non-serializable types.
|
| 102 |
"""
|
| 103 |
try:
|
| 104 |
import numpy as np
|
|
@@ -113,6 +113,10 @@ class SessionStore:
|
|
| 113 |
elif isinstance(obj, (list, tuple)):
|
| 114 |
return [self._make_json_serializable(item) for item in obj]
|
| 115 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
# Handle matplotlib Figure objects
|
| 117 |
elif hasattr(obj, '__class__') and 'Figure' in obj.__class__.__name__:
|
| 118 |
return f"<{obj.__class__.__name__} object: {id(obj)}>"
|
|
|
|
| 98 |
def _make_json_serializable(self, obj: Any) -> Any:
|
| 99 |
"""
|
| 100 |
Convert objects to JSON-serializable format.
|
| 101 |
+
Handles matplotlib Figures, plotly Figures, numpy arrays, datetime objects, and other non-serializable types.
|
| 102 |
"""
|
| 103 |
try:
|
| 104 |
import numpy as np
|
|
|
|
| 113 |
elif isinstance(obj, (list, tuple)):
|
| 114 |
return [self._make_json_serializable(item) for item in obj]
|
| 115 |
|
| 116 |
+
# Handle datetime objects
|
| 117 |
+
elif isinstance(obj, (datetime, timedelta)):
|
| 118 |
+
return obj.isoformat()
|
| 119 |
+
|
| 120 |
# Handle matplotlib Figure objects
|
| 121 |
elif hasattr(obj, '__class__') and 'Figure' in obj.__class__.__name__:
|
| 122 |
return f"<{obj.__class__.__name__} object: {id(obj)}>"
|
src/tools/data_cleaning.py
CHANGED
|
@@ -250,14 +250,14 @@ def clean_missing_values(file_path: str, strategy,
|
|
| 250 |
return report
|
| 251 |
|
| 252 |
|
| 253 |
-
def handle_outliers(file_path: str,
|
| 254 |
output_path: str) -> Dict[str, Any]:
|
| 255 |
"""
|
| 256 |
Detect and handle outliers in numeric columns.
|
| 257 |
|
| 258 |
Args:
|
| 259 |
file_path: Path to CSV or Parquet file
|
| 260 |
-
|
| 261 |
columns: List of columns to check, or ['all'] for all numeric columns
|
| 262 |
output_path: Path to save cleaned dataset
|
| 263 |
|
|
@@ -299,7 +299,7 @@ def handle_outliers(file_path: str, method: str, columns: List[str],
|
|
| 299 |
|
| 300 |
report = {
|
| 301 |
"original_rows": len(df),
|
| 302 |
-
"
|
| 303 |
"columns_processed": {}
|
| 304 |
}
|
| 305 |
|
|
@@ -333,14 +333,14 @@ def handle_outliers(file_path: str, method: str, columns: List[str],
|
|
| 333 |
}
|
| 334 |
continue
|
| 335 |
|
| 336 |
-
# Apply
|
| 337 |
-
if
|
| 338 |
-
# Clip values to bounds
|
| 339 |
df = df.with_columns(
|
| 340 |
pl.col(col).clip(lower_bound, upper_bound).alias(col)
|
| 341 |
)
|
| 342 |
|
| 343 |
-
elif
|
| 344 |
# Winsorize: cap at 1st and 99th percentiles
|
| 345 |
p1 = col_data.quantile(0.01)
|
| 346 |
p99 = col_data.quantile(0.99)
|
|
@@ -348,7 +348,7 @@ def handle_outliers(file_path: str, method: str, columns: List[str],
|
|
| 348 |
pl.col(col).clip(p1, p99).alias(col)
|
| 349 |
)
|
| 350 |
|
| 351 |
-
elif
|
| 352 |
# Remove rows with outliers
|
| 353 |
df = df.filter(~outliers_mask)
|
| 354 |
|
|
|
|
| 250 |
return report
|
| 251 |
|
| 252 |
|
| 253 |
+
def handle_outliers(file_path: str, strategy: str, columns: List[str],
|
| 254 |
output_path: str) -> Dict[str, Any]:
|
| 255 |
"""
|
| 256 |
Detect and handle outliers in numeric columns.
|
| 257 |
|
| 258 |
Args:
|
| 259 |
file_path: Path to CSV or Parquet file
|
| 260 |
+
strategy: Method to handle outliers ('clip', 'cap', 'winsorize', 'remove')
|
| 261 |
columns: List of columns to check, or ['all'] for all numeric columns
|
| 262 |
output_path: Path to save cleaned dataset
|
| 263 |
|
|
|
|
| 299 |
|
| 300 |
report = {
|
| 301 |
"original_rows": len(df),
|
| 302 |
+
"strategy": strategy,
|
| 303 |
"columns_processed": {}
|
| 304 |
}
|
| 305 |
|
|
|
|
| 333 |
}
|
| 334 |
continue
|
| 335 |
|
| 336 |
+
# Apply strategy
|
| 337 |
+
if strategy == "clip" or strategy == "cap":
|
| 338 |
+
# Clip/cap values to bounds
|
| 339 |
df = df.with_columns(
|
| 340 |
pl.col(col).clip(lower_bound, upper_bound).alias(col)
|
| 341 |
)
|
| 342 |
|
| 343 |
+
elif strategy == "winsorize":
|
| 344 |
# Winsorize: cap at 1st and 99th percentiles
|
| 345 |
p1 = col_data.quantile(0.01)
|
| 346 |
p99 = col_data.quantile(0.99)
|
|
|
|
| 348 |
pl.col(col).clip(p1, p99).alias(col)
|
| 349 |
)
|
| 350 |
|
| 351 |
+
elif strategy == "remove":
|
| 352 |
# Remove rows with outliers
|
| 353 |
df = df.filter(~outliers_mask)
|
| 354 |
|