Peter Mutwiri committed on
Commit ·
b97b03b
1
Parent(s): cbd7757
Clean data before insert: replace NaN and infinite values with None
Browse files
- app/mapper.py +4 -0
app/mapper.py
CHANGED
|
@@ -451,6 +451,7 @@ def canonify_df(org_id: str, source_id: str, hours_window: int = 24) -> tuple[pd
|
|
| 451 |
table_name = ensure_canonical_table(duck, df, entity_type)
|
| 452 |
|
| 453 |
# 8c) Transactional insert
|
|
|
|
| 454 |
if not df.empty:
|
| 455 |
table_info = duck.execute(f"PRAGMA table_info('{table_name}')").fetchall()
|
| 456 |
table_cols = [str(r[0]) for r in table_info]
|
|
@@ -458,6 +459,9 @@ def canonify_df(org_id: str, source_id: str, hours_window: int = 24) -> tuple[pd
|
|
| 458 |
df_to_insert = df[[col for col in df.columns if col in table_cols]]
|
| 459 |
|
| 460 |
if not df_to_insert.empty:
|
|
|
|
|
|
|
|
|
|
| 461 |
cols_str = ", ".join(df_to_insert.columns)
|
| 462 |
placeholders = ", ".join(["?"] * len(df_to_insert.columns))
|
| 463 |
|
|
|
|
| 451 |
table_name = ensure_canonical_table(duck, df, entity_type)
|
| 452 |
|
| 453 |
# 8c) Transactional insert
|
| 454 |
+
# 8d) Clean and insert data
|
| 455 |
if not df.empty:
|
| 456 |
table_info = duck.execute(f"PRAGMA table_info('{table_name}')").fetchall()
|
| 457 |
table_cols = [str(r[0]) for r in table_info]
|
|
|
|
| 459 |
df_to_insert = df[[col for col in df.columns if col in table_cols]]
|
| 460 |
|
| 461 |
if not df_to_insert.empty:
|
| 462 |
+
# 🔧 CRITICAL: Replace NaN/Infinity with None for JSON compliance
|
| 463 |
+
df_to_insert = df_to_insert.replace([np.inf, -np.inf, np.nan], None)
|
| 464 |
+
|
| 465 |
cols_str = ", ".join(df_to_insert.columns)
|
| 466 |
placeholders = ", ".join(["?"] * len(df_to_insert.columns))
|
| 467 |
|