Peter Mutwiri committed on
Commit
b97b03b
·
1 Parent(s): cbd7757

data cleaning before insert to remove infinity numbers

Browse files
Files changed (1) hide show
  1. app/mapper.py +4 -0
app/mapper.py CHANGED
@@ -451,6 +451,7 @@ def canonify_df(org_id: str, source_id: str, hours_window: int = 24) -> tuple[pd
451
  table_name = ensure_canonical_table(duck, df, entity_type)
452
 
453
  # 8c) Transactional insert
 
454
  if not df.empty:
455
  table_info = duck.execute(f"PRAGMA table_info('{table_name}')").fetchall()
456
  table_cols = [str(r[0]) for r in table_info]
@@ -458,6 +459,9 @@ def canonify_df(org_id: str, source_id: str, hours_window: int = 24) -> tuple[pd
458
  df_to_insert = df[[col for col in df.columns if col in table_cols]]
459
 
460
  if not df_to_insert.empty:
 
 
 
461
  cols_str = ", ".join(df_to_insert.columns)
462
  placeholders = ", ".join(["?"] * len(df_to_insert.columns))
463
 
 
451
  table_name = ensure_canonical_table(duck, df, entity_type)
452
 
453
  # 8c) Transactional insert
454
+ # 8d) Clean and insert data
455
  if not df.empty:
456
  table_info = duck.execute(f"PRAGMA table_info('{table_name}')").fetchall()
457
  table_cols = [str(r[0]) for r in table_info]
 
459
  df_to_insert = df[[col for col in df.columns if col in table_cols]]
460
 
461
  if not df_to_insert.empty:
462
+ # 🔧 CRITICAL: Replace NaN/Infinity with None for JSON compliance
463
+ df_to_insert = df_to_insert.replace([np.inf, -np.inf, np.nan], None)
464
+
465
  cols_str = ", ".join(df_to_insert.columns)
466
  placeholders = ", ".join(["?"] * len(df_to_insert.columns))
467