petermutwiri committed on
Commit
29d20a0
·
verified ·
1 Parent(s): c9de484

Update app/mapper.py

Browse files
Files changed (1) hide show
  1. app/mapper.py +15 -4
app/mapper.py CHANGED
@@ -20,6 +20,12 @@ CANONICAL = {
20
  }
21
  ALIAS_FILE = "./db/alias_memory.json"
22
 
 
 
 
 
 
 
23
  # ---------- helpers ---------- #
24
  def safe_str_transform(s: pd.Series) -> pd.Series:
25
  if pd.api.types.is_string_dtype(s):
@@ -30,11 +36,16 @@ def sql(conn, stmt: str, *args):
30
  """Centralised parameter binding → no more int-vs-tuple mistakes."""
31
  return conn.execute(stmt, args).fetchall()
32
 
33
- def add_column_if_not_exists(conn, table: str, col: str, dtype: str) -> None:
34
- cols = {c[0] for c in conn.execute(f"DESCRIBE {table}").fetchall()}
35
- if col.lower() not in cols:
36
- conn.execute(f"ALTER TABLE {table} ADD COLUMN {col} {dtype}")
 
37
 
 
 
 
 
38
  # ---------- alias memory ---------- #
39
  def load_dynamic_aliases() -> None:
40
  if os.path.exists(ALIAS_FILE):
 
20
  }
21
  ALIAS_FILE = "./db/alias_memory.json"
22
 
23
def map_pandas_to_duck(col: str, series: pd.Series) -> str:
    """Return the DuckDB column type name that matches a pandas Series' dtype.

    Args:
        col: Column name. Accepted for call-site symmetry; it is not consulted
            when choosing the type.
        series: The column data whose dtype decides the result.

    Returns:
        One of "BOOLEAN", "BIGINT", "DOUBLE", "TIMESTAMP", or "VARCHAR"
        (the fallback for every other dtype).
    """
    # Checked in order; the first matching predicate wins.
    dtype_checks = (
        (pd.api.types.is_bool_dtype, "BOOLEAN"),
        (pd.api.types.is_integer_dtype, "BIGINT"),
        (pd.api.types.is_float_dtype, "DOUBLE"),
        (pd.api.types.is_datetime64_any_dtype, "TIMESTAMP"),
    )
    for matches, duck_type in dtype_checks:
        if matches(series):
            return duck_type
    return "VARCHAR"
29
  # ---------- helpers ---------- #
30
  def safe_str_transform(s: pd.Series) -> pd.Series:
31
  if pd.api.types.is_string_dtype(s):
 
36
  """Centralised parameter binding → no more int-vs-tuple mistakes."""
37
  return conn.execute(stmt, args).fetchall()
38
 
39
def add_column_if_not_exists(duck: "duckdb.DuckDBPyConnection", table: str, col: str, dtype: str) -> None:
    """Add column ``col`` of type ``dtype`` to ``table`` unless it already exists.

    The existence check is case-insensitive: both the existing column names
    and ``col`` are lower-cased before comparison.

    Args:
        duck: Open connection exposing ``execute(...).fetchall()``.
        table: Target table name. Interpolated into the SQL text unquoted, so
            it must be a trusted identifier.
        col: Column name to add. Also interpolated unquoted.
        dtype: SQL type for the new column (e.g. "VARCHAR").
    """
    # PRAGMA table_info rows are (cid, name, type, notnull, dflt_value, pk):
    # the column name lives at index 1. Index 0 is the integer column id and
    # has no .lower(), so reading it as the name raises AttributeError.
    rows = duck.execute(f"PRAGMA table_info('{table}')").fetchall()
    existing = {row[1].lower() for row in rows}
    if col.lower() not in existing:
        duck.execute(f"ALTER TABLE {table} ADD COLUMN {col} {dtype}")
        print(f"[schema] ➕ added {col}:{dtype} to {table}")
44
 
45
# Usage example, intended for the body of ensure_schema_version.
# NOTE(review): kept as comments because `df`, `duck`, and `table_name` are
# not defined in the visible module scope here; executing this loop at this
# position would raise NameError. Paste it where those names are bound.
#
#     for col in df.columns:
#         dtype = map_pandas_to_duck(col, df[col])
#         add_column_if_not_exists(duck, table_name, col, dtype)
49
  # ---------- alias memory ---------- #
50
  def load_dynamic_aliases() -> None:
51
  if os.path.exists(ALIAS_FILE):