Update app/mapper.py
Browse files- app/mapper.py +15 -4
app/mapper.py
CHANGED
|
@@ -20,6 +20,12 @@ CANONICAL = {
|
|
| 20 |
}
|
| 21 |
ALIAS_FILE = "./db/alias_memory.json"
|
| 22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
# ---------- helpers ---------- #
|
| 24 |
def safe_str_transform(s: pd.Series) -> pd.Series:
|
| 25 |
if pd.api.types.is_string_dtype(s):
|
|
@@ -30,11 +36,16 @@ def sql(conn, stmt: str, *args):
|
|
| 30 |
"""Centralised parameter binding → no more int-vs-tuple mistakes."""
|
| 31 |
return conn.execute(stmt, args).fetchall()
|
| 32 |
|
| 33 |
-
def add_column_if_not_exists(
|
| 34 |
-
|
| 35 |
-
if col.lower() not in
|
| 36 |
-
|
|
|
|
| 37 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
# ---------- alias memory ---------- #
|
| 39 |
def load_dynamic_aliases() -> None:
|
| 40 |
if os.path.exists(ALIAS_FILE):
|
|
|
|
| 20 |
}
|
| 21 |
ALIAS_FILE = "./db/alias_memory.json"
|
| 22 |
|
| 23 |
+
def map_pandas_to_duck(col: str, series: pd.Series) -> str:
    """Return the DuckDB column type name matching a pandas Series dtype.

    Args:
        col: Column name. Not consulted by the mapping; kept so callers
            can pass the name alongside the data.
        series: The column data whose dtype decides the result.

    Returns:
        One of "BOOLEAN", "BIGINT", "DOUBLE", "TIMESTAMP", or "VARCHAR"
        when none of the dtype checks match.
    """
    # Checked in order; the first predicate that accepts the series wins.
    dtype_checks = (
        (pd.api.types.is_bool_dtype, "BOOLEAN"),
        (pd.api.types.is_integer_dtype, "BIGINT"),
        (pd.api.types.is_float_dtype, "DOUBLE"),
        (pd.api.types.is_datetime64_any_dtype, "TIMESTAMP"),
    )
    for matches, duck_type in dtype_checks:
        if matches(series):
            return duck_type
    return "VARCHAR"
|
| 29 |
# ---------- helpers ---------- #
|
| 30 |
def safe_str_transform(s: pd.Series) -> pd.Series:
|
| 31 |
if pd.api.types.is_string_dtype(s):
|
|
|
|
| 36 |
"""Centralised parameter binding → no more int-vs-tuple mistakes."""
|
| 37 |
return conn.execute(stmt, args).fetchall()
|
| 38 |
|
| 39 |
+
def add_column_if_not_exists(duck: duckdb.DuckDBPyConnection, table: str, col: str, dtype: str) -> None:
    """Add column `col` of type `dtype` to `table` unless it already exists.

    The existence check is case-insensitive. When the column is added, a
    one-line notice is printed.

    Args:
        duck: Open DuckDB connection.
        table: Name of the table to alter. Interpolated into the SQL as
            given, so it must come from trusted code.
        col: Name of the column to add.
        dtype: DuckDB type name for the new column (e.g. "BIGINT").
    """
    # PRAGMA table_info yields (cid, name, type, notnull, dflt_value, pk):
    # the column name is at index 1; index 0 is the integer column id.
    rows = duck.execute(f"PRAGMA table_info('{table}')").fetchall()
    existing = {str(row[1]).lower() for row in rows}
    if col.lower() in existing:
        return

    # Quote the identifier so names with spaces, punctuation or reserved
    # words still produce a valid statement.
    quoted_col = '"' + col.replace('"', '""') + '"'
    duck.execute(f"ALTER TABLE {table} ADD COLUMN {quoted_col} {dtype}")
    print(f"[schema] ➕ added {col}:{dtype} to {table}")
|
| 44 |
|
| 45 |
+
# usage in ensure_schema_version:
# For every DataFrame column, derive a DuckDB type and add the column to
# the table when it is missing.
# NOTE(review): `df`, `duck` and `table_name` are not defined in the visible
# part of this module. This looks like an example meant to live inside
# ensure_schema_version; if it stays at module level, confirm those names
# exist at import time, otherwise importing the module raises NameError.
for col in df.columns:
    dtype = map_pandas_to_duck(col, df[col])
    add_column_if_not_exists(duck, table_name, col, dtype)
|
| 49 |
# ---------- alias memory ---------- #
|
| 50 |
def load_dynamic_aliases() -> None:
|
| 51 |
if os.path.exists(ALIAS_FILE):
|