"""Excel → PostgreSQL data synchronization script. Usage: python data_sync.py path/to/file.xlsx python data_sync.py path/to/folder/ (syncs all .xlsx files) Normalizes column names to lowercase with underscores, then upserts each sheet/file into a PostgreSQL table. """ import os import re import sys import pandas as pd from sqlalchemy import text from db.connection import get_engine def normalize_column(name: str) -> str: """Lowercase, strip, and replace non-alphanumeric chars with underscore.""" name = str(name).strip().lower() name = re.sub(r"[^a-z0-9]+", "_", name) name = name.strip("_") return name or "unnamed_col" def sync_dataframe(df: pd.DataFrame, table_name: str) -> None: """Write a DataFrame to PostgreSQL, replacing the existing table.""" engine = get_engine() # Normalize columns df.columns = [normalize_column(c) for c in df.columns] # Deduplicate column names seen: dict[str, int] = {} new_cols: list[str] = [] for col in df.columns: if col in seen: seen[col] += 1 new_cols.append(f"{col}_{seen[col]}") else: seen[col] = 0 new_cols.append(col) df.columns = new_cols df.to_sql(table_name, engine, if_exists="replace", index=False) print(f" āœ“ Table '{table_name}' synced — {len(df)} rows, {len(df.columns)} columns") def sync_excel(filepath: str) -> None: """Sync all sheets in an Excel file to separate tables.""" basename = os.path.splitext(os.path.basename(filepath))[0] table_name = normalize_column(basename) xls = pd.ExcelFile(filepath) sheets = xls.sheet_names if len(sheets) == 1: df = pd.read_excel(filepath, sheet_name=sheets[0]) sync_dataframe(df, table_name) else: for sheet in sheets: df = pd.read_excel(filepath, sheet_name=sheet) sheet_table = f"{table_name}_{normalize_column(sheet)}" sync_dataframe(df, sheet_table) def main() -> None: if len(sys.argv) < 2: print("Usage: python data_sync.py ") sys.exit(1) target = sys.argv[1] if os.path.isdir(target): files = [ os.path.join(target, f) for f in os.listdir(target) if f.endswith((".xlsx", ".xls")) ] if not files: print(f"No Excel files found in {target}") sys.exit(1) for fp in sorted(files): print(f"Syncing: {fp}") sync_excel(fp) elif os.path.isfile(target): print(f"Syncing: {target}") sync_excel(target) else: print(f"Path not found: {target}") sys.exit(1) print("\nāœ“ Data sync complete.") if __name__ == "__main__": main()