""" Daily Data Refresh — BigQuery → SQLite → HF Space Schedule this notebook in Google Colab: Edit → Triggers → Add trigger → Daily What it does: 1. Authenticates with Google Cloud 2. Pulls Team Mapping and Client data from BigQuery 3. Parses and writes to a SQLite database 4. Uploads the DB to Hugging Face Space persistent storage Environment variables needed: HF_TOKEN — Hugging Face write token (set in Colab secrets) """ # ── Config ─────────────────────────────────────────────────── HF_REPO_ID = "your-username/producer-intake" # ← Change this BQ_PROJECT = "your-gcp-project" # ← Change this TEAM_MAPPING_QUERY = "SELECT * FROM `dataset.team_mapping`" CLIENT_EXTRACT_QUERY = "SELECT * FROM `dataset.client_extract`" DB_PATH = "/content/app_data.db" # ── 1. Install dependencies ───────────────────────────────── import subprocess subprocess.run(["pip", "install", "-q", "google-cloud-bigquery", "huggingface_hub", "pandas"], capture_output=True) # ── 2. Authenticate ───────────────────────────────────────── from google.colab import auth as colab_auth colab_auth.authenticate_user() from google.cloud import bigquery import pandas as pd import sqlite3 import os client = bigquery.Client(project=BQ_PROJECT) # ── 3. Pull data from BigQuery ─────────────────────────────── print("📥 Pulling Team Mapping...") df_team = client.query(TEAM_MAPPING_QUERY).to_dataframe() print(f" → {len(df_team)} rows") print("📥 Pulling Client Extract...") df_clients = client.query(CLIENT_EXTRACT_QUERY).to_dataframe() print(f" → {len(df_clients)} rows") # ── 4. 
# ── 4. Parse Team Mapping into producers table ───────────────

def _cell(row, idx):
    """Return the idx-th cell of *row* as a stripped string ('' for NaN).

    str(NaN) is the truthy string "nan", which would slip past the
    empty-value skip rules below and end up in the database — treat
    missing cells as empty instead.
    """
    val = row.iloc[idx]
    if pd.isna(val):
        return ""
    return str(val).strip()


def parse_producers(df):
    """Convert raw team mapping into the producers table format.

    Expects producer code in column 0 and producer name in column 1.
    Returns a DataFrame with columns: code, prefix, suffix, name, is_active.
    """
    records = []
    for _, row in df.iterrows():
        code = _cell(row, 0)
        name = _cell(row, 1)
        # Skip rules (same as notebook)
        if not code or not name:
            continue
        # Skip embedded header rows.
        if code.upper() == "PRODUCER CODE" or name.upper() == "PRODUCER NAME":
            continue
        if name == "House Account":
            continue
        # Valid codes end in a digit (the suffix) preceded by a prefix.
        if len(code) < 2 or not code[-1].isdigit():
            continue
        records.append({
            "code": code,
            "prefix": code[:-1],
            "suffix": int(code[-1]),
            "name": name,
            "is_active": 1,
        })
    return pd.DataFrame(records)


def parse_clients(df):
    """Convert raw client extract into the clients table format.

    Expects lookup code in column 0 and client name in column 1.
    Returns a DataFrame with columns: lookup_code, name, is_active.
    """
    records = []
    # Skip header/metadata rows
    for _, row in df.iloc[1:].iterrows():
        code = _cell(row, 0)
        name = _cell(row, 1)
        if not code or not name:
            continue
        records.append({
            "lookup_code": code,
            "name": name,
            "is_active": 1,
        })
    return pd.DataFrame(records)


df_producers = parse_producers(df_team)
df_clients_parsed = parse_clients(df_clients)

print(f"\n✅ Parsed {len(df_producers)} producer entries")
print(f"✅ Parsed {len(df_clients_parsed)} client entries")

# ── 5. Write to SQLite ──────────────────────────────────────
if os.path.exists(DB_PATH):
    os.remove(DB_PATH)

conn = sqlite3.connect(DB_PATH)
try:
    # We only replace the DATA tables (producers, clients).
    # The CONFIG tables (agreements, constants, etc.) live in app.db
    # and are managed through the admin panel.
    df_producers.to_sql("producers", conn, if_exists="replace", index=False)
    df_clients_parsed.to_sql("clients", conn, if_exists="replace", index=False)
finally:
    # Release the handle even if a write fails (was leaked on error before).
    conn.close()

size_kb = os.path.getsize(DB_PATH) / 1024
print(f"\n💾 SQLite DB: {size_kb:.0f} KB")
# ── 6. Upload to HF Space ───────────────────────────────────
from huggingface_hub import HfApi
from google.colab import userdata

hf_token = userdata.get("HF_TOKEN")  # Stored in Colab secrets
if not hf_token:
    # Fail early with a clear message instead of an opaque 401 from the
    # Hub API when the secret is missing or not shared with this notebook.
    raise RuntimeError(
        "HF_TOKEN is missing — add it in Colab's Secrets panel and "
        "grant this notebook access."
    )

api = HfApi(token=hf_token)

print("📤 Uploading to HF Space...")
api.upload_file(
    path_or_fileobj=DB_PATH,
    path_in_repo="data/app_data.db",
    repo_id=HF_REPO_ID,
    repo_type="space",
)

print("✅ Daily refresh complete!")
print(f" Producers: {len(df_producers)}")
print(f" Clients: {len(df_clients_parsed)}")