Spaces:
Sleeping
Sleeping
| """ | |
| Daily Data Refresh β BigQuery β SQLite β HF Space | |
| Schedule this notebook in Google Colab: | |
| Edit β Triggers β Add trigger β Daily | |
| What it does: | |
| 1. Authenticates with Google Cloud | |
| 2. Pulls Team Mapping and Client data from BigQuery | |
| 3. Parses and writes to a SQLite database | |
| 4. Uploads the DB to Hugging Face Space persistent storage | |
| Environment variables needed: | |
| HF_TOKEN β Hugging Face write token (set in Colab secrets) | |
| """ | |
| # ββ Config βββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| HF_REPO_ID = "your-username/producer-intake" # β Change this | |
| BQ_PROJECT = "your-gcp-project" # β Change this | |
| TEAM_MAPPING_QUERY = "SELECT * FROM `dataset.team_mapping`" | |
| CLIENT_EXTRACT_QUERY = "SELECT * FROM `dataset.client_extract`" | |
| DB_PATH = "/content/app_data.db" | |
| # ββ 1. Install dependencies βββββββββββββββββββββββββββββββββ | |
| import subprocess | |
| subprocess.run(["pip", "install", "-q", "google-cloud-bigquery", "huggingface_hub", "pandas"], capture_output=True) | |
| # ββ 2. Authenticate βββββββββββββββββββββββββββββββββββββββββ | |
| from google.colab import auth as colab_auth | |
| colab_auth.authenticate_user() | |
| from google.cloud import bigquery | |
| import pandas as pd | |
| import sqlite3 | |
| import os | |
| client = bigquery.Client(project=BQ_PROJECT) | |
| # ββ 3. Pull data from BigQuery βββββββββββββββββββββββββββββββ | |
| print("π₯ Pulling Team Mapping...") | |
| df_team = client.query(TEAM_MAPPING_QUERY).to_dataframe() | |
| print(f" β {len(df_team)} rows") | |
| print("π₯ Pulling Client Extract...") | |
| df_clients = client.query(CLIENT_EXTRACT_QUERY).to_dataframe() | |
| print(f" β {len(df_clients)} rows") | |
| # ββ 4. Parse Team Mapping into producers table βββββββββββββββ | |
| def parse_producers(df): | |
| """Convert raw team mapping into the producers table format.""" | |
| records = [] | |
| for _, row in df.iterrows(): | |
| code = str(row.iloc[0]).strip() | |
| name = str(row.iloc[1]).strip() | |
| # Skip rules (same as notebook) | |
| if not code or not name: | |
| continue | |
| if code.upper() == "PRODUCER CODE" or name.upper() == "PRODUCER NAME": | |
| continue | |
| if name == "House Account": | |
| continue | |
| if len(code) < 2 or not code[-1].isdigit(): | |
| continue | |
| prefix = code[:-1] | |
| suffix = int(code[-1]) | |
| records.append({ | |
| "code": code, | |
| "prefix": prefix, | |
| "suffix": suffix, | |
| "name": name, | |
| "is_active": 1, | |
| }) | |
| return pd.DataFrame(records) | |
| def parse_clients(df): | |
| """Convert raw client extract into the clients table format.""" | |
| records = [] | |
| # Skip header/metadata rows | |
| for _, row in df.iloc[1:].iterrows(): | |
| code = str(row.iloc[0]).strip() | |
| name = str(row.iloc[1]).strip() | |
| if not code or not name: | |
| continue | |
| records.append({ | |
| "lookup_code": code, | |
| "name": name, | |
| "is_active": 1, | |
| }) | |
| return pd.DataFrame(records) | |
| df_producers = parse_producers(df_team) | |
| df_clients_parsed = parse_clients(df_clients) | |
| print(f"\nβ Parsed {len(df_producers)} producer entries") | |
| print(f"β Parsed {len(df_clients_parsed)} client entries") | |
| # ββ 5. Write to SQLite ββββββββββββββββββββββββββββββββββββββ | |
| if os.path.exists(DB_PATH): | |
| os.remove(DB_PATH) | |
| conn = sqlite3.connect(DB_PATH) | |
| # We only replace the DATA tables (producers, clients). | |
| # The CONFIG tables (agreements, constants, etc.) live in app.db | |
| # and are managed through the admin panel. | |
| df_producers.to_sql("producers", conn, if_exists="replace", index=False) | |
| df_clients_parsed.to_sql("clients", conn, if_exists="replace", index=False) | |
| conn.close() | |
| size_kb = os.path.getsize(DB_PATH) / 1024 | |
| print(f"\nπΎ SQLite DB: {size_kb:.0f} KB") | |
| # ββ 6. Upload to HF Space βββββββββββββββββββββββββββββββββββ | |
| from huggingface_hub import HfApi | |
| from google.colab import userdata | |
| hf_token = userdata.get("HF_TOKEN") # Stored in Colab secrets | |
| api = HfApi(token=hf_token) | |
| print("π€ Uploading to HF Space...") | |
| api.upload_file( | |
| path_or_fileobj=DB_PATH, | |
| path_in_repo="data/app_data.db", | |
| repo_id=HF_REPO_ID, | |
| repo_type="space", | |
| ) | |
| print("β Daily refresh complete!") | |
| print(f" Producers: {len(df_producers)}") | |
| print(f" Clients: {len(df_clients_parsed)}") | |