Spaces:
Running
Running
File size: 4,796 Bytes
"""
Daily Data Refresh — BigQuery → SQLite → HF Space

Schedule this notebook in Google Colab:
Edit → Triggers → Add trigger → Daily

What it does:
1. Authenticates with Google Cloud
2. Pulls Team Mapping and Client data from BigQuery
3. Parses and writes to a SQLite database
4. Uploads the DB to Hugging Face Space persistent storage

Environment variables needed:
HF_TOKEN — Hugging Face write token (set in Colab secrets)
"""
# ── Config ──────────────────────────────────────────────────
HF_REPO_ID = "your-username/producer-intake"  # ← Change this: target HF Space repo
BQ_PROJECT = "your-gcp-project"  # ← Change this: GCP project used for billing/queries
# Raw-extract queries; both tables are reshaped by the parse_* helpers below.
TEAM_MAPPING_QUERY = "SELECT * FROM `dataset.team_mapping`"
CLIENT_EXTRACT_QUERY = "SELECT * FROM `dataset.client_extract`"
# Local scratch path for the rebuilt SQLite file before upload.
DB_PATH = "/content/app_data.db"
# ── 1. Install dependencies ─────────────────────────────────
import subprocess
import sys

# Invoke pip via the current interpreter (`python -m pip`) so packages are
# installed into the environment this script actually runs in, and use
# check=True so a failed install raises instead of being silently ignored
# (capture_output alone would swallow the error text).
subprocess.run(
    [sys.executable, "-m", "pip", "install", "-q",
     "google-cloud-bigquery", "huggingface_hub", "pandas"],
    check=True,
    capture_output=True,
)
# ── 2. Authenticate ─────────────────────────────────────────
from google.colab import auth as colab_auth
# Interactive OAuth flow; grants this Colab runtime access to Google Cloud.
colab_auth.authenticate_user()
from google.cloud import bigquery
import pandas as pd
import sqlite3
import os
# BigQuery client bound to the project configured in the Config section.
client = bigquery.Client(project=BQ_PROJECT)
# ── 3. Pull data from BigQuery ───────────────────────────────
# Both extracts are materialized fully into pandas DataFrames; they are
# small reference tables, so an in-memory pull is fine here.
print("📥 Pulling Team Mapping...")
df_team = client.query(TEAM_MAPPING_QUERY).to_dataframe()
print(f"   ✓ {len(df_team)} rows")
print("📥 Pulling Client Extract...")
df_clients = client.query(CLIENT_EXTRACT_QUERY).to_dataframe()
print(f"   ✓ {len(df_clients)} rows")
# ββ 4. Parse Team Mapping into producers table βββββββββββββββ
def parse_producers(df):
"""Convert raw team mapping into the producers table format."""
records = []
for _, row in df.iterrows():
code = str(row.iloc[0]).strip()
name = str(row.iloc[1]).strip()
# Skip rules (same as notebook)
if not code or not name:
continue
if code.upper() == "PRODUCER CODE" or name.upper() == "PRODUCER NAME":
continue
if name == "House Account":
continue
if len(code) < 2 or not code[-1].isdigit():
continue
prefix = code[:-1]
suffix = int(code[-1])
records.append({
"code": code,
"prefix": prefix,
"suffix": suffix,
"name": name,
"is_active": 1,
})
return pd.DataFrame(records)
def parse_clients(df):
    """Convert raw client extract into the clients table format.

    The first row is treated as header/metadata and skipped.  Each
    remaining row with a non-blank code and name (first two columns)
    becomes a record with columns: lookup_code, name, is_active.
    """
    # Comprehension over the body rows (iloc[1:] drops the header row).
    records = [
        {"lookup_code": code, "name": name, "is_active": 1}
        for code, name in (
            (str(r.iloc[0]).strip(), str(r.iloc[1]).strip())
            for _, r in df.iloc[1:].iterrows()
        )
        if code and name
    ]
    return pd.DataFrame(records)
# Run both parsers over the raw BigQuery extracts.
df_producers = parse_producers(df_team)
df_clients_parsed = parse_clients(df_clients)
# NOTE(review): the two f-strings below were line-broken mid-literal in the
# scraped source (a syntax error); rejoined into single-line prints.
print(f"\n✅ Parsed {len(df_producers)} producer entries")
print(f"✅ Parsed {len(df_clients_parsed)} client entries")
# ── 5. Write to SQLite ──────────────────────────────────────
# Start from a fresh file so stale tables never survive a refresh.
if os.path.exists(DB_PATH):
    os.remove(DB_PATH)

conn = sqlite3.connect(DB_PATH)
try:
    # We only replace the DATA tables (producers, clients).
    # The CONFIG tables (agreements, constants, etc.) live in app.db
    # and are managed through the admin panel.
    df_producers.to_sql("producers", conn, if_exists="replace", index=False)
    df_clients_parsed.to_sql("clients", conn, if_exists="replace", index=False)
finally:
    # Release the handle even if a write fails mid-way (the original
    # leaked the connection on error).
    conn.close()

size_kb = os.path.getsize(DB_PATH) / 1024
print(f"\n💾 SQLite DB: {size_kb:.0f} KB")
# ── 6. Upload to HF Space ───────────────────────────────────
from huggingface_hub import HfApi
from google.colab import userdata

# Token comes from Colab's secret store — never hard-coded in the notebook.
hf_token = userdata.get("HF_TOKEN")  # Stored in Colab secrets
api = HfApi(token=hf_token)

print("📤 Uploading to HF Space...")
# Pushing a file into a Space repo triggers a rebuild/restart, so the app
# picks up the refreshed database.
api.upload_file(
    path_or_fileobj=DB_PATH,
    path_in_repo="data/app_data.db",
    repo_id=HF_REPO_ID,
    repo_type="space",
)

# NOTE(review): the final status print was line-broken mid-literal in the
# scraped source (a syntax error); rejoined into one line.
print("✅ Daily refresh complete!")
print(f"   Producers: {len(df_producers)}")
print(f"   Clients: {len(df_clients_parsed)}")
|