File size: 4,796 Bytes
996fcf9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
"""
Daily Data Refresh β€” BigQuery β†’ SQLite β†’ HF Space

Schedule this notebook in Google Colab:
  Edit β†’ Triggers β†’ Add trigger β†’ Daily

What it does:
  1. Authenticates with Google Cloud
  2. Pulls Team Mapping and Client data from BigQuery
  3. Parses and writes to a SQLite database
  4. Uploads the DB to Hugging Face Space persistent storage

Environment variables needed:
  HF_TOKEN β€” Hugging Face write token (set in Colab secrets)
"""

# ── Config ───────────────────────────────────────────────────
# Target HF Space that serves the app; the refreshed DB is pushed there.
HF_REPO_ID = "your-username/producer-intake"   # ← Change this
# GCP project that owns the BigQuery datasets queried below.
BQ_PROJECT = "your-gcp-project"                 # ← Change this
# Raw extracts; parsed into the `producers` / `clients` tables further down.
TEAM_MAPPING_QUERY = "SELECT * FROM `dataset.team_mapping`"
CLIENT_EXTRACT_QUERY = "SELECT * FROM `dataset.client_extract`"

# Scratch path inside the Colab VM; recreated from scratch on every run.
DB_PATH = "/content/app_data.db"


# ── 1. Install dependencies ─────────────────────────────────
import subprocess
import sys

# Invoke pip through the running interpreter (a bare "pip" can target a
# different Python on hosts with multiple installs), and use check=True so
# a failed install aborts the refresh instead of crashing later with
# confusing ImportErrors. Output is still captured to keep the log quiet.
subprocess.run(
    [sys.executable, "-m", "pip", "install", "-q",
     "google-cloud-bigquery", "huggingface_hub", "pandas"],
    check=True,
    capture_output=True,
)


# ── 2. Authenticate ─────────────────────────────────────────
# Interactive Google sign-in; grants this Colab runtime access to BigQuery.
from google.colab import auth as colab_auth
colab_auth.authenticate_user()

from google.cloud import bigquery
import pandas as pd
import sqlite3
import os

# Client picks up the credentials established by authenticate_user() above.
client = bigquery.Client(project=BQ_PROJECT)


# ── 3. Pull data from BigQuery ───────────────────────────────
# Both extracts are pulled whole; all filtering/cleaning happens locally
# in the parse_* helpers below.
print("πŸ“₯ Pulling Team Mapping...")
df_team = client.query(TEAM_MAPPING_QUERY).to_dataframe()
print(f"   β†’ {len(df_team)} rows")

print("πŸ“₯ Pulling Client Extract...")
df_clients = client.query(CLIENT_EXTRACT_QUERY).to_dataframe()
print(f"   β†’ {len(df_clients)} rows")


# ── 4. Parse Team Mapping into producers table ───────────────
def parse_producers(df):
    """Convert raw team mapping into the producers table format.

    Expects the producer code in column 0 and the producer name in
    column 1. Rows are dropped when either value is null, when they are
    header rows, the "House Account" bucket, or when the code is not
    prefix-plus-trailing-digit shaped.

    Returns a DataFrame with columns: code, prefix, suffix, name,
    is_active (an empty, column-less frame if nothing survives).
    """
    records = []
    for _, row in df.iterrows():
        raw_code, raw_name = row.iloc[0], row.iloc[1]

        # BigQuery nulls arrive as None/NaN; str() would coerce them to
        # the truthy literal strings "None"/"nan", slipping past the
        # emptiness check below and leaking fake rows into the table.
        if pd.isna(raw_code) or pd.isna(raw_name):
            continue

        code = str(raw_code).strip()
        name = str(raw_name).strip()

        # Skip rules (same as notebook)
        if not code or not name:
            continue
        if code.upper() == "PRODUCER CODE" or name.upper() == "PRODUCER NAME":
            continue
        if name == "House Account":
            continue
        # Codes must be at least "X1"-shaped: non-empty prefix + trailing digit.
        if len(code) < 2 or not code[-1].isdigit():
            continue

        records.append({
            "code": code,
            "prefix": code[:-1],
            "suffix": int(code[-1]),
            "name": name,
            "is_active": 1,
        })

    return pd.DataFrame(records)


def parse_clients(df):
    """Convert raw client extract into the clients table format.

    Column 0 is the lookup code, column 1 the client name. The first row
    of the extract is header/metadata and is always skipped.

    Returns a DataFrame with columns: lookup_code, name, is_active
    (an empty, column-less frame if nothing survives).
    """
    records = []
    # Skip header/metadata rows
    for _, row in df.iloc[1:].iterrows():
        raw_code, raw_name = row.iloc[0], row.iloc[1]

        # Guard against BigQuery nulls: str(None)/str(NaN) produce the
        # truthy strings "None"/"nan" and would pass the check below.
        if pd.isna(raw_code) or pd.isna(raw_name):
            continue

        code = str(raw_code).strip()
        name = str(raw_name).strip()
        if not code or not name:
            continue

        records.append({
            "lookup_code": code,
            "name": name,
            "is_active": 1,
        })

    return pd.DataFrame(records)


# Normalize the raw BigQuery frames into the app's table shapes.
df_producers = parse_producers(df_team)
df_clients_parsed = parse_clients(df_clients)

print(f"\nβœ… Parsed {len(df_producers)} producer entries")
print(f"βœ… Parsed {len(df_clients_parsed)} client entries")


# ── 5. Write to SQLite ──────────────────────────────────────
if os.path.exists(DB_PATH):
    os.remove(DB_PATH)

conn = sqlite3.connect(DB_PATH)

# We only replace the DATA tables (producers, clients).
# The CONFIG tables (agreements, constants, etc.) live in app.db
# and are managed through the admin panel.
df_producers.to_sql("producers", conn, if_exists="replace", index=False)
df_clients_parsed.to_sql("clients", conn, if_exists="replace", index=False)

conn.close()

size_kb = os.path.getsize(DB_PATH) / 1024
print(f"\nπŸ’Ύ SQLite DB: {size_kb:.0f} KB")


# ── 6. Upload to HF Space ───────────────────────────────────
from google.colab import userdata
from huggingface_hub import HfApi

# The write token lives in Colab's secret store, never in the notebook.
token = userdata.get("HF_TOKEN")   # Stored in Colab secrets
hf_api = HfApi(token=token)

print("πŸ“€ Uploading to HF Space...")
hf_api.upload_file(
    path_or_fileobj=DB_PATH,
    path_in_repo="data/app_data.db",
    repo_id=HF_REPO_ID,
    repo_type="space",
)

print("βœ… Daily refresh complete!")
print(f"   Producers: {len(df_producers)}")
print(f"   Clients:   {len(df_clients_parsed)}")