mbecchis committed on
Commit
658a2f6
·
verified ·
1 Parent(s): 1b2b0c7

Update gsheet_loader.py

Browse files
Files changed (1) hide show
  1. gsheet_loader.py +51 -41
gsheet_loader.py CHANGED
@@ -1,41 +1,51 @@
1
- import time
2
- import gspread
3
- from google.oauth2.service_account import Credentials
4
- import pandas as pd
5
- import streamlit as st
6
-
7
# OAuth scope requested for the service account: read-only spreadsheet access.
SCOPES = [
    "https://www.googleapis.com/auth/spreadsheets.readonly",
]

# Key of the spreadsheet that load_gsheet opens.
sheet_id = "10nGgqXxunGXo_GI1LxybvsAr1TYSDdNiqqZX6DSTbDA"

# Path of the service-account key file used to build the credentials.
key_path = "service_account_credentials.json"

# Column headers passed as `expected_headers` when reading the
# "Catalog Status" tab.
headers = [
    "Catalog",
    "Mapping status",
    "Priority",
    "Program kinds",
    "Customers",
    "Size",
    "Size Aprox",
    "Needed by",
    "Recommendations",
    "Scraping?",
    "Custom provider deeplinks",
    "Scraping link",
]
12
-
13
-
14
def load_gsheet(tab_name: str) -> pd.DataFrame:
    """Read one tab of the configured spreadsheet into a DataFrame.

    Credentials are loaded from the service-account file at ``key_path``.
    Up to three attempts are made; between failed attempts a Streamlit
    warning is shown and the function sleeps for two seconds.

    Args:
        tab_name: Title of the worksheet tab to read. For "Catalog Status"
            the records are read with ``expected_headers=headers``.

    Returns:
        A DataFrame built from the worksheet's records.

    Raises:
        gspread.exceptions.APIError: If the third attempt also fails. The
            error is shown via ``st.error`` before being re-raised.
    """
    creds = Credentials.from_service_account_file(key_path, scopes=SCOPES)
    client = gspread.authorize(creds)

    w = None
    for attempt in range(3):
        is_last_attempt = attempt == 2
        try:
            # Opening the spreadsheet is part of the retried section because
            # it may itself hit the Sheets API (NOTE(review): depends on the
            # installed gspread version -- confirm). It is opened only once
            # across successful attempts.
            if w is None:
                w = client.open_by_key(sheet_id)
            ws = w.worksheet(tab_name)
            if tab_name == "Catalog Status":
                return pd.DataFrame(ws.get_all_records(expected_headers=headers))
            return pd.DataFrame(ws.get_all_records())
        except gspread.exceptions.APIError as e:
            if is_last_attempt:
                st.error(f"Failed to load '{tab_name}': {e}")
                # Bare raise keeps the original traceback intact.
                raise
            st.warning(f"Retrying Google API for {tab_name}... ({attempt+1}/3)")
            time.sleep(2)  # avoid hammering API
34
-
35
def get_data():
    """Load the three catalog tabs, pausing one second between requests.

    Returns:
        A 3-tuple ``(onboarding, metadata, mapping)`` holding the results of
        ``load_gsheet`` for the "Catalog Onboarding",
        "NEW Catalog Data levels" and "Catalog Status" tabs, in that order.
    """
    pause_seconds = 1
    results = [load_gsheet("Catalog Onboarding")]
    for tab in ("NEW Catalog Data levels", "Catalog Status"):
        # Pause before each follow-up request.
        time.sleep(pause_seconds)
        results.append(load_gsheet(tab))
    return tuple(results)
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import gspread
3
+ from google.oauth2.service_account import Credentials
4
+ import pandas as pd
5
+ import streamlit as st
6
+ import os
7
+ import json
8
+
9
# OAuth scope requested for the service account: read-only spreadsheet access.
SCOPES = [
    "https://www.googleapis.com/auth/spreadsheets.readonly",
]

# Key of the spreadsheet that load_gsheet opens.
sheet_id = "10nGgqXxunGXo_GI1LxybvsAr1TYSDdNiqqZX6DSTbDA"

# Column headers passed as `expected_headers` when reading the
# "Catalog Status" tab.
headers = [
    "Catalog",
    "Mapping status",
    "Priority",
    "Program kinds",
    "Customers",
    "Size",
    "Size Aprox",
    "Needed by",
    "Recommendations",
    "Scraping?",
    "Custom provider deeplinks",
    "Scraping link",
]
17
+
18
def get_creds():
    """Build service-account credentials from the GCP_CREDENTIALS env variable.

    The variable must hold the service-account key as a JSON object. The
    parsed object is passed to ``Credentials.from_service_account_info``
    together with ``SCOPES``.

    Returns:
        The credentials object produced by
        ``Credentials.from_service_account_info``.

    Raises:
        ValueError: If GCP_CREDENTIALS is unset or empty, is not valid JSON,
            or does not decode to a JSON object.
    """
    raw = os.getenv("GCP_CREDENTIALS")
    # An empty value is treated like a missing one; otherwise it would only
    # surface later as an opaque JSON decoding error.
    if not raw:
        raise ValueError("Missing GCP_CREDENTIALS environment variable.")

    try:
        info = json.loads(raw)
    except json.JSONDecodeError as exc:
        # The raw value is deliberately left out of the message: it is a secret.
        raise ValueError(
            "GCP_CREDENTIALS environment variable does not contain valid JSON."
        ) from exc

    if not isinstance(info, dict):
        raise ValueError(
            "GCP_CREDENTIALS environment variable must contain a JSON object."
        )

    return Credentials.from_service_account_info(info, scopes=SCOPES)
23
+
24
def load_gsheet(
    tab_name: str,
    *,
    max_attempts: int = 3,
    retry_delay: float = 2,
) -> pd.DataFrame:
    """Read one tab of the configured spreadsheet into a DataFrame.

    Credentials come from ``get_creds()``. The spreadsheet identified by
    ``sheet_id`` is opened and the requested tab is read with
    ``get_all_records``. When the Google API raises an ``APIError`` the
    operation is retried, with a Streamlit warning and a pause between
    attempts.

    Args:
        tab_name: Title of the worksheet tab to read. For "Catalog Status"
            the records are read with ``expected_headers=headers``.
        max_attempts: Total number of attempts before giving up (default 3).
        retry_delay: Seconds to sleep between attempts (default 2).

    Returns:
        A DataFrame built from the worksheet's records.

    Raises:
        ValueError: If ``max_attempts`` is smaller than 1.
        gspread.exceptions.APIError: If the final attempt also fails. The
            error is shown via ``st.error`` before being re-raised.
    """
    if max_attempts < 1:
        raise ValueError("max_attempts must be at least 1.")

    creds = get_creds()
    client = gspread.authorize(creds)

    spreadsheet = None
    for attempt in range(1, max_attempts + 1):
        try:
            # Opening the spreadsheet is part of the retried section because
            # it may itself hit the Sheets API (NOTE(review): depends on the
            # installed gspread version -- confirm). Once opened, the handle
            # is reused by later attempts.
            if spreadsheet is None:
                spreadsheet = client.open_by_key(sheet_id)
            ws = spreadsheet.worksheet(tab_name)
            if tab_name == "Catalog Status":
                records = ws.get_all_records(expected_headers=headers)
            else:
                records = ws.get_all_records()
            return pd.DataFrame(records)
        except gspread.exceptions.APIError as e:
            if attempt == max_attempts:
                st.error(f"Failed to load '{tab_name}': {e}")
                # Bare raise keeps the original traceback intact.
                raise
            st.warning(
                f"Retrying Google API for {tab_name}... ({attempt}/{max_attempts})"
            )
            time.sleep(retry_delay)  # avoid hammering the API
44
+
45
def get_data():
    """Fetch the onboarding, metadata and mapping tabs as DataFrames.

    The tabs are loaded one after another through ``load_gsheet`` with a
    one-second sleep between consecutive loads.

    Returns:
        A 3-tuple ``(onboarding, metadata, mapping)`` for the tabs
        "Catalog Onboarding", "NEW Catalog Data levels" and "Catalog Status".
    """
    tab_names = (
        "Catalog Onboarding",
        "NEW Catalog Data levels",
        "Catalog Status",
    )
    frames = []
    for position, tab in enumerate(tab_names):
        if position:
            # Sleep only between loads, never before the first one.
            time.sleep(1)
        frames.append(load_gsheet(tab))
    onboarding, metadata, mapping = frames
    return onboarding, metadata, mapping