"""Load catalog tabs from a Google Sheets spreadsheet into pandas DataFrames."""

import json
import os
import time

import gspread
import pandas as pd
import streamlit as st
from google.oauth2.service_account import Credentials

# OAuth scope requested for the service account: read-only spreadsheet access.
SCOPES = ["https://www.googleapis.com/auth/spreadsheets.readonly"]

# Key of the spreadsheet that every tab is read from.
sheet_id = "10nGgqXxunGXo_GI1LxybvsAr1TYSDdNiqqZX6DSTbDA"

# Column names passed as ``expected_headers`` when reading the
# "Catalog Status" tab.
# NOTE(review): presumably needed because that tab has blank or duplicated
# header cells -- confirm against the sheet.
headers = [
    'Catalog',
    'Mapping status',
    'Priority',
    'Program kinds',
    'Customers',
    'Size',
    'Size Aprox',
    'Needed by',
    'Recommendations',
    'Scraping?',
    'Custom provider deeplinks',
    "Scraping link",
]

# Retry policy for Google API errors raised while reading a tab.
_MAX_ATTEMPTS = 3
_RETRY_DELAY_SECONDS = 2

# Pause between consecutive tab loads in ``get_data``.
_PAUSE_BETWEEN_TABS_SECONDS = 1


def get_creds():
    """Build service-account credentials from the GCP_CREDENTIALS env variable.

    The variable must hold the service-account key as a JSON document.

    Returns:
        Credentials scoped to ``SCOPES``.

    Raises:
        ValueError: If GCP_CREDENTIALS is unset or empty. A non-empty value
            that is not valid JSON raises ``json.JSONDecodeError`` (a
            ``ValueError`` subclass) from ``json.loads``.
    """
    raw = os.getenv("GCP_CREDENTIALS")
    # An empty value is as unusable as a missing one; report it the same way
    # rather than letting json.loads fail with an opaque decode error.
    if not raw:
        raise ValueError("Missing GCP_CREDENTIALS environment variable.")
    return Credentials.from_service_account_info(json.loads(raw), scopes=SCOPES)


def load_gsheet(tab_name: str) -> pd.DataFrame:
    """Read one worksheet of the spreadsheet into a DataFrame.

    Google API errors are retried up to ``_MAX_ATTEMPTS`` times with a fixed
    delay between attempts; a Streamlit warning is shown for each retry and a
    Streamlit error is shown before the final failure is re-raised.

    Args:
        tab_name: Title of the worksheet (tab) to read.

    Returns:
        A DataFrame built from the worksheet's records.

    Raises:
        gspread.exceptions.APIError: If every attempt fails with an API error.
        ValueError: If the credentials cannot be loaded (see ``get_creds``).
    """
    creds = get_creds()
    client = gspread.authorize(creds)

    for attempt in range(1, _MAX_ATTEMPTS + 1):
        try:
            # Opening the spreadsheet is done inside the retry scope so that
            # an API error raised while opening it is retried and reported
            # like any other API error.
            worksheet = client.open_by_key(sheet_id).worksheet(tab_name)
            if tab_name == "Catalog Status":
                records = worksheet.get_all_records(expected_headers=headers)
            else:
                records = worksheet.get_all_records()
            return pd.DataFrame(records)
        except gspread.exceptions.APIError as e:
            if attempt < _MAX_ATTEMPTS:
                st.warning(
                    f"Retrying Google API for {tab_name}... "
                    f"({attempt}/{_MAX_ATTEMPTS})"
                )
                time.sleep(_RETRY_DELAY_SECONDS)
            else:
                st.error(f"Failed to load '{tab_name}': {e}")
                # Bare raise keeps the original traceback intact.
                raise


def get_data():
    """Load the onboarding, metadata and mapping tabs.

    The tabs are read one after another with a short pause in between.
    NOTE(review): the pause is presumably there to stay under the Sheets API
    rate limit -- confirm.

    Returns:
        A tuple ``(onboarding, metadata, mapping)`` of DataFrames read from
        the "Catalog Onboarding", "NEW Catalog Data levels" and
        "Catalog Status" tabs respectively.
    """
    onboarding = load_gsheet("Catalog Onboarding")
    time.sleep(_PAUSE_BETWEEN_TABS_SECONDS)
    metadata = load_gsheet("NEW Catalog Data levels")
    time.sleep(_PAUSE_BETWEEN_TABS_SECONDS)
    mapping = load_gsheet("Catalog Status")
    return onboarding, metadata, mapping