Spaces:

mbecchis
/

streaming-visualization

Sleeping

Update gsheet_loader.py

658a2f6 verified 3 months ago

1.7 kB

	import time
	import gspread
	from google.oauth2.service_account import Credentials
	import pandas as pd
	import streamlit as st
	import os
	import json

	# OAuth scope requested for the service account: read-only Sheets access.
	SCOPES = [
	    "https://www.googleapis.com/auth/spreadsheets.readonly",
	]

	# Key of the Google spreadsheet that every tab is read from.
	sheet_id = "10nGgqXxunGXo_GI1LxybvsAr1TYSDdNiqqZX6DSTbDA"

	# Column names passed as ``expected_headers`` when reading the
	# "Catalog Status" tab. The strings must match the sheet exactly, so the
	# spelling is kept as-is.
	headers = [
	    "Catalog",
	    "Mapping status",
	    "Priority",
	    "Program kinds",
	    "Customers",
	    "Size",
	    "Size Aprox",
	    "Needed by",
	    "Recommendations",
	    "Scraping?",
	    "Custom provider deeplinks",
	    "Scraping link",
	]

	def get_creds() -> "Credentials":
	    """Build service-account credentials from the GCP_CREDENTIALS env var.

	    The variable must contain the service-account key as a JSON document;
	    it is parsed and handed to ``Credentials.from_service_account_info``
	    together with the module-level ``SCOPES``.

	    Returns:
	        The credentials object created from the parsed key.

	    Raises:
	        ValueError: If GCP_CREDENTIALS is unset, empty, or only whitespace.
	        json.JSONDecodeError: If the value is not valid JSON (this is a
	            ``ValueError`` subclass).
	    """
	    raw = os.getenv("GCP_CREDENTIALS")
	    # A variable that is defined but blank is as unusable as an unset one.
	    # Report it the same way rather than letting json.loads fail with a
	    # message that never mentions the variable.
	    if raw is None or not raw.strip():
	        raise ValueError("Missing GCP_CREDENTIALS environment variable.")
	    return Credentials.from_service_account_info(json.loads(raw), scopes=SCOPES)

	def load_gsheet(
	    tab_name: str,
	    *,
	    max_attempts: int = 3,
	    retry_delay: float = 2.0,
	) -> pd.DataFrame:
	    """Load one worksheet of the configured spreadsheet into a DataFrame.

	    Google API errors are retried: after each failed attempt a Streamlit
	    warning is shown and the function sleeps for ``retry_delay`` seconds.
	    When the last attempt fails, a Streamlit error is shown and the
	    ``APIError`` is re-raised.

	    Args:
	        tab_name: Title of the worksheet (tab) to read.
	        max_attempts: Total number of attempts before giving up.
	        retry_delay: Seconds to wait between attempts.

	    Returns:
	        A DataFrame built from the worksheet's records.

	    Raises:
	        ValueError: If ``max_attempts`` is less than 1, or if the
	            credentials cannot be loaded (see ``get_creds``).
	        gspread.exceptions.APIError: If every attempt fails.
	    """
	    if max_attempts < 1:
	        raise ValueError("max_attempts must be at least 1.")

	    client = gspread.authorize(get_creds())

	    for attempt in range(1, max_attempts + 1):
	        try:
	            # Opening the spreadsheet is an API request too, so it belongs
	            # inside the retried section, not before the loop.
	            worksheet = client.open_by_key(sheet_id).worksheet(tab_name)
	            if tab_name == "Catalog Status":
	                # NOTE(review): presumably this tab has blank or duplicate
	                # header cells, which is why the expected column names are
	                # spelled out - confirm against the sheet.
	                records = worksheet.get_all_records(expected_headers=headers)
	            else:
	                records = worksheet.get_all_records()
	            return pd.DataFrame(records)
	        except gspread.exceptions.APIError as e:
	            if attempt == max_attempts:
	                st.error(f"Failed to load '{tab_name}': {e}")
	                # Bare raise keeps the original traceback intact.
	                raise
	            st.warning(
	                f"Retrying Google API for {tab_name}... ({attempt}/{max_attempts})"
	            )
	            time.sleep(retry_delay)

	def get_data():
	    """Load the three catalog tabs and return them as a tuple.

	    The tabs are fetched one after another with a one-second pause between
	    consecutive requests.

	    Returns:
	        A 3-tuple of DataFrames in the order
	        ("Catalog Onboarding", "NEW Catalog Data levels", "Catalog Status").
	    """
	    tab_names = (
	        "Catalog Onboarding",
	        "NEW Catalog Data levels",
	        "Catalog Status",
	    )
	    frames = []
	    for position, tab in enumerate(tab_names):
	        # Pause before every request except the first one.
	        if position > 0:
	            time.sleep(1)
	        frames.append(load_gsheet(tab))
	    return tuple(frames)