Spaces:
Sleeping
Sleeping
File size: 5,429 Bytes
798e7b0 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 | import gspread
from google.oauth2.service_account import Credentials
import streamlit as st
import os
import json
# --- 1. SETUP GOOGLE SHEETS CONNECTION ---
def connect_to_sheet(sheet_name):
scopes = [
"https://www.googleapis.com/auth/spreadsheets",
"https://www.googleapis.com/auth/drive"
]
# --- STRATEGY 1: LOCAL FILE (Priority) ---
if os.path.exists("credentials.json"):
print("π Using local 'credentials.json' file.")
creds = Credentials.from_service_account_file("credentials.json", scopes=scopes)
# --- STRATEGY 2: ENVIRONMENT VARIABLE (Fallback for Deploy) ---
else:
print("βοΈ 'credentials.json' not found. Checking Environment Variables...")
creds_json_str = os.environ.get("gcp_service_account")
if not creds_json_str:
raise ValueError(
"β Error: Could not find 'credentials.json' LOCALLY, and 'gcp_service_account' is missing from ENV vars."
)
creds_dict = json.loads(creds_json_str)
creds = Credentials.from_service_account_info(creds_dict, scopes=scopes)
# Authorize & Open
client = gspread.authorize(creds)
return client.open(sheet_name).sheet1
# def connect_to_sheet(sheet_name):
# # Define the scopes (Permissions)
# scopes = [
# "https://www.googleapis.com/auth/spreadsheets",
# "https://www.googleapis.com/auth/drive"
# ]
#
# creds_json_str = os.environ.get("gcp_service_account")
#
# if not creds_json_str:
# raise ValueError(
# "β Error: Could not find 'gcp_service_account' in Environment Variables. Did you add the secret in Hugging Face settings?")
#
# # 2. Convert the String back into a Python Dictionary
# creds_dict = json.loads(creds_json_str)
#
# # 3. Create credentials object
# creds = Credentials.from_service_account_info(
# creds_dict,
# scopes=scopes
# )
#
# # Authorize gspread
# client = gspread.authorize(creds)
#
# # Open the sheet
# return client.open(sheet_name).sheet1
def load_cache_dict(sheet):
"""Returns a dict: {'company name': 'url'} for fast lookup."""
try:
data = sheet.get_all_records()
# Create dict: lowercased name -> url
return {row['Company'].lower().strip(): row['Website'] for row in data if row['Company']}
except Exception as e:
print(f"β οΈ Cache read error (empty sheet?): {e}")
return {}
def append_to_cache(sheet, new_entries):
"""Appends list of {'Company': name, 'Website': url} to sheet."""
if not new_entries:
return
rows = [[entry['Company'], entry['Website']] for entry in new_entries]
try:
sheet.append_rows(rows)
print(f"πΎ Cached {len(rows)} new companies.")
except Exception as e:
print(f"β Error saving to cache: {e}")
# # --- 2. LOAD CACHE (READ URLS FROM SHEET) ---
# def load_cache(sheet):
# """
# Reads the entire sheet and creates a dictionary:
# {'Nvidia': 'https://nvidia.com', 'Tesla': 'https://tesla.com'}
# """
# print("π Reading existing data from Google Sheet...")
# data = sheet.get_all_records() # Assumes headers: "Company", "Website"
#
# # Create a quick lookup dictionary (Normalize names to lowercase to be safe)
# cache = {row['Company'].lower().strip(): row['Website'] for row in data if row['Company']}
# return cache
#
#
# # --- 3. THE "CASH SAVER" FUNCTION ---
# def save_companies_to_cache(new_rows_to_add, sheet_name):
# sheet = connect_to_sheet(sheet_name)
#
# # 1. Check if there is data
# if new_rows_to_add:
# print(f"πΎ Saving {len(new_rows_to_add)} new companies to Sheet...")
#
# # 2. CONVERT Dicts to Lists
# values_to_upload = [
# [item.get('company_name'), item.get('company_website')]
# for item in new_rows_to_add
# ]
#
# # 3. Append to Sheet
# sheet.append_rows(values_to_upload)
# print("β
Save Complete.")
#
# else:
# print("π No new searches needed. Sheet is up to date!")
#
#
# # --- 3. THE LOAD CACHED COMPANIES FUNCTION ---
# def get_cached_companies(company_list, sheet_name):
# """
# Splits companies into 'Found in Cache' and 'Missing (Need to Search)'
# """
# sheet = connect_to_sheet(sheet_name)
# cache = load_cache(sheet)
#
# companies_in_cache = []
# missing_companies = []
#
# print(f"π Checking Cache for {len(company_list)} companies...")
# # print(company_list)
#
# for item in company_list:
# name = item # item['company_name']
# # Normalize key for matching (must match how you save them)
# name_key = name.lower().strip()
#
# # === CHECK CACHE ===
# # We check if key exists AND value is not empty
# if name_key in cache and cache[name_key]:
# # print(f" β
Cache Hit: {name}") # Optional: Comment out to reduce noise
# companies_in_cache.append({
# 'company_name': name,
# 'company_website': cache[name_key]
# })
# # === MISSING ===
# else:
# # print(f" π Cache Miss: {name}")
# missing_companies.append(item)
#
# # RETURN OUTSIDE THE LOOP
# return {
# 'found': companies_in_cache,
# 'missing': missing_companies
# }
|