import gspread from google.oauth2.service_account import Credentials import streamlit as st import os import json # --- 1. SETUP GOOGLE SHEETS CONNECTION --- def connect_to_sheet(sheet_name): scopes = [ "https://www.googleapis.com/auth/spreadsheets", "https://www.googleapis.com/auth/drive" ] # --- STRATEGY 1: LOCAL FILE (Priority) --- if os.path.exists("credentials.json"): print("📂 Using local 'credentials.json' file.") creds = Credentials.from_service_account_file("credentials.json", scopes=scopes) # --- STRATEGY 2: ENVIRONMENT VARIABLE (Fallback for Deploy) --- else: print("☁️ 'credentials.json' not found. Checking Environment Variables...") creds_json_str = os.environ.get("gcp_service_account") if not creds_json_str: raise ValueError( "❌ Error: Could not find 'credentials.json' LOCALLY, and 'gcp_service_account' is missing from ENV vars." ) creds_dict = json.loads(creds_json_str) creds = Credentials.from_service_account_info(creds_dict, scopes=scopes) # Authorize & Open client = gspread.authorize(creds) return client.open(sheet_name).sheet1 # def connect_to_sheet(sheet_name): # # Define the scopes (Permissions) # scopes = [ # "https://www.googleapis.com/auth/spreadsheets", # "https://www.googleapis.com/auth/drive" # ] # # creds_json_str = os.environ.get("gcp_service_account") # # if not creds_json_str: # raise ValueError( # "❌ Error: Could not find 'gcp_service_account' in Environment Variables. Did you add the secret in Hugging Face settings?") # # # 2. Convert the String back into a Python Dictionary # creds_dict = json.loads(creds_json_str) # # # 3. Create credentials object # creds = Credentials.from_service_account_info( # creds_dict, # scopes=scopes # ) # # # Authorize gspread # client = gspread.authorize(creds) # # # Open the sheet # return client.open(sheet_name).sheet1 def load_cache_dict(sheet): """Returns a dict: {'company name': 'url'} for fast lookup.""" try: data = sheet.get_all_records() # Create dict: lowercased name -> url return {row['Company'].lower().strip(): row['Website'] for row in data if row['Company']} except Exception as e: print(f"⚠️ Cache read error (empty sheet?): {e}") return {} def append_to_cache(sheet, new_entries): """Appends list of {'Company': name, 'Website': url} to sheet.""" if not new_entries: return rows = [[entry['Company'], entry['Website']] for entry in new_entries] try: sheet.append_rows(rows) print(f"💾 Cached {len(rows)} new companies.") except Exception as e: print(f"❌ Error saving to cache: {e}") # # --- 2. LOAD CACHE (READ URLS FROM SHEET) --- # def load_cache(sheet): # """ # Reads the entire sheet and creates a dictionary: # {'Nvidia': 'https://nvidia.com', 'Tesla': 'https://tesla.com'} # """ # print("📂 Reading existing data from Google Sheet...") # data = sheet.get_all_records() # Assumes headers: "Company", "Website" # # # Create a quick lookup dictionary (Normalize names to lowercase to be safe) # cache = {row['Company'].lower().strip(): row['Website'] for row in data if row['Company']} # return cache # # # # --- 3. THE "CASH SAVER" FUNCTION --- # def save_companies_to_cache(new_rows_to_add, sheet_name): # sheet = connect_to_sheet(sheet_name) # # # 1. Check if there is data # if new_rows_to_add: # print(f"💾 Saving {len(new_rows_to_add)} new companies to Sheet...") # # # 2. CONVERT Dicts to Lists # values_to_upload = [ # [item.get('company_name'), item.get('company_website')] # for item in new_rows_to_add # ] # # # 3. Append to Sheet # sheet.append_rows(values_to_upload) # print("✅ Save Complete.") # # else: # print("🎉 No new searches needed. Sheet is up to date!") # # # # --- 3. THE LOAD CACHED COMPANIES FUNCTION --- # def get_cached_companies(company_list, sheet_name): # """ # Splits companies into 'Found in Cache' and 'Missing (Need to Search)' # """ # sheet = connect_to_sheet(sheet_name) # cache = load_cache(sheet) # # companies_in_cache = [] # missing_companies = [] # # print(f"📂 Checking Cache for {len(company_list)} companies...") # # print(company_list) # # for item in company_list: # name = item # item['company_name'] # # Normalize key for matching (must match how you save them) # name_key = name.lower().strip() # # # === CHECK CACHE === # # We check if key exists AND value is not empty # if name_key in cache and cache[name_key]: # # print(f" ✅ Cache Hit: {name}") # Optional: Comment out to reduce noise # companies_in_cache.append({ # 'company_name': name, # 'company_website': cache[name_key] # }) # # === MISSING === # else: # # print(f" 🔎 Cache Miss: {name}") # missing_companies.append(item) # # # RETURN OUTSIDE THE LOOP # return { # 'found': companies_in_cache, # 'missing': missing_companies # }