| import gspread | |
| from google.oauth2.service_account import Credentials | |
| import streamlit as st | |
| import os | |
| import json | |
| # --- 1. SETUP GOOGLE SHEETS CONNECTION --- | |
| def connect_to_sheet(sheet_name): | |
| scopes = [ | |
| "https://www.googleapis.com/auth/spreadsheets", | |
| "https://www.googleapis.com/auth/drive" | |
| ] | |
| # --- STRATEGY 1: LOCAL FILE (Priority) --- | |
| if os.path.exists("credentials.json"): | |
| print("π Using local 'credentials.json' file.") | |
| creds = Credentials.from_service_account_file("credentials.json", scopes=scopes) | |
| # --- STRATEGY 2: ENVIRONMENT VARIABLE (Fallback for Deploy) --- | |
| else: | |
| print("βοΈ 'credentials.json' not found. Checking Environment Variables...") | |
| creds_json_str = os.environ.get("gcp_service_account") | |
| if not creds_json_str: | |
| raise ValueError( | |
| "β Error: Could not find 'credentials.json' LOCALLY, and 'gcp_service_account' is missing from ENV vars." | |
| ) | |
| creds_dict = json.loads(creds_json_str) | |
| creds = Credentials.from_service_account_info(creds_dict, scopes=scopes) | |
| # Authorize & Open | |
| client = gspread.authorize(creds) | |
| return client.open(sheet_name).sheet1 | |
| # def connect_to_sheet(sheet_name): | |
| # # Define the scopes (Permissions) | |
| # scopes = [ | |
| # "https://www.googleapis.com/auth/spreadsheets", | |
| # "https://www.googleapis.com/auth/drive" | |
| # ] | |
| # | |
| # creds_json_str = os.environ.get("gcp_service_account") | |
| # | |
| # if not creds_json_str: | |
| # raise ValueError( | |
| # "β Error: Could not find 'gcp_service_account' in Environment Variables. Did you add the secret in Hugging Face settings?") | |
| # | |
| # # 2. Convert the String back into a Python Dictionary | |
| # creds_dict = json.loads(creds_json_str) | |
| # | |
| # # 3. Create credentials object | |
| # creds = Credentials.from_service_account_info( | |
| # creds_dict, | |
| # scopes=scopes | |
| # ) | |
| # | |
| # # Authorize gspread | |
| # client = gspread.authorize(creds) | |
| # | |
| # # Open the sheet | |
| # return client.open(sheet_name).sheet1 | |
| def load_cache_dict(sheet): | |
| """Returns a dict: {'company name': 'url'} for fast lookup.""" | |
| try: | |
| data = sheet.get_all_records() | |
| # Create dict: lowercased name -> url | |
| return {row['Company'].lower().strip(): row['Website'] for row in data if row['Company']} | |
| except Exception as e: | |
| print(f"β οΈ Cache read error (empty sheet?): {e}") | |
| return {} | |
| def append_to_cache(sheet, new_entries): | |
| """Appends list of {'Company': name, 'Website': url} to sheet.""" | |
| if not new_entries: | |
| return | |
| rows = [[entry['Company'], entry['Website']] for entry in new_entries] | |
| try: | |
| sheet.append_rows(rows) | |
| print(f"πΎ Cached {len(rows)} new companies.") | |
| except Exception as e: | |
| print(f"β Error saving to cache: {e}") | |
| # # --- 2. LOAD CACHE (READ URLS FROM SHEET) --- | |
| # def load_cache(sheet): | |
| # """ | |
| # Reads the entire sheet and creates a dictionary: | |
| # {'Nvidia': 'https://nvidia.com', 'Tesla': 'https://tesla.com'} | |
| # """ | |
| # print("π Reading existing data from Google Sheet...") | |
| # data = sheet.get_all_records() # Assumes headers: "Company", "Website" | |
| # | |
| # # Create a quick lookup dictionary (Normalize names to lowercase to be safe) | |
| # cache = {row['Company'].lower().strip(): row['Website'] for row in data if row['Company']} | |
| # return cache | |
| # | |
| # | |
| # # --- 3. THE "CASH SAVER" FUNCTION --- | |
| # def save_companies_to_cache(new_rows_to_add, sheet_name): | |
| # sheet = connect_to_sheet(sheet_name) | |
| # | |
| # # 1. Check if there is data | |
| # if new_rows_to_add: | |
| # print(f"πΎ Saving {len(new_rows_to_add)} new companies to Sheet...") | |
| # | |
| # # 2. CONVERT Dicts to Lists | |
| # values_to_upload = [ | |
| # [item.get('company_name'), item.get('company_website')] | |
| # for item in new_rows_to_add | |
| # ] | |
| # | |
| # # 3. Append to Sheet | |
| # sheet.append_rows(values_to_upload) | |
| # print("β Save Complete.") | |
| # | |
| # else: | |
| # print("π No new searches needed. Sheet is up to date!") | |
| # | |
| # | |
| # # --- 3. THE LOAD CACHED COMPANIES FUNCTION --- | |
| # def get_cached_companies(company_list, sheet_name): | |
| # """ | |
| # Splits companies into 'Found in Cache' and 'Missing (Need to Search)' | |
| # """ | |
| # sheet = connect_to_sheet(sheet_name) | |
| # cache = load_cache(sheet) | |
| # | |
| # companies_in_cache = [] | |
| # missing_companies = [] | |
| # | |
| # print(f"π Checking Cache for {len(company_list)} companies...") | |
| # # print(company_list) | |
| # | |
| # for item in company_list: | |
| # name = item # item['company_name'] | |
| # # Normalize key for matching (must match how you save them) | |
| # name_key = name.lower().strip() | |
| # | |
| # # === CHECK CACHE === | |
| # # We check if key exists AND value is not empty | |
| # if name_key in cache and cache[name_key]: | |
| # # print(f" β Cache Hit: {name}") # Optional: Comment out to reduce noise | |
| # companies_in_cache.append({ | |
| # 'company_name': name, | |
| # 'company_website': cache[name_key] | |
| # }) | |
| # # === MISSING === | |
| # else: | |
| # # print(f" π Cache Miss: {name}") | |
| # missing_companies.append(item) | |
| # | |
| # # RETURN OUTSIDE THE LOOP | |
| # return { | |
| # 'found': companies_in_cache, | |
| # 'missing': missing_companies | |
| # } | |