Spaces:

chummchumm
/

newFinderAgent_v2

Paused

App Files Files Community

newFinderAgent_v2 / src /cache.py

chummchumm

Upload 6 files

8b425b2 verified 11 days ago

raw

history blame contribute delete

5.43 kB

	import gspread
	from google.oauth2.service_account import Credentials
	import streamlit as st
	import os
	import json


	# --- 1. SETUP GOOGLE SHEETS CONNECTION ---
	def connect_to_sheet(sheet_name):
	scopes = [
	"https://www.googleapis.com/auth/spreadsheets",
	"https://www.googleapis.com/auth/drive"
	]

	# --- STRATEGY 1: LOCAL FILE (Priority) ---
	if os.path.exists("credentials.json"):
	print("📂 Using local 'credentials.json' file.")
	creds = Credentials.from_service_account_file("credentials.json", scopes=scopes)

	# --- STRATEGY 2: ENVIRONMENT VARIABLE (Fallback for Deploy) ---
	else:
	print("☁️ 'credentials.json' not found. Checking Environment Variables...")
	creds_json_str = os.environ.get("gcp_service_account")

	if not creds_json_str:
	raise ValueError(
	"❌ Error: Could not find 'credentials.json' LOCALLY, and 'gcp_service_account' is missing from ENV vars."
	)

	creds_dict = json.loads(creds_json_str)
	creds = Credentials.from_service_account_info(creds_dict, scopes=scopes)

	# Authorize & Open
	client = gspread.authorize(creds)
	return client.open(sheet_name).sheet1
	# def connect_to_sheet(sheet_name):
	# # Define the scopes (Permissions)
	# scopes = [
	# "https://www.googleapis.com/auth/spreadsheets",
	# "https://www.googleapis.com/auth/drive"
	# ]
	#
	# creds_json_str = os.environ.get("gcp_service_account")
	#
	# if not creds_json_str:
	# raise ValueError(
	# "❌ Error: Could not find 'gcp_service_account' in Environment Variables. Did you add the secret in Hugging Face settings?")
	#
	# # 2. Convert the String back into a Python Dictionary
	# creds_dict = json.loads(creds_json_str)
	#
	# # 3. Create credentials object
	# creds = Credentials.from_service_account_info(
	# creds_dict,
	# scopes=scopes
	# )
	#
	# # Authorize gspread
	# client = gspread.authorize(creds)
	#
	# # Open the sheet
	# return client.open(sheet_name).sheet1

	def load_cache_dict(sheet):
	"""Returns a dict: {'company name': 'url'} for fast lookup."""
	try:
	data = sheet.get_all_records()
	# Create dict: lowercased name -> url
	return {row['Company'].lower().strip(): row['Website'] for row in data if row['Company']}
	except Exception as e:
	print(f"⚠️ Cache read error (empty sheet?): {e}")
	return {}


	def append_to_cache(sheet, new_entries):
	"""Appends list of {'Company': name, 'Website': url} to sheet."""
	if not new_entries:
	return

	rows = [[entry['Company'], entry['Website']] for entry in new_entries]
	try:
	sheet.append_rows(rows)
	print(f"💾 Cached {len(rows)} new companies.")
	except Exception as e:
	print(f"❌ Error saving to cache: {e}")

	# # --- 2. LOAD CACHE (READ URLS FROM SHEET) ---
	# def load_cache(sheet):
	# """
	# Reads the entire sheet and creates a dictionary:
	# {'Nvidia': 'https://nvidia.com', 'Tesla': 'https://tesla.com'}
	# """
	# print("📂 Reading existing data from Google Sheet...")
	# data = sheet.get_all_records() # Assumes headers: "Company", "Website"
	#
	# # Create a quick lookup dictionary (Normalize names to lowercase to be safe)
	# cache = {row['Company'].lower().strip(): row['Website'] for row in data if row['Company']}
	# return cache
	#
	#
	# # --- 3. THE "CASH SAVER" FUNCTION ---
	# def save_companies_to_cache(new_rows_to_add, sheet_name):
	# sheet = connect_to_sheet(sheet_name)
	#
	# # 1. Check if there is data
	# if new_rows_to_add:
	# print(f"💾 Saving {len(new_rows_to_add)} new companies to Sheet...")
	#
	# # 2. CONVERT Dicts to Lists
	# values_to_upload = [
	# [item.get('company_name'), item.get('company_website')]
	# for item in new_rows_to_add
	# ]
	#
	# # 3. Append to Sheet
	# sheet.append_rows(values_to_upload)
	# print("✅ Save Complete.")
	#
	# else:
	# print("🎉 No new searches needed. Sheet is up to date!")
	#
	#
	# # --- 3. THE LOAD CACHED COMPANIES FUNCTION ---
	# def get_cached_companies(company_list, sheet_name):
	# """
	# Splits companies into 'Found in Cache' and 'Missing (Need to Search)'
	# """
	# sheet = connect_to_sheet(sheet_name)
	# cache = load_cache(sheet)
	#
	# companies_in_cache = []
	# missing_companies = []
	#
	# print(f"📂 Checking Cache for {len(company_list)} companies...")
	# # print(company_list)
	#
	# for item in company_list:
	# name = item # item['company_name']
	# # Normalize key for matching (must match how you save them)
	# name_key = name.lower().strip()
	#
	# # === CHECK CACHE ===
	# # We check if key exists AND value is not empty
	# if name_key in cache and cache[name_key]:
	# # print(f" ✅ Cache Hit: {name}") # Optional: Comment out to reduce noise
	# companies_in_cache.append({
	# 'company_name': name,
	# 'company_website': cache[name_key]
	# })
	# # === MISSING ===
	# else:
	# # print(f" 🔎 Cache Miss: {name}")
	# missing_companies.append(item)
	#
	# # RETURN OUTSIDE THE LOOP
	# return {
	# 'found': companies_in_cache,
	# 'missing': missing_companies
	# }