Spaces:

chummchumm
/

newFinderAgent_v2

Paused

File size: 5,429 Bytes

8b425b2

import gspread
from google.oauth2.service_account import Credentials
import streamlit as st
import os
import json


# --- 1. SETUP GOOGLE SHEETS CONNECTION ---
def connect_to_sheet(sheet_name):
    scopes = [
        "https://www.googleapis.com/auth/spreadsheets",
        "https://www.googleapis.com/auth/drive"
    ]

    # --- STRATEGY 1: LOCAL FILE (Priority) ---
    if os.path.exists("credentials.json"):
        print("📂 Using local 'credentials.json' file.")
        creds = Credentials.from_service_account_file("credentials.json", scopes=scopes)

    # --- STRATEGY 2: ENVIRONMENT VARIABLE (Fallback for Deploy) ---
    else:
        print("☁️ 'credentials.json' not found. Checking Environment Variables...")
        creds_json_str = os.environ.get("gcp_service_account")

        if not creds_json_str:
            raise ValueError(
                "❌ Error: Could not find 'credentials.json' LOCALLY, and 'gcp_service_account' is missing from ENV vars."
            )

        creds_dict = json.loads(creds_json_str)
        creds = Credentials.from_service_account_info(creds_dict, scopes=scopes)

    # Authorize & Open
    client = gspread.authorize(creds)
    return client.open(sheet_name).sheet1
# def connect_to_sheet(sheet_name):
#     # Define the scopes (Permissions)
#     scopes = [
#         "https://www.googleapis.com/auth/spreadsheets",
#         "https://www.googleapis.com/auth/drive"
#     ]
#
#     creds_json_str = os.environ.get("gcp_service_account")
#
#     if not creds_json_str:
#         raise ValueError(
#             "❌ Error: Could not find 'gcp_service_account' in Environment Variables. Did you add the secret in Hugging Face settings?")
#
#     # 2. Convert the String back into a Python Dictionary
#     creds_dict = json.loads(creds_json_str)
#
#     # 3. Create credentials object
#     creds = Credentials.from_service_account_info(
#         creds_dict,
#         scopes=scopes
#     )
#
#     # Authorize gspread
#     client = gspread.authorize(creds)
#
#     # Open the sheet
#     return client.open(sheet_name).sheet1

def load_cache_dict(sheet):
    """Returns a dict: {'company name': 'url'} for fast lookup."""
    try:
        data = sheet.get_all_records()
        # Create dict: lowercased name -> url
        return {row['Company'].lower().strip(): row['Website'] for row in data if row['Company']}
    except Exception as e:
        print(f"⚠️ Cache read error (empty sheet?): {e}")
        return {}


def append_to_cache(sheet, new_entries):
    """Appends list of {'Company': name, 'Website': url} to sheet."""
    if not new_entries:
        return

    rows = [[entry['Company'], entry['Website']] for entry in new_entries]
    try:
        sheet.append_rows(rows)
        print(f"💾 Cached {len(rows)} new companies.")
    except Exception as e:
        print(f"❌ Error saving to cache: {e}")

# # --- 2. LOAD CACHE (READ URLS FROM SHEET) ---
# def load_cache(sheet):
#     """
#     Reads the entire sheet and creates a dictionary:
#     {'Nvidia': 'https://nvidia.com', 'Tesla': 'https://tesla.com'}
#     """
#     print("📂 Reading existing data from Google Sheet...")
#     data = sheet.get_all_records()  # Assumes headers: "Company", "Website"
#
#     # Create a quick lookup dictionary (Normalize names to lowercase to be safe)
#     cache = {row['Company'].lower().strip(): row['Website'] for row in data if row['Company']}
#     return cache
#
#
# # --- 3. THE "CASH SAVER" FUNCTION ---
# def save_companies_to_cache(new_rows_to_add, sheet_name):
#     sheet = connect_to_sheet(sheet_name)
#
#     # 1. Check if there is data
#     if new_rows_to_add:
#         print(f"💾 Saving {len(new_rows_to_add)} new companies to Sheet...")
#
#         # 2. CONVERT Dicts to Lists
#         values_to_upload = [
#             [item.get('company_name'), item.get('company_website')]
#             for item in new_rows_to_add
#         ]
#
#         # 3. Append to Sheet
#         sheet.append_rows(values_to_upload)
#         print("✅ Save Complete.")
#
#     else:
#         print("🎉 No new searches needed. Sheet is up to date!")
#
#
# # --- 3. THE LOAD CACHED COMPANIES FUNCTION ---
# def get_cached_companies(company_list, sheet_name):
#     """
#     Splits companies into 'Found in Cache' and 'Missing (Need to Search)'
#     """
#     sheet = connect_to_sheet(sheet_name)
#     cache = load_cache(sheet)
#
#     companies_in_cache = []
#     missing_companies = []
#
#     print(f"📂 Checking Cache for {len(company_list)} companies...")
#     # print(company_list)
#
#     for item in company_list:
#         name = item  # item['company_name']
#         # Normalize key for matching (must match how you save them)
#         name_key = name.lower().strip()
#
#         # === CHECK CACHE ===
#         # We check if key exists AND value is not empty
#         if name_key in cache and cache[name_key]:
#             # print(f"   ✅ Cache Hit: {name}") # Optional: Comment out to reduce noise
#             companies_in_cache.append({
#                 'company_name': name,
#                 'company_website': cache[name_key]
#             })
#         # === MISSING ===
#         else:
#             # print(f"   🔎 Cache Miss: {name}")
#             missing_companies.append(item)
#
#     # RETURN OUTSIDE THE LOOP
#     return {
#         'found': companies_in_cache,
#         'missing': missing_companies
#     }