File size: 5,429 Bytes
8b425b2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
import gspread
from google.oauth2.service_account import Credentials
import streamlit as st
import os
import json


# --- 1. SETUP GOOGLE SHEETS CONNECTION ---
def connect_to_sheet(sheet_name):
    scopes = [
        "https://www.googleapis.com/auth/spreadsheets",
        "https://www.googleapis.com/auth/drive"
    ]

    # --- STRATEGY 1: LOCAL FILE (Priority) ---
    if os.path.exists("credentials.json"):
        print("πŸ“‚ Using local 'credentials.json' file.")
        creds = Credentials.from_service_account_file("credentials.json", scopes=scopes)

    # --- STRATEGY 2: ENVIRONMENT VARIABLE (Fallback for Deploy) ---
    else:
        print("☁️ 'credentials.json' not found. Checking Environment Variables...")
        creds_json_str = os.environ.get("gcp_service_account")

        if not creds_json_str:
            raise ValueError(
                "❌ Error: Could not find 'credentials.json' LOCALLY, and 'gcp_service_account' is missing from ENV vars."
            )

        creds_dict = json.loads(creds_json_str)
        creds = Credentials.from_service_account_info(creds_dict, scopes=scopes)

    # Authorize & Open
    client = gspread.authorize(creds)
    return client.open(sheet_name).sheet1
# def connect_to_sheet(sheet_name):
#     # Define the scopes (Permissions)
#     scopes = [
#         "https://www.googleapis.com/auth/spreadsheets",
#         "https://www.googleapis.com/auth/drive"
#     ]
#
#     creds_json_str = os.environ.get("gcp_service_account")
#
#     if not creds_json_str:
#         raise ValueError(
#             "❌ Error: Could not find 'gcp_service_account' in Environment Variables. Did you add the secret in Hugging Face settings?")
#
#     # 2. Convert the String back into a Python Dictionary
#     creds_dict = json.loads(creds_json_str)
#
#     # 3. Create credentials object
#     creds = Credentials.from_service_account_info(
#         creds_dict,
#         scopes=scopes
#     )
#
#     # Authorize gspread
#     client = gspread.authorize(creds)
#
#     # Open the sheet
#     return client.open(sheet_name).sheet1

def load_cache_dict(sheet):
    """Returns a dict: {'company name': 'url'} for fast lookup."""
    try:
        data = sheet.get_all_records()
        # Create dict: lowercased name -> url
        return {row['Company'].lower().strip(): row['Website'] for row in data if row['Company']}
    except Exception as e:
        print(f"⚠️ Cache read error (empty sheet?): {e}")
        return {}


def append_to_cache(sheet, new_entries):
    """Appends list of {'Company': name, 'Website': url} to sheet."""
    if not new_entries:
        return

    rows = [[entry['Company'], entry['Website']] for entry in new_entries]
    try:
        sheet.append_rows(rows)
        print(f"πŸ’Ύ Cached {len(rows)} new companies.")
    except Exception as e:
        print(f"❌ Error saving to cache: {e}")

# # --- 2. LOAD CACHE (READ URLS FROM SHEET) ---
# def load_cache(sheet):
#     """
#     Reads the entire sheet and creates a dictionary:
#     {'Nvidia': 'https://nvidia.com', 'Tesla': 'https://tesla.com'}
#     """
#     print("πŸ“‚ Reading existing data from Google Sheet...")
#     data = sheet.get_all_records()  # Assumes headers: "Company", "Website"
#
#     # Create a quick lookup dictionary (Normalize names to lowercase to be safe)
#     cache = {row['Company'].lower().strip(): row['Website'] for row in data if row['Company']}
#     return cache
#
#
# # --- 3. THE "CASH SAVER" FUNCTION ---
# def save_companies_to_cache(new_rows_to_add, sheet_name):
#     sheet = connect_to_sheet(sheet_name)
#
#     # 1. Check if there is data
#     if new_rows_to_add:
#         print(f"πŸ’Ύ Saving {len(new_rows_to_add)} new companies to Sheet...")
#
#         # 2. CONVERT Dicts to Lists
#         values_to_upload = [
#             [item.get('company_name'), item.get('company_website')]
#             for item in new_rows_to_add
#         ]
#
#         # 3. Append to Sheet
#         sheet.append_rows(values_to_upload)
#         print("βœ… Save Complete.")
#
#     else:
#         print("πŸŽ‰ No new searches needed. Sheet is up to date!")
#
#
# # --- 3. THE LOAD CACHED COMPANIES FUNCTION ---
# def get_cached_companies(company_list, sheet_name):
#     """
#     Splits companies into 'Found in Cache' and 'Missing (Need to Search)'
#     """
#     sheet = connect_to_sheet(sheet_name)
#     cache = load_cache(sheet)
#
#     companies_in_cache = []
#     missing_companies = []
#
#     print(f"πŸ“‚ Checking Cache for {len(company_list)} companies...")
#     # print(company_list)
#
#     for item in company_list:
#         name = item  # item['company_name']
#         # Normalize key for matching (must match how you save them)
#         name_key = name.lower().strip()
#
#         # === CHECK CACHE ===
#         # We check if key exists AND value is not empty
#         if name_key in cache and cache[name_key]:
#             # print(f"   βœ… Cache Hit: {name}") # Optional: Comment out to reduce noise
#             companies_in_cache.append({
#                 'company_name': name,
#                 'company_website': cache[name_key]
#             })
#         # === MISSING ===
#         else:
#             # print(f"   πŸ”Ž Cache Miss: {name}")
#             missing_companies.append(item)
#
#     # RETURN OUTSIDE THE LOOP
#     return {
#         'found': companies_in_cache,
#         'missing': missing_companies
#     }