#import gspread


def match_companies_to_articles(articles_metadata, ai_results):
    """Flatten per-article company results into one sorted list of records.

    Args:
        articles_metadata: Iterable of dicts, each providing a 'link' and a
            'title' key. Used to resolve an article URL to its title.
        ai_results: Iterable of dicts. Each may carry a 'url' key and a
            'companies' key holding a list of dicts with a required 'name'
            and an optional 'url'.

    Returns:
        A new list of dicts with the keys 'company_name', 'company_url',
        'article_title' and 'article_url', sorted by 'company_name'. The
        sort is stable, so records sharing a name keep their input order.

    Raises:
        KeyError: If a metadata item lacks 'link' or 'title', or a company
            entry lacks 'name'.
    """
    # Build the URL -> title lookup once so each result is resolved in O(1)
    # instead of rescanning the metadata list.
    url_to_title_map = {
        item['link']: item['title'] for item in articles_metadata
    }

    final_list = []
    for result in ai_results:
        article_url = result.get('url')
        # Fall back to a placeholder when the URL is not in the metadata.
        article_title = url_to_title_map.get(article_url, "Unknown Title")

        # `or []` covers both a missing 'companies' key and an explicit
        # None/empty value, so such results simply contribute no records.
        for company in result.get('companies') or []:
            final_list.append({
                "company_name": company['name'],
                # A company without a URL is recorded with an empty string.
                "company_url": company.get('url', ''),
                "article_title": article_title,
                "article_url": article_url,
            })

    return sorted(final_list, key=lambda record: record['company_name'])


# NOTE: The Google Sheets helpers below are disabled (commented out) together
# with the gspread import at the top of the file.
#
# def connect_to_sheet(json_keyfile, sheet_name):
#     """Authenticates and returns the worksheet object."""
#     try:
#         gc = gspread.service_account(filename=json_keyfile)
#         sh = gc.open(sheet_name)
#         return sh.sheet1
#     except Exception as e:
#         print(f"❌ Error connecting to Google Sheets: {e}")
#         return None
#
#
# def get_cached_websites(worksheet):
#     """
#     Returns a dictionary of existing companies: {'Tesla': 'tesla.com', ...}
#     """
#     if not worksheet: return {}
#
#     print("📂 Reading cache from Google Sheets...")
#     try:
#         records = worksheet.get_all_records()
#         # Convert list of dicts to a lookup map
#         return {
#             row['company_name']: row['company_website']
#             for row in records
#             if row.get('company_name')
#         }
#     except Exception:
#         return {}
#
#
# def save_new_websites(worksheet, new_data):
#     """
#     Appends new data to the sheet.
#     Expects a list of dicts: [{'company_name': 'X', 'company_website': 'Y'}]
#     """
#     if not worksheet or not new_data: return
#
#     print(f"💾 Saving {len(new_data)} new entries to Google Sheets...")
#
#     # Prepare rows as list of lists: [['Name', 'URL'], ['Name', 'URL']]
#     rows = [[item['company_name'], item['company_website']] for item in new_data]
#
#     # Add headers if sheet is empty
#     if not worksheet.get_all_values():
#         worksheet.append_row(["company_name", "company_website"])
#
#     worksheet.append_rows(rows)