File size: 2,619 Bytes
#import gspread
def match_companies_to_articles(articles_metadata, ai_results):
    """Flatten AI extraction results into one record per company mention.

    Each entry of ``ai_results`` describes one article and the companies
    found in it. The article title is resolved by matching the result's
    ``'url'`` against the ``'link'`` of the entries in ``articles_metadata``.

    Args:
        articles_metadata: Iterable of dicts, each providing ``'link'``
            (the article URL) and ``'title'``.
        ai_results: Iterable of dicts with an optional ``'url'`` and an
            optional ``'companies'`` list. Each company is a dict with a
            required ``'name'`` and an optional ``'url'``.

    Returns:
        A new list of dicts with the keys ``'company_name'``,
        ``'company_url'``, ``'article_title'`` and ``'article_url'``,
        sorted by ``'company_name'``. ``'company_url'`` falls back to ``''``
        and ``'article_title'`` to ``"Unknown Title"`` when not available.

    Raises:
        KeyError: If a metadata entry lacks ``'link'`` or ``'title'``, or a
            company entry lacks ``'name'``.
    """
    # Build the URL -> title lookup once so every result resolves its title
    # with a single dict access instead of rescanning the metadata.
    url_to_title_map = {item['link']: item['title'] for item in articles_metadata}

    final_list = []
    for result in ai_results:
        article_url = result.get('url')
        # Results whose URL is not in the metadata still produce records,
        # labelled with a placeholder title.
        article_title = url_to_title_map.get(article_url, "Unknown Title")

        # 'companies' may be absent or explicitly None (e.g. a null in the
        # AI output); treat both as "no companies" rather than failing.
        for company in result.get('companies') or ():
            final_list.append({
                "company_name": company['name'],
                "company_url": company.get('url', ''),  # URL is optional
                "article_title": article_title,
                "article_url": article_url,
            })

    return sorted(final_list, key=lambda record: record['company_name'])
#
# def connect_to_sheet(json_keyfile, sheet_name):
# """Authenticates and returns the worksheet object."""
# try:
# gc = gspread.service_account(filename=json_keyfile)
# sh = gc.open(sheet_name)
# return sh.sheet1
# except Exception as e:
# print(f"❌ Error connecting to Google Sheets: {e}")
# return None
#
#
# def get_cached_websites(worksheet):
# """
# Returns a dictionary of existing companies: {'Tesla': 'tesla.com', ...}
# """
# if not worksheet: return {}
#
# print("๐ Reading cache from Google Sheets...")
# try:
# records = worksheet.get_all_records()
# # Convert list of dicts to a lookup map
# return {
# row['company_name']: row['company_website']
# for row in records
# if row.get('company_name')
# }
# except Exception:
# return {}
#
#
# def save_new_websites(worksheet, new_data):
# """
# Appends new data to the sheet.
# Expects a list of dicts: [{'company_name': 'X', 'company_website': 'Y'}]
# """
# if not worksheet or not new_data: return
#
# print(f"💾 Saving {len(new_data)} new entries to Google Sheets...")
#
# # Prepare rows as list of lists: [['Name', 'URL'], ['Name', 'URL']]
# rows = [[item['company_name'], item['company_website']] for item in new_data]
#
# # Add headers if sheet is empty
# if not worksheet.get_all_values():
# worksheet.append_row(["company_name", "company_website"])
#
# worksheet.append_rows(rows)
|