| #import gspread | |
def match_companies_to_articles(articles_metadata, ai_results):
    """Join AI-extracted companies with the metadata of their source article.

    Args:
        articles_metadata: Iterable of dicts, each providing a ``'link'``
            (the article URL) and a ``'title'`` key.
        ai_results: Iterable of dicts, each optionally providing ``'url'``
            (the article URL) and ``'companies'`` (a list of dicts with a
            ``'name'`` and an optional ``'url'``).

    Returns:
        A new list with one dict per company mention, holding the keys
        ``company_name``, ``company_url``, ``article_title`` and
        ``article_url``, sorted by ``company_name``.

    Raises:
        KeyError: If an item of ``articles_metadata`` lacks ``'link'`` or
            ``'title'``.
    """
    # Build the URL -> title lookup once so every result resolves its title
    # with a single dict access instead of rescanning the metadata.
    url_to_title_map = {item['link']: item['title'] for item in articles_metadata}

    final_list = []
    for result in ai_results:
        article_url = result.get('url')
        # Fall back to a placeholder when the URL is not in the metadata.
        article_title = url_to_title_map.get(article_url, "Unknown Title")

        # ``or []`` covers both a missing key and an explicit null value;
        # a plain membership test would let ``None`` through to the loop.
        for company in result.get('companies') or []:
            name = company.get('name')
            if name is None:
                # A nameless entry cannot be reported and would make the
                # final sort fail when compared against string names.
                continue
            final_list.append({
                "company_name": name,
                # Normalize both a missing and a null URL to an empty string.
                "company_url": company.get('url') or '',
                "article_title": article_title,
                "article_url": article_url,
            })

    return sorted(final_list, key=lambda record: record['company_name'])
| # | |
| # def connect_to_sheet(json_keyfile, sheet_name): | |
| # """Authenticates and returns the worksheet object.""" | |
| # try: | |
| # gc = gspread.service_account(filename=json_keyfile) | |
| # sh = gc.open(sheet_name) | |
| # return sh.sheet1 | |
| # except Exception as e: | |
| # print(f"❌ Error connecting to Google Sheets: {e}") | |
| # return None | |
| # | |
| # | |
| # def get_cached_websites(worksheet): | |
| # """ | |
| # Returns a dictionary of existing companies: {'Tesla': 'tesla.com', ...} | |
| # """ | |
| # if not worksheet: return {} | |
| # | |
| # print("π Reading cache from Google Sheets...") | |
| # try: | |
| # records = worksheet.get_all_records() | |
| # # Convert list of dicts to a lookup map | |
| # return { | |
| # row['company_name']: row['company_website'] | |
| # for row in records | |
| # if row.get('company_name') | |
| # } | |
| # except Exception: | |
| # return {} | |
| # | |
| # | |
| # def save_new_websites(worksheet, new_data): | |
| # """ | |
| # Appends new data to the sheet. | |
| # Expects a list of dicts: [{'company_name': 'X', 'company_website': 'Y'}] | |
| # """ | |
| # if not worksheet or not new_data: return | |
| # | |
| # print(f"💾 Saving {len(new_data)} new entries to Google Sheets...") | |
| # | |
| # # Prepare rows as list of lists: [['Name', 'URL'], ['Name', 'URL']] | |
| # rows = [[item['company_name'], item['company_website']] for item in new_data] | |
| # | |
| # # Add headers if sheet is empty | |
| # if not worksheet.get_all_values(): | |
| # worksheet.append_row(["company_name", "company_website"]) | |
| # | |
| # worksheet.append_rows(rows) | |