# newFinderAgent_v2 / src / helpers.py
#import gspread
def match_companies_to_articles(articles_metadata, ai_results):
    """Join AI-extracted companies with the metadata of their source articles.

    Args:
        articles_metadata: List of dicts, each expected to carry 'link' and
            'title' keys describing one article.
        ai_results: List of dicts, each with a 'url' key and an optional
            'companies' list of {'name': ..., 'url': ...} dicts.

    Returns:
        A flat list of records — one per (company, article) pair — sorted
        alphabetically by company name. Each record has the keys
        'company_name', 'company_url', 'article_title' and 'article_url'.
    """
    # A. Build a lookup dictionary: URL -> Title, for O(1) title access
    # instead of re-scanning the metadata for every result.
    # Skip rows missing a 'link' so one malformed row can't raise KeyError.
    url_to_title_map = {
        item['link']: item['title']
        for item in articles_metadata
        if 'link' in item
    }

    final_list = []
    for result in ai_results:
        article_url = result.get('url')
        # Fall back to a placeholder when the URL isn't in the metadata.
        article_title = url_to_title_map.get(article_url, "Unknown Title")

        # .get with a default replaces the LBYL "'companies' in result" check
        # and tolerates results with no companies at all.
        for company in result.get('companies', []):
            final_list.append({
                # .get guards against malformed AI output missing keys.
                "company_name": company.get('name', ''),
                "company_url": company.get('url', ''),
                "article_title": article_title,
                "article_url": article_url,
            })

    # Stable alphabetical ordering by company name.
    return sorted(final_list, key=lambda rec: rec['company_name'])
#
# def connect_to_sheet(json_keyfile, sheet_name):
# """Authenticates and returns the worksheet object."""
# try:
# gc = gspread.service_account(filename=json_keyfile)
# sh = gc.open(sheet_name)
# return sh.sheet1
# except Exception as e:
# print(f"❌ Error connecting to Google Sheets: {e}")
# return None
#
#
# def get_cached_websites(worksheet):
# """
# Returns a dictionary of existing companies: {'Tesla': 'tesla.com', ...}
# """
# if not worksheet: return {}
#
# print("πŸ“‚ Reading cache from Google Sheets...")
# try:
# records = worksheet.get_all_records()
# # Convert list of dicts to a lookup map
# return {
# row['company_name']: row['company_website']
# for row in records
# if row.get('company_name')
# }
# except Exception:
# return {}
#
#
# def save_new_websites(worksheet, new_data):
# """
# Appends new data to the sheet.
# Expects a list of dicts: [{'company_name': 'X', 'company_website': 'Y'}]
# """
# if not worksheet or not new_data: return
#
# print(f"πŸ’Ύ Saving {len(new_data)} new entries to Google Sheets...")
#
# # Prepare rows as list of lists: [['Name', 'URL'], ['Name', 'URL']]
# rows = [[item['company_name'], item['company_website']] for item in new_data]
#
# # Add headers if sheet is empty
# if not worksheet.get_all_values():
# worksheet.append_row(["company_name", "company_website"])
#
# worksheet.append_rows(rows)