# newFinderAgent_v2 / src / helpers.py
#import gspread
def match_companies_to_articles(articles_metadata, ai_results):
    """Join AI-extracted companies with the metadata of their source articles.

    Args:
        articles_metadata: List of dicts, each expected to carry 'link' and
            'title' keys describing one article.
        ai_results: List of dicts, each with a 'url' key and an optional
            'companies' list of {'name': ..., 'url': ...} dicts.

    Returns:
        A flat list of records — one per (company, article) pair — sorted
        alphabetically by company name. Each record has the keys
        'company_name', 'company_url', 'article_title' and 'article_url'.
    """
    # A. Build a lookup dictionary: URL -> Title, for O(1) title access
    # instead of re-scanning the metadata for every result.
    # Skip rows missing a 'link' so one malformed row can't raise KeyError.
    url_to_title_map = {
        item['link']: item['title']
        for item in articles_metadata
        if 'link' in item
    }

    final_list = []
    for result in ai_results:
        article_url = result.get('url')
        # Fall back to a placeholder when the URL isn't in the metadata.
        article_title = url_to_title_map.get(article_url, "Unknown Title")

        # .get with a default replaces the LBYL "'companies' in result" check
        # and tolerates results with no companies at all.
        for company in result.get('companies', []):
            final_list.append({
                # .get guards against malformed AI output missing keys.
                "company_name": company.get('name', ''),
                "company_url": company.get('url', ''),
                "article_title": article_title,
                "article_url": article_url,
            })

    # Stable alphabetical ordering by company name.
    return sorted(final_list, key=lambda rec: rec['company_name'])
#
# def connect_to_sheet(json_keyfile, sheet_name):
# """Authenticates and returns the worksheet object."""
# try:
# gc = gspread.service_account(filename=json_keyfile)
# sh = gc.open(sheet_name)
# return sh.sheet1
# except Exception as e:
# print(f"❌ Error connecting to Google Sheets: {e}")
# return None
#
#
# def get_cached_websites(worksheet):
# """
# Returns a dictionary of existing companies: {'Tesla': 'tesla.com', ...}
# """
# if not worksheet: return {}
#
# print("πŸ“‚ Reading cache from Google Sheets...")
# try:
# records = worksheet.get_all_records()
# # Convert list of dicts to a lookup map
# return {
# row['company_name']: row['company_website']
# for row in records
# if row.get('company_name')
# }
# except Exception:
# return {}
#
#
# def save_new_websites(worksheet, new_data):
# """
# Appends new data to the sheet.
# Expects a list of dicts: [{'company_name': 'X', 'company_website': 'Y'}]
# """
# if not worksheet or not new_data: return
#
# print(f"πŸ’Ύ Saving {len(new_data)} new entries to Google Sheets...")
#
# # Prepare rows as list of lists: [['Name', 'URL'], ['Name', 'URL']]
# rows = [[item['company_name'], item['company_website']] for item in new_data]
#
# # Add headers if sheet is empty
# if not worksheet.get_all_values():
# worksheet.append_row(["company_name", "company_website"])
#
# worksheet.append_rows(rows)