Spaces:

sigridveronica
/

ai-news-analyzer

Runtime error

Sigrid De los Santos

Remove remaining binary file for Hugging Face

9c57dcd 6 months ago

7.78 kB

	import os
	import sys
	from datetime import datetime
	from dotenv import load_dotenv

	from image_search import search_unsplash_image
	from md_html import convert_single_md_to_html as convert_md_to_html
	from news_analysis import fetch_deep_news, generate_value_investor_report

	import pandas as pd
	from csv_utils import detect_changes


	# Setup paths
	# Resolve against the absolute location of this file: __file__ can be a
	# relative path when the script is launched directly, in which case two
	# dirname() calls may collapse to "" and point at the current directory.
	BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))  # one level up from src/
	DATA_DIR = os.path.join(BASE_DIR, "data")  # Markdown reports are written here
	HTML_DIR = os.path.join(BASE_DIR, "html")  # HTML conversions are written here
	CSV_PATH = os.path.join(BASE_DIR, "investing_topics.csv")  # default topics file

	# Make sure both output folders exist before anything writes to them.
	for _output_dir in (DATA_DIR, HTML_DIR):
	    os.makedirs(_output_dir, exist_ok=True)

	# Load .env
	load_dotenv()

	def build_metrics_box(topic, num_articles):
	    """Return the Markdown blockquote that heads a generated report.

	    Args:
	        topic: Topic label shown in the box.
	        num_articles: Article count shown in the box.

	    Returns:
	        str: A leading newline, three ``> `` lines (topic, article count and
	        the local generation time as ``YYYY-MM-DD HH:MM``), a bare ``>``
	        line and a trailing newline.
	    """
	    now = datetime.now().strftime("%Y-%m-%d %H:%M")
	    # Assemble the lines explicitly instead of using a triple-quoted literal,
	    # so every ">" marker starts at column 0 regardless of how this function
	    # happens to be indented in the source file.
	    lines = [
	        "",
	        f"> Topic: `{topic}`",
	        f"> Articles Collected: `{num_articles}`",
	        f"> Generated: `{now}`",
	        ">",
	        "",
	    ]
	    return "\n".join(lines)

	def run_value_investing_analysis(csv_path):
	    """Generate Markdown reports for the topics listed in a CSV file.

	    The CSV is compared with the snapshot left by the previous run
	    (``investing_topics_prev.csv`` under ``BASE_DIR``). When a snapshot exists
	    only the rows returned by ``detect_changes`` are processed; otherwise every
	    row is processed. After the loop the current CSV is saved as the new
	    snapshot.

	    Args:
	        csv_path: Path of the topics CSV. Each row is read through its
	            ``topic`` column and its optional ``timespan_days`` column
	            (default 7).

	    Returns:
	        list[str]: Paths of the Markdown files written by this call; an empty
	        list when no changes were detected.
	    """
	    current_df = pd.read_csv(csv_path)
	    prev_path = os.path.join(BASE_DIR, "investing_topics_prev.csv")

	    if os.path.exists(prev_path):
	        previous_df = pd.read_csv(prev_path)
	        changed_df = detect_changes(current_df, previous_df)
	        if changed_df.empty:
	            print("✅ No changes detected. Skipping processing.")
	            return []
	    else:
	        # No snapshot to diff against, so every row counts as changed.
	        changed_df = current_df

	    new_md_files = []

	    for _, row in changed_df.iterrows():
	        raw_topic = row.get("topic")
	        # A blank CSV cell is read as NaN and a missing column yields None;
	        # neither can be searched for nor turned into a file name.
	        if pd.isna(raw_topic) or not str(raw_topic).strip():
	            print("⚠️ Skipping row without a topic.")
	            continue
	        topic = str(raw_topic).strip()

	        raw_timespan = row.get("timespan_days", 7)
	        # A blank cell is read as NaN; fall back to the 7-day default and
	        # hand downstream code a plain int rather than a pandas/NumPy scalar.
	        timespan = 7 if pd.isna(raw_timespan) else int(raw_timespan)
	        print(f"\n🔍 Processing: {topic} ({timespan} days)")

	        news = fetch_deep_news(topic, timespan)
	        if not news:
	            print(f"⚠️ No news found for: {topic}")
	            continue

	        report_body = generate_value_investor_report(topic, news)

	        # NOTE(review): the image result is not used in the report yet, and the
	        # key passed here is OPENAI_API_KEY — confirm that this is what
	        # search_unsplash_image expects. The call itself is kept unchanged.
	        image_url, image_credit = search_unsplash_image(topic, os.getenv("OPENAI_API_KEY"))

	        full_md = build_metrics_box(topic, len(news)) + report_body

	        filepath = _unused_md_path(_report_stem(topic))
	        with open(filepath, "w", encoding="utf-8") as f:
	            f.write(full_md)

	        new_md_files.append(filepath)

	    print(f"✅ Markdown saved to: {DATA_DIR}")
	    # Save the current CSV as the snapshot the next run will diff against.
	    current_df.to_csv(prev_path, index=False)
	    return new_md_files


	def _report_stem(topic):
	    """Return ``<topic>_<YYYY-MM-DD>`` lower-cased, for use as a file name stem."""
	    # Spaces become underscores as before; path separators are replaced too so
	    # a topic such as "AI/ML" cannot redirect the file into another directory.
	    safe_topic = topic.translate(str.maketrans({" ": "_", "/": "_", "\\": "_"}))
	    return f"{safe_topic.lower()}_{datetime.now().strftime('%Y-%m-%d')}"


	def _unused_md_path(stem):
	    """Return a ``.md`` path in DATA_DIR for *stem* that does not exist yet."""
	    filepath = os.path.join(DATA_DIR, stem + ".md")
	    counter = 1
	    # Append _1, _2, ... until the name is free so earlier reports are kept.
	    while os.path.exists(filepath):
	        filepath = os.path.join(DATA_DIR, f"{stem}_{counter}.md")
	        counter += 1
	    return filepath


	def run_pipeline(csv_path, tavily_api_key):
	    """Run the analysis for *csv_path* and convert each new report to HTML.

	    Args:
	        csv_path: Path of the topics CSV, forwarded to
	            ``run_value_investing_analysis``.
	        tavily_api_key: Value exported as ``TAVILY_API_KEY`` for this process.
	            When it is ``None`` or empty the variable is left as it is.

	    Returns:
	        list[str]: One path per Markdown file produced by the analysis, each
	        located in ``HTML_DIR`` with the ``.md`` extension swapped for
	        ``.html``.
	    """
	    # os.environ only accepts strings: assigning None raises TypeError, and an
	    # empty string would overwrite a key already present in the environment.
	    if tavily_api_key:
	        os.environ["TAVILY_API_KEY"] = tavily_api_key

	    new_html_paths = []
	    for md_path in run_value_investing_analysis(csv_path):
	        convert_md_to_html(md_path, HTML_DIR)
	        # Swap only the final extension; str.replace would also rewrite any
	        # ".md" that happens to occur inside the file name.
	        stem, _ext = os.path.splitext(os.path.basename(md_path))
	        # NOTE(review): assumes the converter writes "<stem>.html" into
	        # HTML_DIR — confirm against md_html.
	        new_html_paths.append(os.path.join(HTML_DIR, stem + ".html"))

	    return new_html_paths


	if __name__ == "__main__":
	    # Script entry point: build the Markdown reports for the default CSV,
	    # then convert each freshly written report into HTML_DIR.
	    for report_path in run_value_investing_analysis(CSV_PATH):
	        convert_md_to_html(report_path, HTML_DIR)
	    print(f"🌐 All reports converted to HTML at: {HTML_DIR}")


	# import os
	# import sys
	# from datetime import datetime
	# from dotenv import load_dotenv

	# # from news_analysis import load_csv, fetch_deep_news, generate_value_investor_report
	# from image_search import search_unsplash_image
	# from md_html import convert_md_folder_to_html
	# from md_html import convert_single_md_to_html as convert_md_to_html


	# from news_analysis import fetch_deep_news, generate_value_investor_report

	# import pandas as pd
	# from csv_utils import detect_changes


	# # Adds the absolute path of /external to your module path
	# BASE_DIR = os.path.dirname(os.path.abspath(__file__))
	# EXTERNAL_PATH = os.path.join(BASE_DIR, "external")
	# if EXTERNAL_PATH not in sys.path:
	# sys.path.append(EXTERNAL_PATH)

	# # Load .env
	# load_dotenv()

	# # === Base Folder Setup ===
	# BASE_DIR = os.path.dirname(os.path.dirname(__file__)) # one level up from src/
	# DATA_DIR = os.path.join(BASE_DIR, "data")
	# HTML_DIR = os.path.join(BASE_DIR, "html")
	# CSV_PATH = os.path.join(BASE_DIR, "investing_topics.csv")



	# # Ensure output folders exist
	# os.makedirs(DATA_DIR, exist_ok=True)
	# os.makedirs(HTML_DIR, exist_ok=True)

	# # === Metrics Block ===
	# def build_metrics_box(topic, num_articles):
	# now = datetime.now().strftime("%Y-%m-%d %H:%M")
	# return f"""
	# > Topic: `{topic}`
	# > Articles Collected: `{num_articles}`
	# > Generated: `{now}`
	# >
	# """

	# # === Main Logic ===
	# def run_value_investing_analysis(csv_path):
	# current_df = pd.read_csv(csv_path)

	# prev_path = os.path.join(BASE_DIR, "investing_topics_prev.csv")
	# if os.path.exists(prev_path):
	# previous_df = pd.read_csv(prev_path)
	# changed_df = detect_changes(current_df, previous_df)
	# if changed_df.empty:
	# print("✅ No changes detected. Skipping processing.")
	# return
	# else:
	# changed_df = current_df

	# for _, row in changed_df.iterrows():
	# topic = row.get("topic")
	# timespan = row.get("timespan_days", 7)
	# print(f"\n🔍 Processing: {topic} ({timespan} days)")

	# news = fetch_deep_news(topic, timespan)
	# if not news:
	# print(f"⚠️ No news found for: {topic}")
	# continue

	# report_body = generate_value_investor_report(topic, news)
	# image_url, image_credit = search_unsplash_image(topic)
	# metrics_md = build_metrics_box(topic, len(news))
	# full_md = metrics_md + report_body

	# base_filename = f"{topic.replace(' ', '_').lower()}_{datetime.now().strftime('%Y-%m-%d')}"
	# filename = base_filename + ".md"
	# filepath = os.path.join(DATA_DIR, filename)

	# counter = 1
	# while os.path.exists(filepath):
	# filename = f"{base_filename}_{counter}.md"
	# filepath = os.path.join(DATA_DIR, filename)
	# counter += 1

	# with open(filepath, "w", encoding="utf-8") as f:
	# f.write(full_md)

	# print(f"✅ Markdown saved to: {DATA_DIR}")
	# current_df.to_csv(prev_path, index=False) # Save current as previous for next run

	# #convert_md_folder_to_html(DATA_DIR, HTML_DIR)
	# #print(f"🌐 All reports converted to HTML at: {HTML_DIR}")

	# # === Streamlit Integration Wrapper ===
	# def run_pipeline(csv_path, tavily_api_key):
	# """
	# Runs the full analysis pipeline for Streamlit.

	# Returns:
	# str: Path to the generated HTML report.
	# """
	# os.environ["TAVILY_API_KEY"] = tavily_api_key

	# run_value_investing_analysis(csv_path)

	# # Combine all generated markdown into one file
	# combined_md_path = os.path.join(DATA_DIR, "combined_report.md")
	# with open(combined_md_path, "w", encoding="utf-8") as outfile:
	# for fname in os.listdir(DATA_DIR):
	# if fname.endswith(".md"):
	# with open(os.path.join(DATA_DIR, fname), "r", encoding="utf-8") as f:
	# outfile.write(f.read() + "\n\n---\n\n")

	# # Convert to HTML
	# # html_output_path = os.path.join(HTML_DIR, "news_report.html")
	# # convert_md_to_html(combined_md_path, html_output_path)
	# convert_md_to_html(combined_md_path, HTML_DIR)
	# html_output_path = os.path.join(HTML_DIR, "combined_report.html")


	# return html_output_path


	# # === Run ===
	# if __name__ == "__main__":
	# run_value_investing_analysis(CSV_PATH)
	# convert_md_folder_to_html(DATA_DIR, HTML_DIR)
	# print(f"🌐 All reports converted to HTML at: {HTML_DIR}")