# newFinderAgent_v2/src/streamlit_app.py
# (provenance: uploaded by chummchumm, commit 8b425b2 "Upload 6 files")
import streamlit as st
import pandas as pd
from datetime import datetime
from search import search_news, fill_missing_urls
from fetch_and_extract import get_companies_and_articles
from helpers import match_companies_to_articles
from config import MAX_NEWS_PER_TOPIC, MAX_TOPICS, COMPANY_CACHE_SHEET_NAME
import os
# --- PAGE CONFIGURATION ---
st.set_page_config(page_title="News Finder Agent", page_icon="πŸ•΅οΈ", layout="wide")

# --- SESSION STATE INITIALIZATION ---
# Seed the results slot exactly once so later reruns can test `is not None`
# without clobbering data produced by a previous search.
if "results_data" not in st.session_state:
    st.session_state["results_data"] = None
# --- MAIN INTERFACE ---
st.title("πŸ•΅οΈ News Finder AI Agent")
st.markdown("Enter your topics below to generate a report of companies mentioned in the news.")

# 1. TOPIC INPUT -- a single comma-separated string; parsed on submit below.
_topics_label = f"1. Topics (Comma separated), maximum {MAX_TOPICS} topics"
topics_input = st.text_area(
    _topics_label,
    placeholder="e.g. Artificial Intelligence, Nvidia, Supply Chain Logistics, Green Energy...",
    help="Paste your long list of topics here. The agent will dedup and search for all of them.",
)

# Three equal-width columns for the geography, time-frame, and limit controls.
col_geo, col_time, col_limit = st.columns(3)
# 2. GEOGRAPHY INPUT
# Display name -> lowercase country code handed to search_news ("any" = Global).
# Kept as an ordered list of pairs so the selectbox ordering is explicit;
# dict() preserves this insertion order.
_COUNTRY_OPTIONS = [
    # Global & North America
    ("Global", "any"),
    ("United States", "us"),
    ("Canada", "ca"),
    # Asia Pacific
    ("Australia", "au"),
    ("China", "cn"),
    ("India", "in"),
    ("Japan", "jp"),
    ("Malaysia", "my"),
    ("South Korea", "kr"),
    ("Singapore", "sg"),
    ("Taiwan", "tw"),
    ("Hong Kong", "hk"),
    # Europe (Western)
    ("United Kingdom", "gb"),
    ("Germany", "de"),
    ("France", "fr"),
    ("Italy", "it"),
    ("Spain", "es"),
    ("Netherlands", "nl"),
    ("Belgium", "be"),
    ("Switzerland", "ch"),
    ("Austria", "at"),
    ("Ireland", "ie"),
    ("Luxembourg", "lu"),
    ("Portugal", "pt"),
    # Europe (Nordic)
    ("Sweden", "se"),
    ("Norway", "no"),
    ("Denmark", "dk"),
    ("Finland", "fi"),
    ("Iceland", "is"),
    # Europe (Central & Eastern)
    ("Poland", "pl"),
    ("Czech Republic", "cz"),
    ("Hungary", "hu"),
    ("Romania", "ro"),
    ("Ukraine", "ua"),
    ("Greece", "gr"),
    ("Turkey", "tr"),
    ("Bulgaria", "bg"),
    ("Croatia", "hr"),
    ("Slovakia", "sk"),
    ("Slovenia", "si"),
    ("Serbia", "rs"),
    # Europe (Baltic)
    ("Estonia", "ee"),
    ("Latvia", "lv"),
    ("Lithuania", "lt"),
]
iso_countries = dict(_COUNTRY_OPTIONS)
with col_geo:
    # 2. GEOGRAPHY -- single choice; defaults to the first entry ("Global").
    selected_country = st.selectbox(
        "2. Geography",
        options=list(iso_countries),
        index=0,
    )
geo_code = iso_countries[selected_country]

# 3. TIME FRAME INPUT
with col_time:
    days_back = st.slider(
        "3. Time Frame (Days Back)",
        min_value=1, max_value=30, value=7,
        help="How far back should we search for news?",
    )

# 4. MAX ARTICLES INPUT
with col_limit:
    max_news = st.number_input(
        "4. Max Articles per Topic",
        min_value=10,
        max_value=MAX_NEWS_PER_TOPIC,  # hard ceiling from config
        value=min(50, MAX_NEWS_PER_TOPIC),
        step=10,
        help=f"Control costs by limiting articles. Max allowed: {MAX_NEWS_PER_TOPIC}",
    )
# --- ACTION BUTTON ---
# Runs the full pipeline on click: search -> LLM extraction -> matching ->
# URL fill-in, then stores the result in session state for rendering below.
if st.button("πŸš€ Find News & Extract Companies", type="primary"):
    if not topics_input:
        st.error("⚠️ Please enter at least one topic.")
    else:
        OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
        SERPER_API_KEY = os.environ.get("SERPER_API_KEY")
        # Fail fast with a clear message instead of passing None keys into
        # search_news / get_companies_and_articles downstream.
        if not OPENAI_API_KEY or not SERPER_API_KEY:
            st.error("⚠️ Missing OPENAI_API_KEY and/or SERPER_API_KEY environment variables.")
            st.stop()
        # Parse the comma-separated topic string, dropping blank entries.
        topic_list = [t.strip() for t in topics_input.split(",") if t.strip()]
        # ENFORCE LIMIT ON TOPICS
        if len(topic_list) > MAX_TOPICS:
            st.warning(
                f"⚠️ Limit Reached: You entered {len(topic_list)} topics. Processing only the first {MAX_TOPICS}.")
            topic_list = topic_list[:MAX_TOPICS]
        with st.status("πŸ€– Agent is working...", expanded=True) as status:
            st.write(f"πŸ” Searching {len(topic_list)} topics in {selected_country} (Max {max_news} articles each)...")
            # 1. Search News
            articles = search_news(topic_list, geo_code, days_back, max_news, SERPER_API_KEY, selected_country)
            if not articles:
                status.update(label="❌ No news found!", state="error")
                st.stop()
            st.write(f"βœ… Found {len(articles)} unique articles. πŸ› οΈ Extracting companies with LLM...")
            # 2. Extract Companies (LLM)
            urls_to_process = [a['link'] for a in articles]
            articles_with_companies_from_llm = get_companies_and_articles(urls_to_process, OPENAI_API_KEY)
            st.write("βœ… Generating results...")  # plain string (was an f-string with no placeholders)
            # 3. Combine & Fill URLs
            matched_results = match_companies_to_articles(articles, articles_with_companies_from_llm)
            structured_results = fill_missing_urls(matched_results, COMPANY_CACHE_SHEET_NAME, SERPER_API_KEY)
            status.update(label="βœ… Search Complete!", state="complete", expanded=False)
            # SAVE RESULTS -- persisted in session state so they survive reruns.
            if structured_results:
                st.session_state.results_data = pd.DataFrame(structured_results)
            else:
                st.warning("No companies found in the extracted text.")
# --- RESULTS & DOWNLOAD ---
# Rendered outside the button handler so previously computed results
# stay visible across Streamlit reruns.
if st.session_state.results_data is not None:
    st.divider()
    st.subheader("πŸ“‚ Extracted Data")
    # Render both URL columns as clickable links (full URL shown).
    link_columns = {
        "company_url": st.column_config.LinkColumn("Website"),
        "article_url": st.column_config.LinkColumn("Source Article"),
    }
    st.dataframe(
        st.session_state.results_data,
        column_config=link_columns,
        use_container_width=True,
    )
    csv_bytes = st.session_state.results_data.to_csv(index=False).encode('utf-8')
    timestamp = datetime.now().strftime('%Y%m%d_%H%M')
    st.download_button(
        label="πŸ“₯ Download Results as CSV",
        data=csv_bytes,
        file_name=f"news_extraction_{timestamp}.csv",
        mime="text/csv",
        type="primary",
    )