# newFinderAgent_v2/src/streamlit_app.py
# (provenance: uploaded by chummchumm, commit 8b425b2 "Upload 6 files")
import streamlit as st
import pandas as pd
from datetime import datetime
from search import search_news, fill_missing_urls
from fetch_and_extract import get_companies_and_articles
from helpers import match_companies_to_articles
from config import MAX_NEWS_PER_TOPIC, MAX_TOPICS, COMPANY_CACHE_SHEET_NAME
import os
# --- PAGE CONFIGURATION ---
st.set_page_config(page_title="News Finder Agent", page_icon="πŸ•΅οΈ", layout="wide")

# --- SESSION STATE INITIALIZATION ---
# Seed the results slot exactly once so later reruns can test `is not None`
# without clobbering data produced by a previous search.
if "results_data" not in st.session_state:
    st.session_state["results_data"] = None
# --- MAIN INTERFACE ---
st.title("πŸ•΅οΈ News Finder AI Agent")
st.markdown("Enter your topics below to generate a report of companies mentioned in the news.")

# 1. TOPIC INPUT -- a single comma-separated string; parsed on submit below.
_topics_label = f"1. Topics (Comma separated), maximum {MAX_TOPICS} topics"
topics_input = st.text_area(
    _topics_label,
    placeholder="e.g. Artificial Intelligence, Nvidia, Supply Chain Logistics, Green Energy...",
    help="Paste your long list of topics here. The agent will dedup and search for all of them.",
)

# Three equal-width columns for the geography, time-frame, and limit controls.
col_geo, col_time, col_limit = st.columns(3)
# 2. GEOGRAPHY INPUT
# Display name -> lowercase country code handed to search_news ("any" = Global).
# Kept as an ordered list of pairs so the selectbox ordering is explicit;
# dict() preserves this insertion order.
_COUNTRY_OPTIONS = [
    # Global & North America
    ("Global", "any"),
    ("United States", "us"),
    ("Canada", "ca"),
    # Asia Pacific
    ("Australia", "au"),
    ("China", "cn"),
    ("India", "in"),
    ("Japan", "jp"),
    ("Malaysia", "my"),
    ("South Korea", "kr"),
    ("Singapore", "sg"),
    ("Taiwan", "tw"),
    ("Hong Kong", "hk"),
    # Europe (Western)
    ("United Kingdom", "gb"),
    ("Germany", "de"),
    ("France", "fr"),
    ("Italy", "it"),
    ("Spain", "es"),
    ("Netherlands", "nl"),
    ("Belgium", "be"),
    ("Switzerland", "ch"),
    ("Austria", "at"),
    ("Ireland", "ie"),
    ("Luxembourg", "lu"),
    ("Portugal", "pt"),
    # Europe (Nordic)
    ("Sweden", "se"),
    ("Norway", "no"),
    ("Denmark", "dk"),
    ("Finland", "fi"),
    ("Iceland", "is"),
    # Europe (Central & Eastern)
    ("Poland", "pl"),
    ("Czech Republic", "cz"),
    ("Hungary", "hu"),
    ("Romania", "ro"),
    ("Ukraine", "ua"),
    ("Greece", "gr"),
    ("Turkey", "tr"),
    ("Bulgaria", "bg"),
    ("Croatia", "hr"),
    ("Slovakia", "sk"),
    ("Slovenia", "si"),
    ("Serbia", "rs"),
    # Europe (Baltic)
    ("Estonia", "ee"),
    ("Latvia", "lv"),
    ("Lithuania", "lt"),
]
iso_countries = dict(_COUNTRY_OPTIONS)
with col_geo:
    # 2. GEOGRAPHY -- single choice; defaults to the first entry ("Global").
    selected_country = st.selectbox(
        "2. Geography",
        options=list(iso_countries),
        index=0,
    )
geo_code = iso_countries[selected_country]

# 3. TIME FRAME INPUT
with col_time:
    days_back = st.slider(
        "3. Time Frame (Days Back)",
        min_value=1, max_value=30, value=7,
        help="How far back should we search for news?",
    )

# 4. MAX ARTICLES INPUT
with col_limit:
    max_news = st.number_input(
        "4. Max Articles per Topic",
        min_value=10,
        max_value=MAX_NEWS_PER_TOPIC,  # hard ceiling from config
        value=min(50, MAX_NEWS_PER_TOPIC),
        step=10,
        help=f"Control costs by limiting articles. Max allowed: {MAX_NEWS_PER_TOPIC}",
    )
# --- ACTION BUTTON ---
# Runs the full pipeline on click: search -> LLM extraction -> matching ->
# URL fill-in, then stores the result in session state for rendering below.
if st.button("πŸš€ Find News & Extract Companies", type="primary"):
    if not topics_input:
        st.error("⚠️ Please enter at least one topic.")
    else:
        OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
        SERPER_API_KEY = os.environ.get("SERPER_API_KEY")
        # Fail fast with a clear message instead of passing None keys into
        # search_news / get_companies_and_articles downstream.
        if not OPENAI_API_KEY or not SERPER_API_KEY:
            st.error("⚠️ Missing OPENAI_API_KEY and/or SERPER_API_KEY environment variables.")
            st.stop()
        # Parse the comma-separated topic string, dropping blank entries.
        topic_list = [t.strip() for t in topics_input.split(",") if t.strip()]
        # ENFORCE LIMIT ON TOPICS
        if len(topic_list) > MAX_TOPICS:
            st.warning(
                f"⚠️ Limit Reached: You entered {len(topic_list)} topics. Processing only the first {MAX_TOPICS}.")
            topic_list = topic_list[:MAX_TOPICS]
        with st.status("πŸ€– Agent is working...", expanded=True) as status:
            st.write(f"πŸ” Searching {len(topic_list)} topics in {selected_country} (Max {max_news} articles each)...")
            # 1. Search News
            articles = search_news(topic_list, geo_code, days_back, max_news, SERPER_API_KEY, selected_country)
            if not articles:
                status.update(label="❌ No news found!", state="error")
                st.stop()
            st.write(f"βœ… Found {len(articles)} unique articles. πŸ› οΈ Extracting companies with LLM...")
            # 2. Extract Companies (LLM)
            urls_to_process = [a['link'] for a in articles]
            articles_with_companies_from_llm = get_companies_and_articles(urls_to_process, OPENAI_API_KEY)
            st.write("βœ… Generating results...")  # plain string (was an f-string with no placeholders)
            # 3. Combine & Fill URLs
            matched_results = match_companies_to_articles(articles, articles_with_companies_from_llm)
            structured_results = fill_missing_urls(matched_results, COMPANY_CACHE_SHEET_NAME, SERPER_API_KEY)
            status.update(label="βœ… Search Complete!", state="complete", expanded=False)
            # SAVE RESULTS -- persisted in session state so they survive reruns.
            if structured_results:
                st.session_state.results_data = pd.DataFrame(structured_results)
            else:
                st.warning("No companies found in the extracted text.")
# --- RESULTS & DOWNLOAD ---
# Rendered outside the button handler so previously computed results
# stay visible across Streamlit reruns.
if st.session_state.results_data is not None:
    st.divider()
    st.subheader("πŸ“‚ Extracted Data")
    # Render both URL columns as clickable links (full URL shown).
    link_columns = {
        "company_url": st.column_config.LinkColumn("Website"),
        "article_url": st.column_config.LinkColumn("Source Article"),
    }
    st.dataframe(
        st.session_state.results_data,
        column_config=link_columns,
        use_container_width=True,
    )
    csv_bytes = st.session_state.results_data.to_csv(index=False).encode('utf-8')
    timestamp = datetime.now().strftime('%Y%m%d_%H%M')
    st.download_button(
        label="πŸ“₯ Download Results as CSV",
        data=csv_bytes,
        file_name=f"news_extraction_{timestamp}.csv",
        mime="text/csv",
        type="primary",
    )