"""Streamlit front-end for the News Finder AI agent.

Collects topics / geography / time-frame inputs, runs the search ->
LLM-extraction -> URL-enrichment pipeline, and renders the resulting
company table with a CSV download.
"""

import os
from datetime import datetime

import pandas as pd
import streamlit as st

from config import MAX_NEWS_PER_TOPIC, MAX_TOPICS, COMPANY_CACHE_SHEET_NAME
from fetch_and_extract import get_companies_and_articles
from helpers import match_companies_to_articles
from search import search_news, fill_missing_urls

# --- PAGE CONFIGURATION ---
st.set_page_config(page_title="News Finder Agent", page_icon="🕵️", layout="wide")

# --- SESSION STATE INITIALIZATION ---
# Persist results across reruns so the table/download survive widget changes.
if 'results_data' not in st.session_state:
    st.session_state.results_data = None

# --- MAIN INTERFACE ---
st.title("🕵️ News Finder AI Agent")
st.markdown("Enter your topics below to generate a report of companies mentioned in the news.")

# 1. TOPIC INPUT
topics_input = st.text_area(
    f"1. Topics (Comma separated), maximum {MAX_TOPICS} topics",
    placeholder="e.g. Artificial Intelligence, Nvidia, Supply Chain Logistics, Green Energy...",
    help="Paste your long list of topics here. The agent will dedup and search for all of them."
)

# Three columns so geography / time frame / article limit sit side by side.
col_geo, col_time, col_limit = st.columns(3)

# 2. GEOGRAPHY INPUT
# Display name -> Serper "gl" country code ("any" disables the geo filter).
iso_countries = {
    # --- GLOBAL & NORTH AMERICA ---
    "Global": "any", "United States": "us", "Canada": "ca",
    # --- ASIA PACIFIC ---
    "Australia": "au", "China": "cn", "India": "in", "Japan": "jp",
    "Malaysia": "my", "South Korea": "kr", "Singapore": "sg",
    "Taiwan": "tw", "Hong Kong": "hk",
    # --- EUROPE (WESTERN) ---
    "United Kingdom": "gb", "Germany": "de", "France": "fr", "Italy": "it",
    "Spain": "es", "Netherlands": "nl", "Belgium": "be", "Switzerland": "ch",
    "Austria": "at", "Ireland": "ie", "Luxembourg": "lu", "Portugal": "pt",
    # --- EUROPE (NORDIC) ---
    "Sweden": "se", "Norway": "no", "Denmark": "dk", "Finland": "fi",
    "Iceland": "is",
    # --- EUROPE (CENTRAL & EASTERN) ---
    "Poland": "pl", "Czech Republic": "cz", "Hungary": "hu", "Romania": "ro",
    "Ukraine": "ua", "Greece": "gr", "Turkey": "tr", "Bulgaria": "bg",
    "Croatia": "hr", "Slovakia": "sk", "Slovenia": "si", "Serbia": "rs",
    # --- EUROPE (BALTIC) ---
    "Estonia": "ee", "Latvia": "lv", "Lithuania": "lt",
}

with col_geo:
    selected_country = st.selectbox(
        "2. Geography",
        options=list(iso_countries.keys()),
        index=0
    )
    geo_code = iso_countries[selected_country]

# 3. TIME FRAME INPUT
with col_time:
    days_back = st.slider(
        "3. Time Frame (Days Back)",
        min_value=1,
        max_value=30,
        value=7,
        help="How far back should we search for news?"
    )

# 4. MAX ARTICLES INPUT
with col_limit:
    max_news = st.number_input(
        "4. Max Articles per Topic",
        min_value=10,
        max_value=MAX_NEWS_PER_TOPIC,  # Restricted by config
        value=min(50, MAX_NEWS_PER_TOPIC),
        step=10,
        help=f"Control costs by limiting articles. Max allowed: {MAX_NEWS_PER_TOPIC}"
    )

# --- ACTION BUTTON ---
if st.button("🚀 Find News & Extract Companies", type="primary"):
    # .strip() so whitespace-only input is also rejected.
    if not topics_input.strip():
        st.error("⚠️ Please enter at least one topic.")
    else:
        OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
        SERPER_API_KEY = os.environ.get("SERPER_API_KEY")

        # FIX: fail fast with a clear message instead of passing None keys
        # into the search / LLM calls and erroring deep in the pipeline.
        if not OPENAI_API_KEY or not SERPER_API_KEY:
            st.error("⚠️ Missing API keys: please set the OPENAI_API_KEY and SERPER_API_KEY environment variables.")
            st.stop()

        # Clean, then de-duplicate while preserving order — the help text
        # above promises dedup, so honour it here.
        cleaned = [t.strip() for t in topics_input.split(",") if t.strip()]
        topic_list = list(dict.fromkeys(cleaned))

        # ENFORCE LIMIT ON TOPICS
        if len(topic_list) > MAX_TOPICS:
            st.warning(
                f"⚠️ Limit Reached: You entered {len(topic_list)} topics. Processing only the first {MAX_TOPICS}.")
            topic_list = topic_list[:MAX_TOPICS]

        with st.status("🤖 Agent is working...", expanded=True) as status:
            st.write(f"🔍 Searching {len(topic_list)} topics in {selected_country} (Max {max_news} articles each)...")

            # 1. Search News
            articles = search_news(topic_list, geo_code, days_back, max_news, SERPER_API_KEY, selected_country)

            if not articles:
                status.update(label="❌ No news found!", state="error")
                st.stop()

            st.write(f"✅ Found {len(articles)} unique articles. 🛠️ Extracting companies with LLM...")

            # 2. Extract Companies (LLM)
            urls_to_process = [a['link'] for a in articles]
            articles_with_companies_from_llm = get_companies_and_articles(urls_to_process, OPENAI_API_KEY)

            st.write("✅ Generating results...")

            # 3. Combine & Fill URLs
            matched_results = match_companies_to_articles(articles, articles_with_companies_from_llm)
            structured_results = fill_missing_urls(matched_results, COMPANY_CACHE_SHEET_NAME, SERPER_API_KEY)

            status.update(label="✅ Search Complete!", state="complete", expanded=False)

        # SAVE RESULTS
        if structured_results:
            st.session_state.results_data = pd.DataFrame(structured_results)
        else:
            st.warning("No companies found in the extracted text.")

# --- RESULTS & DOWNLOAD ---
if st.session_state.results_data is not None:
    st.divider()
    st.subheader("📂 Extracted Data")
    st.dataframe(
        st.session_state.results_data,
        column_config={
            "company_url": st.column_config.LinkColumn(
                "Website"  # Full URL shown, clickable
            ),
            "article_url": st.column_config.LinkColumn(
                "Source Article"  # Full URL shown, clickable
            ),
        },
        use_container_width=True
    )

    csv = st.session_state.results_data.to_csv(index=False).encode('utf-8')
    st.download_button(
        label="📥 Download Results as CSV",
        data=csv,
        file_name=f"news_extraction_{datetime.now().strftime('%Y%m%d_%H%M')}.csv",
        mime="text/csv",
        type="primary"
    )