| import streamlit as st |
| import pandas as pd |
| from datetime import datetime |
| from search import search_news, fill_missing_urls |
| from fetch_and_extract import get_companies_and_articles |
| from helpers import match_companies_to_articles |
| from config import MAX_NEWS_PER_TOPIC, MAX_TOPICS, COMPANY_CACHE_SHEET_NAME |
| import os |
|
|
| |
# --- Page setup -------------------------------------------------------------
# NOTE: st.set_page_config must be the first Streamlit call in the script.
st.set_page_config(page_title="News Finder Agent", page_icon="π΅οΈ", layout="wide")

# Persist the last result set across Streamlit reruns so the results table and
# download button survive later widget interactions.
if 'results_data' not in st.session_state:
    st.session_state.results_data = None

st.title("π΅οΈ News Finder AI Agent")
st.markdown("Enter your topics below to generate a report of companies mentioned in the news.")

# Free-text topic entry; split on commas and capped at MAX_TOPICS on submit.
topics_input = st.text_area(
    f"1. Topics (Comma separated), maximum {MAX_TOPICS} topics",
    placeholder="e.g. Artificial Intelligence, Nvidia, Supply Chain Logistics, Green Energy...",
    help="Paste your long list of topics here. The agent will dedup and search for all of them."
)

# Three side-by-side controls: geography, look-back window, per-topic article cap.
col_geo, col_time, col_limit = st.columns(3)
|
|
| |
# Display name -> ISO 3166-1 alpha-2 code passed to the news search backend.
# "any" is a sentinel meaning no geographic restriction.
iso_countries = {
    "Global": "any",

    # Americas
    "United States": "us",
    "Canada": "ca",

    # Asia-Pacific
    "Australia": "au",
    "China": "cn",
    "India": "in",
    "Japan": "jp",
    "Malaysia": "my",
    "South Korea": "kr",
    "Singapore": "sg",
    "Taiwan": "tw",
    "Hong Kong": "hk",

    # Western Europe
    "United Kingdom": "gb",
    "Germany": "de",
    "France": "fr",
    "Italy": "it",
    "Spain": "es",
    "Netherlands": "nl",
    "Belgium": "be",
    "Switzerland": "ch",
    "Austria": "at",
    "Ireland": "ie",
    "Luxembourg": "lu",
    "Portugal": "pt",

    # Nordics
    "Sweden": "se",
    "Norway": "no",
    "Denmark": "dk",
    "Finland": "fi",
    "Iceland": "is",

    # Central / Eastern Europe
    "Poland": "pl",
    "Czech Republic": "cz",
    "Hungary": "hu",
    "Romania": "ro",
    "Ukraine": "ua",
    "Greece": "gr",
    "Turkey": "tr",
    "Bulgaria": "bg",
    "Croatia": "hr",
    "Slovakia": "sk",
    "Slovenia": "si",
    "Serbia": "rs",

    # Baltics
    "Estonia": "ee",
    "Latvia": "lv",
    "Lithuania": "lt",
}
|
|
with col_geo:
    # Country picker; the display name is mapped to the ISO code the search
    # backend expects. Index 0 defaults to "Global" (no geo restriction).
    selected_country = st.selectbox(
        "2. Geography",
        options=list(iso_countries.keys()),
        index=0
    )
    geo_code = iso_countries[selected_country]

with col_time:
    # Look-back window in days for the news search.
    days_back = st.slider(
        "3. Time Frame (Days Back)",
        min_value=1,
        max_value=30,
        value=7,
        help="How far back should we search for news?"
    )

with col_limit:
    # Per-topic article cap, bounded by the configured maximum.
    # NOTE(review): assumes MAX_NEWS_PER_TOPIC >= 10 (min_value) — confirm in config.
    max_news = st.number_input(
        "4. Max Articles per Topic",
        min_value=10,
        max_value=MAX_NEWS_PER_TOPIC,
        value=min(50, MAX_NEWS_PER_TOPIC),
        step=10,
        help=f"Control costs by limiting articles. Max allowed: {MAX_NEWS_PER_TOPIC}"
    )
|
|
| |
# --- Main action: search news, extract companies, assemble results ----------
if st.button("π Find News & Extract Companies", type="primary"):
    if not topics_input:
        st.error("β οΈ Please enter at least one topic.")
    else:
        # API keys are read from the environment at click time.
        OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
        SERPER_API_KEY = os.environ.get("SERPER_API_KEY")

        # Fail fast with a clear message instead of an opaque failure deep
        # inside the search / extraction helpers.
        if not OPENAI_API_KEY or not SERPER_API_KEY:
            st.error("Missing OPENAI_API_KEY and/or SERPER_API_KEY environment variables.")
            st.stop()

        # Split on commas, trim whitespace, and drop empty fragments.
        topic_list = [t.strip() for t in topics_input.split(",") if t.strip()]

        # Enforce the configured topic cap, telling the user what was dropped.
        if len(topic_list) > MAX_TOPICS:
            st.warning(
                f"β οΈ Limit Reached: You entered {len(topic_list)} topics. Processing only the first {MAX_TOPICS}.")
            topic_list = topic_list[:MAX_TOPICS]

        with st.status("π€ Agent is working...", expanded=True) as status:
            st.write(f"π Searching {len(topic_list)} topics in {selected_country} (Max {max_news} articles each)...")

            # Step 1: search the news backend for all topics at once.
            articles = search_news(topic_list, geo_code, days_back, max_news, SERPER_API_KEY, selected_country)

            if not articles:
                status.update(label="β No news found!", state="error")
                st.stop()

            st.write(f"β Found {len(articles)} unique articles. π οΈ Extracting companies with LLM...")

            # Step 2: fetch each article and extract company mentions via LLM.
            urls_to_process = [a['link'] for a in articles]
            articles_with_companies_from_llm = get_companies_and_articles(urls_to_process, OPENAI_API_KEY)

            # No placeholders here, so a plain string (not an f-string) suffices.
            st.write("β Generating results...")

            # Step 3: join LLM output back onto the original article metadata,
            # then backfill missing company URLs via the cache sheet / search API.
            matched_results = match_companies_to_articles(articles, articles_with_companies_from_llm)
            structured_results = fill_missing_urls(matched_results, COMPANY_CACHE_SHEET_NAME, SERPER_API_KEY)

            status.update(label="β Search Complete!", state="complete", expanded=False)

        # Cache the table so it survives Streamlit reruns (rendered below).
        if structured_results:
            st.session_state.results_data = pd.DataFrame(structured_results)
        else:
            st.warning("No companies found in the extracted text.")
|
|
| |
# --- Results panel: rendered on any rerun where cached data exists ----------
if st.session_state.results_data is not None:
    st.divider()
    st.subheader("π Extracted Data")

    # Render the two URL columns as clickable links.
    link_columns = {
        "company_url": st.column_config.LinkColumn("Website"),
        "article_url": st.column_config.LinkColumn("Source Article"),
    }
    st.dataframe(
        st.session_state.results_data,
        column_config=link_columns,
        use_container_width=True,
    )

    # Offer the same table as a timestamped CSV download.
    csv_bytes = st.session_state.results_data.to_csv(index=False).encode('utf-8')
    export_name = f"news_extraction_{datetime.now().strftime('%Y%m%d_%H%M')}.csv"
    st.download_button(
        label="π₯ Download Results as CSV",
        data=csv_bytes,
        file_name=export_name,
        mime="text/csv",
        type="primary",
    )