Spaces:
Sleeping
Sleeping
"""Streamlit front-end for the News Finder AI Agent.

Collects topics, geography, time frame and a per-topic article limit from
the user, then runs the pipeline: news search -> LLM company extraction ->
company-URL enrichment, and renders the results as a downloadable table.

NOTE(review): this file was recovered from a web-page scrape; markdown
table pipes were stripped, indentation reconstructed, and mojibake emoji
in UI strings repaired to their evident originals — confirm the glyphs
against the deployed app.
"""
import os
from datetime import datetime

import pandas as pd
import streamlit as st

from config import MAX_NEWS_PER_TOPIC, MAX_TOPICS, COMPANY_CACHE_SHEET_NAME
from fetch_and_extract import get_companies_and_articles
from helpers import match_companies_to_articles
from search import search_news, fill_missing_urls

# --- PAGE CONFIGURATION ---
st.set_page_config(page_title="News Finder Agent", page_icon="🕵️", layout="wide")

# --- SESSION STATE INITIALIZATION ---
# results_data survives reruns so the results table persists after the
# "Find News" button press completes.
if 'results_data' not in st.session_state:
    st.session_state.results_data = None

# --- MAIN INTERFACE ---
st.title("🕵️ News Finder AI Agent")
st.markdown("Enter your topics below to generate a report of companies mentioned in the news.")

# 1. TOPIC INPUT
topics_input = st.text_area(
    f"1. Topics (Comma separated), maximum {MAX_TOPICS} topics",
    placeholder="e.g. Artificial Intelligence, Nvidia, Supply Chain Logistics, Green Energy...",
    help="Paste your long list of topics here. The agent will dedup and search for all of them."
)

# CHANGED: Created 3 columns to fit the new field neatly
col_geo, col_time, col_limit = st.columns(3)

# 2. GEOGRAPHY INPUT
# Display name -> ISO 3166-1 alpha-2 code passed to the search API
# ("any" = no geographic restriction).
iso_countries = {
    # --- GLOBAL & NORTH AMERICA ---
    "Global": "any",
    "United States": "us",
    "Canada": "ca",
    # --- ASIA PACIFIC ---
    "Australia": "au",
    "China": "cn",
    "India": "in",
    "Japan": "jp",
    "Malaysia": "my",
    "South Korea": "kr",
    "Singapore": "sg",
    "Taiwan": "tw",
    "Hong Kong": "hk",
    # --- EUROPE (WESTERN) ---
    "United Kingdom": "gb",
    "Germany": "de",
    "France": "fr",
    "Italy": "it",
    "Spain": "es",
    "Netherlands": "nl",
    "Belgium": "be",
    "Switzerland": "ch",
    "Austria": "at",
    "Ireland": "ie",
    "Luxembourg": "lu",
    "Portugal": "pt",
    # --- EUROPE (NORDIC) ---
    "Sweden": "se",
    "Norway": "no",
    "Denmark": "dk",
    "Finland": "fi",
    "Iceland": "is",
    # --- EUROPE (CENTRAL & EASTERN) ---
    "Poland": "pl",
    "Czech Republic": "cz",
    "Hungary": "hu",
    "Romania": "ro",
    "Ukraine": "ua",
    "Greece": "gr",
    "Turkey": "tr",
    "Bulgaria": "bg",
    "Croatia": "hr",
    "Slovakia": "sk",
    "Slovenia": "si",
    "Serbia": "rs",
    # --- EUROPE (BALTIC) ---
    "Estonia": "ee",
    "Latvia": "lv",
    "Lithuania": "lt",
}

with col_geo:
    selected_country = st.selectbox(
        "2. Geography",
        options=list(iso_countries.keys()),
        index=0  # default to "Global"
    )
    geo_code = iso_countries[selected_country]

# 3. TIME FRAME INPUT
with col_time:
    days_back = st.slider(
        "3. Time Frame (Days Back)",
        min_value=1,
        max_value=30,
        value=7,
        help="How far back should we search for news?"
    )

# 4. MAX ARTICLES INPUT
with col_limit:
    max_news = st.number_input(
        "4. Max Articles per Topic",
        min_value=10,
        max_value=MAX_NEWS_PER_TOPIC,  # Restricted by config
        value=min(50, MAX_NEWS_PER_TOPIC),
        step=10,
        help=f"Control costs by limiting articles. Max allowed: {MAX_NEWS_PER_TOPIC}"
    )

# --- ACTION BUTTON ---
if st.button("🚀 Find News & Extract Companies", type="primary"):
    if not topics_input:
        st.error("⚠️ Please enter at least one topic.")
    else:
        # API keys are read from the environment at click time, not import time.
        OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
        SERPER_API_KEY = os.environ.get('SERPER_API_KEY')

        topic_list = [t.strip() for t in topics_input.split(",") if t.strip()]

        # ENFORCE LIMIT ON TOPICS
        if len(topic_list) > MAX_TOPICS:
            st.warning(
                f"⚠️ Limit Reached: You entered {len(topic_list)} topics. Processing only the first {MAX_TOPICS}.")
            topic_list = topic_list[:MAX_TOPICS]

        with st.status("🤖 Agent is working...", expanded=True) as status:
            st.write(f"🔍 Searching {len(topic_list)} topics in {selected_country} (Max {max_news} articles each)...")

            # 1. Search News
            articles = search_news(topic_list, geo_code, days_back, max_news, SERPER_API_KEY, selected_country)
            if not articles:
                status.update(label="❌ No news found!", state="error")
                st.stop()

            st.write(f"✅ Found {len(articles)} unique articles. 🛠️ Extracting companies with LLM...")

            # 2. Extract Companies (LLM)
            urls_to_process = [a['link'] for a in articles]
            articles_with_companies_from_llm = get_companies_and_articles(urls_to_process, OPENAI_API_KEY)

            st.write("✅ Generating results...")

            # 3. Combine & Fill URLs
            matched_results = match_companies_to_articles(articles, articles_with_companies_from_llm)
            structured_results = fill_missing_urls(matched_results, COMPANY_CACHE_SHEET_NAME, SERPER_API_KEY)

            status.update(label="✅ Search Complete!", state="complete", expanded=False)

        # SAVE RESULTS
        if structured_results:
            st.session_state.results_data = pd.DataFrame(structured_results)
        else:
            st.warning("No companies found in the extracted text.")

# --- RESULTS & DOWNLOAD ---
if st.session_state.results_data is not None:
    st.divider()
    st.subheader("📊 Extracted Data")
    st.dataframe(
        st.session_state.results_data,
        column_config={
            "company_url": st.column_config.LinkColumn(
                "Website"  # Full URL shown, clickable
            ),
            "article_url": st.column_config.LinkColumn(
                "Source Article"  # Full URL shown, clickable
            ),
        },
        use_container_width=True
    )

    csv = st.session_state.results_data.to_csv(index=False).encode('utf-8')
    st.download_button(
        label="📥 Download Results as CSV",
        data=csv,
        file_name=f"news_extraction_{datetime.now().strftime('%Y%m%d_%H%M')}.csv",
        mime="text/csv",
        type="primary"
    )