import streamlit as st
import pandas as pd
from datetime import datetime
from search import search_news, fill_missing_urls
from fetch_and_extract import get_companies_and_articles
from helpers import match_companies_to_articles
from config import MAX_NEWS_PER_TOPIC, MAX_TOPICS, COMPANY_CACHE_SHEET_NAME
import os
# --- PAGE CONFIGURATION ---
st.set_page_config(page_title="News Finder Agent", page_icon="π΅οΈ", layout="wide")

# --- SESSION STATE INITIALIZATION ---
# Seed the result slot exactly once so Streamlit reruns don't wipe a finished report.
st.session_state.setdefault('results_data', None)

# --- MAIN INTERFACE ---
st.title("π΅οΈ News Finder AI Agent")
st.markdown("Enter your topics below to generate a report of companies mentioned in the news.")
# 1. TOPIC INPUT
# Free-text field; topics are comma separated and capped at MAX_TOPICS downstream.
topics_label = f"1. Topics (Comma separated), maximum {MAX_TOPICS} topics"
topics_input = st.text_area(
    topics_label,
    placeholder="e.g. Artificial Intelligence, Nvidia, Supply Chain Logistics, Green Energy...",
    help="Paste your long list of topics here. The agent will dedup and search for all of them.",
)

# Three side-by-side columns: geography, time frame, and article limit.
col_geo, col_time, col_limit = st.columns(3)
# 2. GEOGRAPHY INPUT
# Display name -> Serper ISO country code ("any" disables the geo filter).
# Built from an ordered pair list; insertion order is preserved by dict and
# drives the display order of the Geography selectbox.
iso_countries = dict([
    # --- GLOBAL & NORTH AMERICA ---
    ("Global", "any"),
    ("United States", "us"),
    ("Canada", "ca"),
    # --- ASIA PACIFIC ---
    ("Australia", "au"),
    ("China", "cn"),
    ("India", "in"),
    ("Japan", "jp"),
    ("Malaysia", "my"),
    ("South Korea", "kr"),
    ("Singapore", "sg"),
    ("Taiwan", "tw"),
    ("Hong Kong", "hk"),
    # --- EUROPE (WESTERN) ---
    ("United Kingdom", "gb"),
    ("Germany", "de"),
    ("France", "fr"),
    ("Italy", "it"),
    ("Spain", "es"),
    ("Netherlands", "nl"),
    ("Belgium", "be"),
    ("Switzerland", "ch"),
    ("Austria", "at"),
    ("Ireland", "ie"),
    ("Luxembourg", "lu"),
    ("Portugal", "pt"),
    # --- EUROPE (NORDIC) ---
    ("Sweden", "se"),
    ("Norway", "no"),
    ("Denmark", "dk"),
    ("Finland", "fi"),
    ("Iceland", "is"),
    # --- EUROPE (CENTRAL & EASTERN) ---
    ("Poland", "pl"),
    ("Czech Republic", "cz"),
    ("Hungary", "hu"),
    ("Romania", "ro"),
    ("Ukraine", "ua"),
    ("Greece", "gr"),
    ("Turkey", "tr"),
    ("Bulgaria", "bg"),
    ("Croatia", "hr"),
    ("Slovakia", "sk"),
    ("Slovenia", "si"),
    ("Serbia", "rs"),
    # --- EUROPE (BALTIC) ---
    ("Estonia", "ee"),
    ("Latvia", "lv"),
    ("Lithuania", "lt"),
])
with col_geo:
    # Selectbox shows country names; we map back to the ISO code afterwards.
    country_names = list(iso_countries)
    selected_country = st.selectbox(
        "2. Geography",
        options=country_names,
        index=0,  # default to "Global"
    )
    geo_code = iso_countries[selected_country]
# 3. TIME FRAME INPUT
with col_time:
    # Look-back window in days, 1..30, defaulting to one week.
    days_back = st.slider(
        "3. Time Frame (Days Back)",
        min_value=1, max_value=30, value=7,
        help="How far back should we search for news?",
    )
# 4. MAX ARTICLES INPUT
with col_limit:
    # Default to 50 but never above the configured ceiling.
    default_articles = min(50, MAX_NEWS_PER_TOPIC)
    max_news = st.number_input(
        "4. Max Articles per Topic",
        min_value=10,
        max_value=MAX_NEWS_PER_TOPIC,  # hard cap from config
        value=default_articles,
        step=10,
        help=f"Control costs by limiting articles. Max allowed: {MAX_NEWS_PER_TOPIC}",
    )
# --- ACTION BUTTON ---
# Runs the full pipeline on click: search -> LLM extraction -> match -> URL fill.
if st.button("π Find News & Extract Companies", type="primary"):
    if not topics_input:
        st.error("β οΈ Please enter at least one topic.")
    else:
        # API keys are read from the environment at click time, not import time.
        OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
        SERPER_API_KEY = os.environ.get("SERPER_API_KEY")
        topic_list = [t.strip() for t in topics_input.split(",") if t.strip()]
        # Enforce the configured topic limit to cap API usage.
        if len(topic_list) > MAX_TOPICS:
            st.warning(
                f"β οΈ Limit Reached: You entered {len(topic_list)} topics. Processing only the first {MAX_TOPICS}.")
            topic_list = topic_list[:MAX_TOPICS]
        with st.status("π€ Agent is working...", expanded=True) as status:
            st.write(f"π Searching {len(topic_list)} topics in {selected_country} (Max {max_news} articles each)...")
            # 1. Search news via Serper for every topic.
            articles = search_news(topic_list, geo_code, days_back, max_news, SERPER_API_KEY, selected_country)
            if not articles:
                status.update(label="β No news found!", state="error")
                st.stop()
            # FIX: these three status strings were broken across physical lines in
            # the original (a syntax error for single-quoted f-strings); rejoined
            # onto one line each, characters otherwise preserved.
            st.write(f"β Found {len(articles)} unique articles. π οΈ Extracting companies with LLM...")
            # 2. Extract companies from each article with the LLM.
            urls_to_process = [a['link'] for a in articles]
            articles_with_companies_from_llm = get_companies_and_articles(urls_to_process, OPENAI_API_KEY)
            st.write("β Generating results...")
            # 3. Combine search hits with LLM output and fill missing company URLs.
            matched_results = match_companies_to_articles(articles, articles_with_companies_from_llm)
            structured_results = fill_missing_urls(matched_results, COMPANY_CACHE_SHEET_NAME, SERPER_API_KEY)
            status.update(label="β Search Complete!", state="complete", expanded=False)
        # SAVE RESULTS so they survive Streamlit reruns (e.g. the download click).
        if structured_results:
            st.session_state.results_data = pd.DataFrame(structured_results)
        else:
            st.warning("No companies found in the extracted text.")
# --- RESULTS & DOWNLOAD ---
# Rendered on every run so results persist across reruns via session state.
if st.session_state.results_data is not None:
    st.divider()
    st.subheader("π Extracted Data")
    st.dataframe(
        st.session_state.results_data,
        column_config={
            # Render both URL columns as clickable links.
            "company_url": st.column_config.LinkColumn("Website"),
            "article_url": st.column_config.LinkColumn("Source Article"),
        },
        use_container_width=True
    )
    # FIX: renamed local from `csv` (which shadowed the stdlib `csv` module
    # name) to `csv_bytes`; it holds the UTF-8 encoded CSV payload.
    csv_bytes = st.session_state.results_data.to_csv(index=False).encode('utf-8')
    st.download_button(
        label="π₯ Download Results as CSV",
        data=csv_bytes,
        file_name=f"news_extraction_{datetime.now().strftime('%Y%m%d_%H%M')}.csv",
        mime="text/csv",
        type="primary"
    )