Frontend_test / streamlit_app.py
danielhjerresen's picture
Update streamlit_app.py
a253752 verified
Raw
History Blame Contribute Delete
13.9 kB
# frontend/streamlit_app.py
import json
import os
import pandas as pd
import requests
import streamlit as st
# Page-level Streamlit settings: browser tab title, tab icon, and a
# full-width ("wide") layout.
st.set_page_config(
    page_title="Green Energy News Event Dashboard",
    page_icon="📰",
    layout="wide",
)
# Base URL of the backend API. The API_BASE_URL environment variable, when
# set, takes precedence over the hosted default below.
API_BASE_URL = os.environ.get(
    "API_BASE_URL",
    "https://danielhjerresen-API-test.hf.space",
)
def ensure_columns(df: pd.DataFrame, columns: list[str]) -> pd.DataFrame:
    """Return a copy of ``df`` that contains every name in ``columns``.

    Columns that already exist are left untouched; each missing one is
    added and filled with ``None``. The input frame is not modified.
    """
    result = df.copy()
    absent = [name for name in columns if name not in result.columns]
    for name in absent:
        result[name] = None
    return result
@st.cache_data(ttl=300)
def _fetch_classified_articles() -> pd.DataFrame:
    """Fetch the classified articles from the API and prepare them for display.

    Any request, HTTP-status, JSON or DataFrame-construction error is allowed
    to propagate, so that only successful results end up in the cache.

    Returns:
        The article table with the expected columns present, ``published_at``
        and ``classified_at`` parsed as UTC timestamps (unparseable values
        become ``NaT``), and the derived ``published_date`` /
        ``published_day`` columns. An empty API response yields an empty
        frame without those columns.
    """
    response = requests.get(
        f"{API_BASE_URL}/articles",
        params={"limit": 500},
        timeout=30,
    )
    response.raise_for_status()

    df = pd.DataFrame(response.json())
    if df.empty:
        return df

    df = ensure_columns(
        df,
        [
            "article_id",
            "title",
            "description",
            "source",
            "label",
            "raw_label",
            "url",
            "published_at",
            "classified_at",
        ],
    )

    for timestamp_column in ("published_at", "classified_at"):
        df[timestamp_column] = pd.to_datetime(
            df[timestamp_column],
            errors="coerce",
            utc=True,
        )

    df["published_date"] = df["published_at"].dt.date
    df["published_day"] = df["published_at"].dt.strftime("%Y-%m-%d")
    return df


def load_classified_articles() -> pd.DataFrame:
    """Return the classified articles, or an empty frame if loading fails.

    The error handling deliberately lives outside the cached fetch: catching
    the exception inside the cached function would store the empty fallback
    for the whole TTL, hiding a recovered API for up to five minutes.

    Returns:
        The prepared article table, or an empty ``DataFrame`` after showing
        an error message when the API call or parsing fails.
    """
    try:
        return _fetch_classified_articles()
    except Exception as error:  # UI boundary: report the problem, keep the app alive.
        st.error(f"Failed to load articles from API: {error}")
        return pd.DataFrame()
@st.cache_data(ttl=300)
def _fetch_daily_summary() -> dict:
    """Fetch the daily summary from the API; raises on any failure.

    Errors propagate to the caller so that only successful responses are
    cached.

    Returns:
        The normalized summary, or ``{}`` when the API returns something
        other than a JSON object.
    """
    response = requests.get(f"{API_BASE_URL}/summary/daily", timeout=30)
    response.raise_for_status()
    summary = response.json()
    if not isinstance(summary, dict):
        return {}
    return normalize_summary_payload(summary)


def load_daily_summary() -> dict:
    """Return the normalized daily summary, or ``{}`` if loading fails.

    The error handling is kept outside the cached fetch so that a transient
    API failure is retried on the next rerun instead of being cached as an
    empty summary for the whole TTL.
    """
    try:
        return _fetch_daily_summary()
    except Exception as error:  # UI boundary: report the problem, keep the app alive.
        st.error(f"Failed to load daily summary: {error}")
        return {}
def normalize_summary_payload(summary: dict) -> dict:
    """Flatten and default a raw daily-summary payload.

    A ``summary_json`` entry, given either as a dict or as a JSON string that
    decodes to a dict, is merged over the top-level keys. Afterwards:

    * ``executive_summary`` falls back to ``short_summary``, then ``""``;
    * ``recommended_focus`` falls back to ``key_focus``, then ``""``;
    * ``decision_implications``, ``watchlist`` and ``top_stories`` are
      replaced by ``[]`` unless they are already lists.

    The input dict is not modified.
    """
    normalized = dict(summary)

    nested = summary.get("summary_json")
    if isinstance(nested, str):
        try:
            nested = json.loads(nested)
        except Exception:
            # Best effort: an undecodable nested payload is simply ignored.
            nested = None
    if isinstance(nested, dict):
        normalized.update(nested)

    text_fallbacks = (
        ("executive_summary", "short_summary"),
        ("recommended_focus", "key_focus"),
    )
    for field, alternate in text_fallbacks:
        normalized[field] = (
            normalized.get(field) or normalized.get(alternate) or ""
        )

    for field in ("decision_implications", "watchlist", "top_stories"):
        if not isinstance(normalized.get(field), list):
            normalized[field] = []

    return normalized
def is_valid_url(value: object) -> bool:
    """Return True when ``value`` is a string starting with http:// or https://.

    The prefix check is case-sensitive; non-string values are never valid.
    """
    return isinstance(value, str) and value.startswith(("http://", "https://"))
def apply_filters(df: pd.DataFrame) -> pd.DataFrame:
    """Render the sidebar filter widgets and return the rows that pass them.

    Filters offered: action category (everything except
    "not relevant to field" is preselected), source, publication date range,
    and a case-insensitive literal search over title and description.
    An empty category or source selection means "no restriction".

    Args:
        df: Article table with ``label``, ``source``, ``title``,
            ``description`` and ``published_date`` columns.

    Returns:
        A filtered copy of ``df``. When a complete date range is selected,
        rows without a publication date are excluded.
    """
    st.sidebar.header("Filters")

    label_options = sorted(df["label"].dropna().unique().tolist())
    source_options = sorted(df["source"].dropna().unique().tolist())
    default_labels = [
        label
        for label in label_options
        if label != "not relevant to field"
    ]

    selected_labels = st.sidebar.multiselect(
        "Action categories",
        options=label_options,
        default=default_labels,
    )
    selected_sources = st.sidebar.multiselect(
        "Sources",
        options=source_options,
        default=[],
    )

    # published_date is an object column mixing datetime.date values with
    # missing markers for unparseable timestamps. Reducing or comparing that
    # mix raises TypeError, so only the known dates define the bounds.
    known_dates = df["published_date"].dropna()
    date_range = None
    if not known_dates.empty:
        min_date = known_dates.min()
        max_date = known_dates.max()
        date_range = st.sidebar.date_input(
            "Date range",
            value=(min_date, max_date),
            min_value=min_date,
            max_value=max_date,
        )

    search_term = st.sidebar.text_input("Search title or description")

    filtered = df.copy()

    if selected_labels:
        filtered = filtered[filtered["label"].isin(selected_labels)]

    if selected_sources:
        filtered = filtered[filtered["source"].isin(selected_sources)]

    # The widget yields a one-element tuple while the user is still picking
    # the end date; only filter once both ends are known.
    if date_range and len(date_range) == 2:
        start_date, end_date = date_range
        in_range = filtered["published_date"].map(
            lambda day: pd.notna(day) and start_date <= day <= end_date
        )
        filtered = filtered[in_range.astype(bool)]

    if search_term:
        search_term = search_term.strip()
        title_matches = filtered["title"].fillna("").str.contains(
            search_term,
            case=False,
            na=False,
            regex=False,
        )
        description_matches = filtered["description"].fillna("").str.contains(
            search_term,
            case=False,
            na=False,
            regex=False,
        )
        filtered = filtered[title_matches | description_matches]

    return filtered
def render_metrics(df: pd.DataFrame, filtered_df: pd.DataFrame) -> None:
    """Show four headline numbers side by side.

    Total article count, number of rows in ``filtered_df``, and the number
    of distinct sources and labels in the full table ``df``.
    """
    slots = st.columns(4)
    figures = (
        ("Articles", len(df)),
        ("Shown", len(filtered_df)),
        ("Sources", df["source"].nunique()),
        ("Categories", df["label"].nunique()),
    )
    for slot, (caption, value) in zip(slots, figures):
        slot.metric(caption, value)
def render_bullet_list(items: list[str], empty_message: str) -> None:
    """Render each item as its own markdown bullet.

    When ``items`` is empty (or otherwise falsy), ``empty_message`` is shown
    in an info box instead.
    """
    if items:
        for entry in items:
            st.markdown(f"- {entry}")
    else:
        st.info(empty_message)
def get_summary_source_articles(
    df: pd.DataFrame,
    summary: dict,
    fallback_limit: int = 15,
) -> pd.DataFrame:
    """Select the articles that back the daily summary.

    Article ids listed in the summary's ``top_stories`` are matched against
    ``df["article_id"]`` by string comparison. If that yields at least one
    row, those rows are returned. Otherwise the result is the
    ``fallback_limit`` most recently published articles whose label is not
    "not relevant to field".
    """
    wanted_ids = set()
    for story in summary.get("top_stories", []):
        if isinstance(story, dict) and story.get("article_id"):
            wanted_ids.add(str(story["article_id"]))

    if wanted_ids:
        id_hits = df["article_id"].astype(str).isin(wanted_ids)
        hits = df[id_hits]
        if not hits.empty:
            return hits

    is_relevant = df["label"] != "not relevant to field"
    candidates = df[is_relevant].copy()
    if "published_at" in candidates:
        candidates = candidates.sort_values("published_at", ascending=False)
    return candidates.head(fallback_limit)
def render_daily_summary_source_basis(
    df: pd.DataFrame,
    summary: dict,
) -> pd.DataFrame:
    """Caption the summary with its generation time and top-story count.

    The generation time is included only when ``generated_at`` is present
    and parses as a timestamp; it is shown in UTC.

    Returns:
        The articles chosen by ``get_summary_source_articles`` for this
        summary.
    """
    summary_df = get_summary_source_articles(df, summary)
    story_total = len(summary.get("top_stories", []))

    generated_label = None
    raw_generated = summary.get("generated_at")
    if raw_generated:
        timestamp = pd.to_datetime(raw_generated, errors="coerce", utc=True)
        if pd.notnull(timestamp):
            generated_label = timestamp.strftime("%Y-%m-%d %H:%M UTC")

    if generated_label:
        caption_text = (
            f"Summary generated {generated_label} · "
            f"{story_total} top stories included"
        )
    else:
        caption_text = f"{story_total} top stories included in this summary"
    st.caption(caption_text)

    return summary_df
def render_daily_summary(summary: dict) -> None:
    """Render the daily summary: headline sections, bullet lists, top stories.

    Args:
        summary: Normalized summary payload. An empty dict renders a
            "no summary" notice only.

    Missing values are handled with ``or`` fallbacks rather than
    ``dict.get`` defaults, because a key that is present with a JSON
    ``null`` bypasses the ``get`` default and would otherwise be rendered
    as "None" or passed to ``st.expander`` as a non-string label.
    """
    st.subheader("Daily AI Summary")

    if not summary:
        st.info("No daily summary available yet.")
        return

    summary_date = summary.get("summary_date") or "Unknown"
    generated_at = summary.get("generated_at")
    if generated_at:
        st.caption(f"Summary date: {summary_date} · Generated at: {generated_at}")
    else:
        st.caption(f"Summary date: {summary_date}")

    st.markdown("### Executive Summary")
    st.write(summary.get("executive_summary") or "No summary available.")

    st.markdown("### Key Signal")
    st.write(summary.get("key_signal") or "No key signal available.")

    st.markdown("### Recommended Focus")
    st.write(summary.get("recommended_focus") or "No focus available.")

    st.markdown("### Decision Implications")
    render_bullet_list(
        summary.get("decision_implications") or [],
        "No decision implications available.",
    )

    st.markdown("### Watchlist")
    render_bullet_list(
        summary.get("watchlist") or [],
        "No watchlist available.",
    )

    stories = summary.get("top_stories") or []
    if not stories:
        st.info("No top stories available.")
        return

    st.markdown("### Top Stories")
    for story in stories:
        if not isinstance(story, dict):
            continue

        title = story.get("title") or "Untitled story"
        label = story.get("label") or "Unknown"
        source = story.get("source") or "Unknown source"
        description = story.get("description") or ""
        why_it_matters = story.get("why_it_matters") or ""
        decision_relevance = story.get("decision_relevance") or ""
        url = story.get("url")
        article_id = story.get("article_id")

        # Show the publication time in UTC; anything missing or
        # unparseable is reported as unknown.
        published_label = "Unknown date"
        raw_published = story.get("published_at")
        if pd.notnull(raw_published):
            parsed = pd.to_datetime(raw_published, errors="coerce", utc=True)
            if pd.notnull(parsed):
                published_label = parsed.strftime("%Y-%m-%d %H:%M UTC")

        # The expander label must be a string.
        with st.expander(str(title)):
            c1, c2, c3 = st.columns(3)
            c1.markdown(f"**Category:** {label}")
            c2.markdown(f"**Source:** {source}")
            c3.markdown(f"**Published:** {published_label}")

            if description:
                st.markdown("**Description**")
                st.write(description)

            if why_it_matters:
                st.markdown("**Why this matters**")
                st.write(why_it_matters)

            if decision_relevance:
                st.markdown("**Decision relevance**")
                st.write(decision_relevance)

            if is_valid_url(url):
                st.link_button("Open article", url)

            if article_id:
                st.caption(f"Article ID: {article_id}")
def _text_or_default(value: object, default: str) -> str:
    """Return ``value`` as text, or ``default`` when it is missing or blank."""
    if pd.notnull(value) and str(value).strip():
        return str(value)
    return default


def render_article_browser(df: pd.DataFrame) -> None:
    """Render a sortable list of articles, one expander per article.

    Args:
        df: Articles to show (typically already filtered). An empty frame
            renders an info notice only.

    Title, label and source go through ``_text_or_default``: a
    ``row.get(..., default)`` fallback never fires for a column that exists,
    and a missing cell (``None``/``NaN``) would otherwise be displayed as
    "None"/"nan" or handed to ``st.expander`` as a non-string label.
    """
    st.subheader("Article Browser")

    if df.empty:
        st.info("No articles available for browsing.")
        return

    sort_option = st.selectbox(
        "Sort articles by",
        options=[
            "Newest first",
            "Oldest first",
            "Action category",
            "Source",
        ],
        index=0,
    )

    display_df = df.copy()
    if sort_option == "Newest first":
        display_df = display_df.sort_values("published_at", ascending=False)
    elif sort_option == "Oldest first":
        display_df = display_df.sort_values("published_at", ascending=True)
    elif sort_option == "Action category":
        display_df = display_df.sort_values(
            ["label", "published_at"],
            ascending=[True, False],
        )
    elif sort_option == "Source":
        display_df = display_df.sort_values(
            ["source", "published_at"],
            ascending=[True, False],
        )

    max_rows = st.slider("Number of articles to display", 5, 100, 20)
    display_df = display_df.head(max_rows)

    for _, row in display_df.iterrows():
        title = _text_or_default(row.get("title"), "Untitled article")
        label = _text_or_default(row.get("label"), "Unknown")
        source = _text_or_default(row.get("source"), "Unknown source")
        published_str = (
            row["published_at"].strftime("%Y-%m-%d %H:%M UTC")
            if pd.notnull(row.get("published_at"))
            else "Unknown"
        )

        with st.expander(title):
            meta1, meta2, meta3 = st.columns(3)
            meta1.markdown(f"**Action:** {label}")
            meta2.markdown(f"**Source:** {source}")
            meta3.markdown(f"**Published:** {published_str}")

            description = row.get("description")
            if pd.notnull(description) and str(description).strip():
                st.markdown("**Description**")
                st.write(description)

            url = row.get("url")
            if is_valid_url(url):
                st.link_button("Open article", url)

            st.markdown("**More details**")

            article_id = row.get("article_id")
            if pd.notnull(article_id):
                st.caption(f"Article ID: {article_id}")

            raw_label = row.get("raw_label")
            if pd.notnull(raw_label) and str(raw_label).strip():
                st.caption(f"Model output: {raw_label}")
def main() -> None:
    """Render the dashboard: intro text, view switch, and the chosen view.

    Stops after a warning when no articles could be loaded. Otherwise shows
    either the daily summary view or the filterable article browser.
    """
    st.title("📰 Green Energy News Event Dashboard")
    st.write(
        "This dashboard gives an overview of classified green energy and climate-tech news, "
        "with filters for action categories, dates, sources, and search terms."
    )

    df = load_classified_articles()
    if df.empty:
        st.warning(
            "No classified articles found yet. "
            "Check whether the API is live and returning data."
        )
        return

    section = st.segmented_control(
        "View",
        options=["Daily Summary", "Articles"],
        default="Daily Summary",
    )

    if section == "Articles":
        filtered_df = apply_filters(df)
        render_metrics(df, filtered_df)
        render_article_browser(filtered_df)
    else:
        # The control can report no selection (None); treat that like the
        # default view instead of rendering an empty page.
        # The summary is fetched only here, the one place it is used, so a
        # summary outage does not surface as an error in the Articles view.
        summary = load_daily_summary()
        summary_df = render_daily_summary_source_basis(df, summary)
        render_metrics(df, summary_df)
        render_daily_summary(summary)
# Build the page only when this file is executed as the main script.
if __name__ == "__main__":
    main()