Spaces:
Sleeping
Sleeping
| # frontend/streamlit_app.py | |
| import json | |
| import os | |
| import pandas as pd | |
| import requests | |
| import streamlit as st | |
# Page-level Streamlit settings: title, icon, and the wide layout.
st.set_page_config(
    layout="wide",
    page_icon="📰",
    page_title="Green Energy News Event Dashboard",
)
# Base URL of the backend API. It can be overridden through the API_BASE_URL
# environment variable. Trailing slashes are stripped because every request
# in this file is built as f"{API_BASE_URL}/<path>", which would otherwise
# produce a double slash when the override ends with "/".
API_BASE_URL = os.getenv(
    "API_BASE_URL",
    "https://danielhjerresen-API-test.hf.space",
).rstrip("/")
def ensure_columns(df: pd.DataFrame, columns: list[str]) -> pd.DataFrame:
    """Return a copy of ``df`` that contains every name in ``columns``.

    Columns that already exist are left untouched. Each absent column is
    appended in the order given and filled with ``None``. The frame passed
    in is never modified.
    """
    result = df.copy()
    absent = [name for name in columns if name not in result.columns]
    for name in absent:
        result[name] = None
    return result
def load_classified_articles() -> pd.DataFrame:
    """Fetch classified articles from the API and return them as a DataFrame.

    Sends ``GET {API_BASE_URL}/articles`` with ``limit=500`` and a 30 second
    timeout. When the response yields a non-empty frame, the expected columns
    are guaranteed to exist, ``published_at`` and ``classified_at`` are parsed
    as UTC timestamps (unparseable values become ``NaT``), and two derived
    columns are added: ``published_date`` and ``published_day``.

    Returns:
        The prepared frame; the unmodified empty frame when the API returned
        no rows; or a fresh empty frame after any failure, in which case the
        error is also shown with ``st.error``.
    """
    expected_columns = [
        "article_id",
        "title",
        "description",
        "source",
        "label",
        "raw_label",
        "url",
        "published_at",
        "classified_at",
    ]

    try:
        reply = requests.get(
            f"{API_BASE_URL}/articles",
            params={"limit": 500},
            timeout=30,
        )
        reply.raise_for_status()

        articles = pd.DataFrame(reply.json())
        if articles.empty:
            return articles

        articles = ensure_columns(articles, expected_columns)

        # Both timestamp columns get the same lenient, UTC-aware parsing.
        for stamp_column in ("published_at", "classified_at"):
            articles[stamp_column] = pd.to_datetime(
                articles[stamp_column],
                errors="coerce",
                utc=True,
            )

        published = articles["published_at"]
        articles["published_date"] = published.dt.date
        articles["published_day"] = published.dt.strftime("%Y-%m-%d")
        return articles
    except Exception as error:
        st.error(f"Failed to load articles from API: {error}")
        return pd.DataFrame()
def load_daily_summary() -> dict:
    """Fetch the daily summary from the API and return it normalized.

    Sends ``GET {API_BASE_URL}/summary/daily`` with a 30 second timeout.
    A JSON object is passed through ``normalize_summary_payload``; any other
    JSON value yields an empty dict. On any failure the error is shown with
    ``st.error`` and an empty dict is returned.
    """
    try:
        reply = requests.get(f"{API_BASE_URL}/summary/daily", timeout=30)
        reply.raise_for_status()
        payload = reply.json()
        if isinstance(payload, dict):
            return normalize_summary_payload(payload)
        return {}
    except Exception as error:
        st.error(f"Failed to load daily summary: {error}")
        return {}
def normalize_summary_payload(summary: dict) -> dict:
    """Flatten and normalize a daily-summary payload.

    The result starts as a shallow copy of ``summary``. If ``summary_json``
    holds a dict, or a string that decodes to a JSON object, its keys are
    merged on top of the copy. Afterwards:

    * ``executive_summary`` falls back to ``short_summary``, then ``""``;
    * ``recommended_focus`` falls back to ``key_focus``, then ``""``;
    * ``decision_implications``, ``watchlist`` and ``top_stories`` are
      replaced with ``[]`` unless they are already lists.

    Args:
        summary: Raw payload; it is not modified.

    Returns:
        A new dict with the normalized keys described above.
    """
    normalized = dict(summary)

    nested = summary.get("summary_json")
    if isinstance(nested, str):
        try:
            nested = json.loads(nested)
        except (ValueError, RecursionError):
            # Best effort: an undecodable blob is ignored. Only decode
            # failures are swallowed (json.JSONDecodeError is a ValueError),
            # so unrelated errors are no longer hidden.
            nested = None
    if isinstance(nested, dict):
        normalized.update(nested)

    normalized["executive_summary"] = (
        normalized.get("executive_summary")
        or normalized.get("short_summary")
        or ""
    )
    normalized["recommended_focus"] = (
        normalized.get("recommended_focus")
        or normalized.get("key_focus")
        or ""
    )

    for list_key in ("decision_implications", "watchlist", "top_stories"):
        if not isinstance(normalized.get(list_key), list):
            normalized[list_key] = []

    return normalized
def is_valid_url(value: object) -> bool:
    """Return True only for strings that start with ``http://`` or ``https://``."""
    return isinstance(value, str) and value.startswith(("http://", "https://"))
def apply_filters(df: pd.DataFrame) -> pd.DataFrame:
    """Render the sidebar filter widgets and return the matching articles.

    Widgets drawn in the sidebar: a category multiselect (every label except
    "not relevant to field" is preselected), a source multiselect (nothing
    preselected), a date-range picker, and a free-text search box.

    An empty category or source selection applies no restriction for that
    field. The search is a case-insensitive literal substring match against
    ``title`` or ``description``.

    Date handling: ``published_date`` is converted to real datetimes before
    computing the picker bounds and the range mask. The column can mix
    ``datetime.date`` objects with missing values, and ordering such an
    object column directly is not reliable and can raise. Rows without a
    usable date are excluded once a complete date range is applied.

    Args:
        df: Articles with ``label``, ``source``, ``title``, ``description``
            and ``published_date`` columns.

    Returns:
        A filtered copy of ``df``.
    """
    st.sidebar.header("Filters")

    label_options = sorted(df["label"].dropna().unique().tolist())
    source_options = sorted(df["source"].dropna().unique().tolist())
    default_labels = [
        label
        for label in label_options
        if label != "not relevant to field"
    ]

    selected_labels = st.sidebar.multiselect(
        "Action categories",
        options=label_options,
        default=default_labels,
    )
    selected_sources = st.sidebar.multiselect(
        "Sources",
        options=source_options,
        default=[],
    )

    # min()/max() on a datetime column skip missing values; on the raw
    # object column they would have to compare dates with NaT.
    published = pd.to_datetime(df["published_date"], errors="coerce")
    earliest = published.min()
    latest = published.max()

    date_range = None
    if pd.notna(earliest) and pd.notna(latest):
        min_date = earliest.date()
        max_date = latest.date()
        date_range = st.sidebar.date_input(
            "Date range",
            value=(min_date, max_date),
            min_value=min_date,
            max_value=max_date,
        )

    # Strip first so a whitespace-only entry counts as "no search".
    search_term = (st.sidebar.text_input("Search title or description") or "").strip()

    filtered = df.copy()

    if selected_labels:
        filtered = filtered[filtered["label"].isin(selected_labels)]

    if selected_sources:
        filtered = filtered[filtered["source"].isin(selected_sources)]

    # The picker yields fewer than two values while the user is still
    # choosing the end date; the date filter is skipped until then.
    if date_range and len(date_range) == 2:
        start_date, end_date = date_range
        row_dates = pd.to_datetime(filtered["published_date"], errors="coerce")
        in_range = (
            (row_dates >= pd.Timestamp(start_date))
            & (row_dates <= pd.Timestamp(end_date))
        )
        filtered = filtered[in_range]

    if search_term:
        title_matches = filtered["title"].fillna("").str.contains(
            search_term,
            case=False,
            na=False,
            regex=False,
        )
        description_matches = filtered["description"].fillna("").str.contains(
            search_term,
            case=False,
            na=False,
            regex=False,
        )
        filtered = filtered[title_matches | description_matches]

    return filtered
def render_metrics(df: pd.DataFrame, filtered_df: pd.DataFrame) -> None:
    """Show four headline metrics side by side.

    "Articles", "Sources" and "Categories" are computed from ``df``;
    "Shown" is the row count of ``filtered_df``.
    """
    slots = st.columns(4)
    slots[0].metric("Articles", len(df))
    slots[1].metric("Shown", len(filtered_df))
    slots[2].metric("Sources", df["source"].nunique())
    slots[3].metric("Categories", df["label"].nunique())
def render_bullet_list(items: list[str], empty_message: str) -> None:
    """Render ``items`` as one markdown bullet each.

    When ``items`` is empty (or otherwise falsy), ``empty_message`` is shown
    in an info box instead.
    """
    if items:
        for entry in items:
            st.markdown(f"- {entry}")
    else:
        st.info(empty_message)
def get_summary_source_articles(
    df: pd.DataFrame,
    summary: dict,
    fallback_limit: int = 15,
) -> pd.DataFrame:
    """Select the articles that back the daily summary.

    Article ids are collected from the dict entries of
    ``summary["top_stories"]`` (entries with a falsy id are ignored) and
    compared, as strings, against ``df["article_id"]``. If at least one row
    matches, those rows are returned.

    Otherwise the fallback is every row whose label is not
    "not relevant to field", sorted by ``published_at`` descending when that
    column exists, limited to the first ``fallback_limit`` rows.
    """
    wanted_ids = set()
    for entry in summary.get("top_stories", []):
        if not isinstance(entry, dict):
            continue
        identifier = entry.get("article_id")
        if identifier:
            wanted_ids.add(str(identifier))

    if wanted_ids:
        id_hits = df["article_id"].astype(str).isin(wanted_ids)
        matched = df.loc[id_hits]
        if not matched.empty:
            return matched

    keep = df["label"] != "not relevant to field"
    candidates = df.loc[keep].copy()
    if "published_at" in candidates.columns:
        candidates = candidates.sort_values("published_at", ascending=False)
    return candidates.head(fallback_limit)
def render_daily_summary_source_basis(
    df: pd.DataFrame,
    summary: dict,
) -> pd.DataFrame:
    """Show a caption describing the summary's basis and return its articles.

    The caption states how many top stories the summary contains and, when
    ``generated_at`` parses as a timestamp, when the summary was generated
    (formatted in UTC).

    Returns:
        The frame produced by ``get_summary_source_articles(df, summary)``.
    """
    source_articles = get_summary_source_articles(df, summary)

    raw_stamp = summary.get("generated_at")
    story_total = len(summary.get("top_stories", []))

    stamp_text = None
    if raw_stamp:
        stamp = pd.to_datetime(raw_stamp, errors="coerce", utc=True)
        if pd.notnull(stamp):
            stamp_text = stamp.strftime("%Y-%m-%d %H:%M UTC")

    if stamp_text:
        caption = (
            f"Summary generated {stamp_text} · "
            f"{story_total} top stories included"
        )
    else:
        caption = f"{story_total} top stories included in this summary"
    st.caption(caption)

    return source_articles
def render_daily_summary(summary: dict) -> None:
    """Render the daily AI summary: headline sections, lists and top stories.

    An empty ``summary`` produces only an info message. Otherwise the
    function shows a caption with the summary date (and the raw
    ``generated_at`` value when present), three text sections, two bullet
    lists, and one expander per dict entry in ``top_stories``.

    Placeholders are applied when a value is missing *or* null. A plain
    ``dict.get(key, default)`` only covers absent keys, so a story whose
    ``title`` is ``null`` in the payload would otherwise hand ``None`` to
    ``st.expander``.
    """
    st.subheader("Daily AI Summary")

    if not summary:
        st.info("No daily summary available yet.")
        return

    summary_date = summary.get("summary_date") or "Unknown"
    generated_at = summary.get("generated_at")
    if generated_at:
        st.caption(f"Summary date: {summary_date} · Generated at: {generated_at}")
    else:
        st.caption(f"Summary date: {summary_date}")

    text_sections = (
        ("### Executive Summary", "executive_summary", "No summary available."),
        ("### Key Signal", "key_signal", "No key signal available."),
        ("### Recommended Focus", "recommended_focus", "No focus available."),
    )
    for heading, key, placeholder in text_sections:
        st.markdown(heading)
        st.write(summary.get(key) or placeholder)

    list_sections = (
        (
            "### Decision Implications",
            "decision_implications",
            "No decision implications available.",
        ),
        ("### Watchlist", "watchlist", "No watchlist available."),
    )
    for heading, key, placeholder in list_sections:
        st.markdown(heading)
        render_bullet_list(summary.get(key, []), placeholder)

    stories = summary.get("top_stories", [])
    if not stories:
        st.info("No top stories available.")
        return

    st.markdown("### Top Stories")
    for story in stories:
        if not isinstance(story, dict):
            continue

        # ``or`` covers both absent keys and explicit nulls; str() keeps the
        # expander label a string even for non-text titles.
        title = str(story.get("title") or "Untitled story")
        label = story.get("label") or "Unknown"
        source = story.get("source") or "Unknown source"
        description = story.get("description", "")
        why_it_matters = story.get("why_it_matters", "")
        decision_relevance = story.get("decision_relevance", "")
        url = story.get("url")
        article_id = story.get("article_id")

        published_text = "Unknown date"
        raw_published = story.get("published_at")
        if pd.notnull(raw_published):
            parsed = pd.to_datetime(raw_published, errors="coerce", utc=True)
            if pd.notnull(parsed):
                published_text = parsed.strftime("%Y-%m-%d %H:%M UTC")

        with st.expander(title):
            c1, c2, c3 = st.columns(3)
            c1.markdown(f"**Category:** {label}")
            c2.markdown(f"**Source:** {source}")
            c3.markdown(f"**Published:** {published_text}")

            if description:
                st.markdown("**Description**")
                st.write(description)

            if why_it_matters:
                st.markdown("**Why this matters**")
                st.write(why_it_matters)

            if decision_relevance:
                st.markdown("**Decision relevance**")
                st.write(decision_relevance)

            if is_valid_url(url):
                st.link_button("Open article", url)

            if article_id:
                st.caption(f"Article ID: {article_id}")
def _article_text(value: object, fallback: str) -> str:
    """Return ``value`` as display text, or ``fallback`` when it is missing or blank."""
    if value is None:
        return fallback
    if isinstance(value, str):
        return value if value.strip() else fallback
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return fallback
    return str(value)


def render_article_browser(df: pd.DataFrame) -> None:
    """Render a sortable, length-limited list of articles as expanders.

    The user picks a sort order and how many rows to show (5-100, default
    20). Each article becomes an expander with category, source, publication
    time, an optional description, an optional link button, and its id and
    raw model output when available.

    Title, category and source fall back to a placeholder when the cell is
    null or blank. ``row.get(name, default)`` cannot provide that: the
    default only applies when the column is absent, so a null cell would be
    passed straight to ``st.expander`` or printed as "None"/"nan".

    Args:
        df: Articles to browse; an empty frame produces only an info message.
    """
    st.subheader("Article Browser")

    if df.empty:
        st.info("No articles available for browsing.")
        return

    # Sort option -> (columns, ascending flags) for DataFrame.sort_values.
    sort_plans = {
        "Newest first": ("published_at", False),
        "Oldest first": ("published_at", True),
        "Action category": (["label", "published_at"], [True, False]),
        "Source": (["source", "published_at"], [True, False]),
    }

    sort_option = st.selectbox(
        "Sort articles by",
        options=list(sort_plans),
        index=0,
    )

    display_df = df.copy()
    plan = sort_plans.get(sort_option)
    if plan is not None:
        sort_columns, ascending = plan
        display_df = display_df.sort_values(sort_columns, ascending=ascending)

    max_rows = st.slider("Number of articles to display", 5, 100, 20)
    display_df = display_df.head(max_rows)

    for _, row in display_df.iterrows():
        title = _article_text(row.get("title"), "Untitled article")
        label = _article_text(row.get("label"), "Unknown")
        source = _article_text(row.get("source"), "Unknown source")

        published_at = row.get("published_at")
        published_str = (
            published_at.strftime("%Y-%m-%d %H:%M UTC")
            if pd.notnull(published_at)
            else "Unknown"
        )

        with st.expander(title):
            meta1, meta2, meta3 = st.columns(3)
            meta1.markdown(f"**Action:** {label}")
            meta2.markdown(f"**Source:** {source}")
            meta3.markdown(f"**Published:** {published_str}")

            description = row.get("description")
            if pd.notnull(description) and str(description).strip():
                st.markdown("**Description**")
                st.write(description)

            url = row.get("url")
            if is_valid_url(url):
                st.link_button("Open article", url)

            st.markdown("**More details**")

            article_id = row.get("article_id")
            if pd.notnull(article_id):
                st.caption(f"Article ID: {article_id}")

            raw_label = row.get("raw_label")
            if pd.notnull(raw_label) and str(raw_label).strip():
                st.caption(f"Model output: {raw_label}")
def main() -> None:
    """Entry point: load the articles, then render the selected view.

    Shows the page title and introduction, loads the classified articles and
    stops with a warning when none are available. Otherwise a segmented
    control switches between the daily summary and the article browser.

    The daily summary is fetched only when the summary view is rendered, so
    interacting with the article filters does not trigger a summary request
    on each rerun. When no option is selected (the control returns ``None``),
    the summary view is shown instead of an empty page.
    """
    st.title("📰 Green Energy News Event Dashboard")
    st.write(
        "This dashboard gives an overview of classified green energy and climate-tech news, "
        "with filters for action categories, dates, sources, and search terms."
    )

    df = load_classified_articles()
    if df.empty:
        st.warning(
            "No classified articles found yet. "
            "Check whether the API is live and returning data."
        )
        return

    section = st.segmented_control(
        "View",
        options=["Daily Summary", "Articles"],
        default="Daily Summary",
    )

    if section == "Articles":
        filtered_df = apply_filters(df)
        render_metrics(df, filtered_df)
        render_article_browser(filtered_df)
    else:
        # Covers "Daily Summary" and the deselected (None) state.
        summary = load_daily_summary()
        summary_df = render_daily_summary_source_basis(df, summary)
        render_metrics(df, summary_df)
        render_daily_summary(summary)


if __name__ == "__main__":
    main()