from __future__ import annotations import json from pathlib import Path import sys APP_ROOT = Path(__file__).resolve().parent sys.path.insert(0, str(APP_ROOT / "src")) import streamlit as st from h2epr_explorer.constants import ( CATALOG_COLUMNS, GOLD_COMPANION_REPO, PUBLIC_DATASET_REPO, RELEASE_BOUNDARY_NOTICE, ) from h2epr_explorer.data_loader import load_catalog, load_event_graph, load_finalcascade_summary, load_stages from h2epr_explorer.filters import event_description, event_display_label, event_name, filter_catalog from h2epr_explorer.navigation import ( build_event_links, filter_summary_text, query_param_event_id, resolve_selected_event_index, ) from h2epr_explorer.render_gantt import build_timeline_figure def _as_records(frame): return frame.to_dict(orient="records") def _select_columns(frame, columns): present = [column for column in columns if column in frame.columns] return frame[present] if present else frame def _sort_stage_frame(frame): sort_columns = [column for column in ("stage_index", "stage_order", "stage_id") if column in frame.columns] return frame.sort_values(sort_columns) if sort_columns else frame def _safe_int(value, default=0): try: return int(value) except (TypeError, ValueError): return default st.set_page_config(page_title="H2EPR-Bench Explorer", layout="wide") st.markdown( """ """, unsafe_allow_html=True, ) st.markdown('
H²EPR-Bench · public release explorer
', unsafe_allow_html=True) st.markdown('
Event-process graph browser
', unsafe_allow_html=True) st.markdown( '
Browse public event metadata, stage rows, FinalCascade summaries, and Gantt-style timelines for the H²EPR-Bench release.
', unsafe_allow_html=True, ) st.info(RELEASE_BOUNDARY_NOTICE) catalog = load_catalog() stages = load_stages() summary = load_finalcascade_summary() catalog_rows = _as_records(catalog) with st.sidebar: st.header("Filter events") query = st.text_input("Search", placeholder="event name, ID, category, keyword") domains = st.multiselect("Domain", sorted(catalog["domain"].dropna().unique().tolist())) categories = st.multiselect("Category", sorted(catalog["event_category"].dropna().unique().tolist())) min_source_count = st.slider("Minimum sources", 0, int(catalog["source_count"].max()), 0) min_stage_count = st.slider("Minimum stages", 0, int(catalog["stage_count"].max()), 0) st.divider() st.link_button("Dataset repository", f"https://huggingface.co/datasets/{PUBLIC_DATASET_REPO}", use_container_width=True) st.link_button("Request Gold access", f"https://huggingface.co/datasets/{GOLD_COMPANION_REPO}", use_container_width=True) filtered_rows = filter_catalog( catalog_rows, query=query, domains=domains, categories=categories, min_source_count=min_source_count, min_stage_count=min_stage_count, ) if not filtered_rows: st.warning("No event matches the current filters.") st.stop() event_labels = {row["event_id"]: event_display_label(row) for row in catalog_rows} requested_event_id = query_param_event_id(st.query_params) selected_index = resolve_selected_event_index(filtered_rows, requested_event_id) selected_event = st.selectbox( "Selected event", [row["event_id"] for row in filtered_rows], index=selected_index, format_func=lambda event_id: event_labels.get(event_id, event_id), ) try: st.query_params["event_id"] = selected_event except Exception: pass event_row = catalog[catalog["event_id"] == selected_event].iloc[0] event_record = event_row.to_dict() event_stages = _sort_stage_frame(stages[stages["event_id"] == selected_event]) summary_row = summary[summary["event_id"] == selected_event] event_links = build_event_links(selected_event, str(event_record.get("gantt_html_path") or "")) st.caption(filter_summary_text(len(filtered_rows), len(catalog_rows))) tabs = st.tabs(["Catalog", "Event detail", "Timeline", "Stages", "FinalCascade JSON", "Access and boundary"]) with tabs[0]: st.subheader("Event catalog") st.dataframe(_select_columns(catalog[catalog["event_id"].isin([row["event_id"] for row in filtered_rows])], CATALOG_COLUMNS), use_container_width=True, height=520) with tabs[1]: st.subheader(event_name(event_record)) st.write(event_description(event_record)) c1, c2, c3, c4, c5 = st.columns(5) c1.metric("Sources", _safe_int(event_row.get("source_count", 0))) c2.metric("Stages", _safe_int(event_row.get("stage_count", 0))) c3.metric("Episodes", _safe_int(event_row.get("episode_count", 0))) c4.metric("Participants", _safe_int(event_row.get("participant_count", 0))) c5.metric("Relations", _safe_int(event_row.get("relation_count", 0))) st.markdown("#### Event profile") profile_columns = [ "event_id", "display_name", "domain", "event_category", "event_scope_label", "keywords", "event_boundary_time_status", "temporal_anchor_summary", "gold_reference_access_level", "finalcascade_access_level", ] st.dataframe(_select_columns(catalog[catalog["event_id"] == selected_event], profile_columns), use_container_width=True) link_cols = st.columns(4) link_cols[0].link_button("Open dataset", event_links["public_dataset"], use_container_width=True) link_cols[1].link_button("Gold access", event_links["gold_request"], use_container_width=True) link_cols[2].link_button("FinalCascade file", event_links["finalcascade_jsonl"], use_container_width=True) if "gantt_html" in event_links: link_cols[3].link_button("Gantt artifact", event_links["gantt_html"], use_container_width=True) if not summary_row.empty: st.markdown("#### Public FinalCascade summary") summary_columns = [ "event_id", "stage_count", "episode_count", "participant_count", "transaction_count", "relation_count", "event_boundary_time_status", "known_action_time_anchor_count", "not_gold_warning", ] st.dataframe(_select_columns(summary_row, summary_columns), use_container_width=True) with tabs[2]: figure = build_timeline_figure(_as_records(event_stages), selected_event) if figure is None: st.warning("No public stage rows are available for this event.") else: st.plotly_chart(figure, use_container_width=True) if "gantt_html_path" in event_row and event_row.get("gantt_html_path"): st.markdown(f"Gantt HTML artifact path: `{event_row.get('gantt_html_path')}`") with tabs[3]: st.dataframe(event_stages, use_container_width=True, height=520) with tabs[4]: graph = load_event_graph(selected_event) st.download_button( "Download selected public FinalCascade JSON", data=json.dumps(graph, ensure_ascii=False, indent=2), file_name=f"{selected_event}_finalcascade_public.json", mime="application/json", ) st.json(graph, expanded=False) with tabs[5]: st.markdown( f""" ### Release boundary - Public dataset repo: [`{PUBLIC_DATASET_REPO}`](https://huggingface.co/datasets/{PUBLIC_DATASET_REPO}) - Manual-gated Gold companion: [`{GOLD_COMPANION_REPO}`](https://huggingface.co/datasets/{GOLD_COMPANION_REPO}) - This Explorer loads public event metadata, public stages, public sanitized FinalCascade records, and public visualization paths. - It does not load gated Gold references. - Public FinalCascade and Gantt views are supplementary inspection assets, not official scoring references. """ )