Spaces:
Sleeping
Sleeping
| from __future__ import annotations | |
| import json | |
| from pathlib import Path | |
| import sys | |
| APP_ROOT = Path(__file__).resolve().parent | |
| sys.path.insert(0, str(APP_ROOT / "src")) | |
| import streamlit as st | |
| from h2epr_explorer.constants import ( | |
| CATALOG_COLUMNS, | |
| GOLD_COMPANION_REPO, | |
| PUBLIC_DATASET_REPO, | |
| RELEASE_BOUNDARY_NOTICE, | |
| ) | |
| from h2epr_explorer.data_loader import load_catalog, load_event_graph, load_finalcascade_summary, load_stages | |
| from h2epr_explorer.filters import event_description, event_display_label, event_name, filter_catalog | |
| from h2epr_explorer.navigation import ( | |
| build_event_links, | |
| filter_summary_text, | |
| query_param_event_id, | |
| resolve_selected_event_index, | |
| ) | |
| from h2epr_explorer.render_gantt import build_timeline_figure | |
def _as_records(frame):
    """Convert a tabular frame into a list of per-row dictionaries.

    Args:
        frame: Object exposing ``to_dict(orient="records")`` (presumably a
            pandas DataFrame, as produced by the loaders in this file).

    Returns:
        Whatever ``frame.to_dict(orient="records")`` yields: one mapping per
        row, keyed by column name.
    """
    return frame.to_dict(orient="records")
def _select_columns(frame, columns):
    """Narrow ``frame`` to the requested columns that it actually contains.

    Args:
        frame: Table exposing ``.columns`` and list-based column selection
            (presumably a pandas DataFrame).
        columns: Column names to keep, in the desired display order.

    Returns:
        ``frame[present]``, where ``present`` holds the entries of ``columns``
        found in ``frame.columns`` (order of ``columns`` preserved). When none
        of the requested names exist, the unmodified ``frame`` is returned so
        the caller still has something to display.
    """
    present = [column for column in columns if column in frame.columns]
    return frame[present] if present else frame
def _sort_stage_frame(frame):
    """Order stage rows by the stage-ordering columns that are available.

    The candidate keys are ``stage_index``, ``stage_order`` and ``stage_id``,
    applied in that priority; any that are absent from ``frame`` are skipped.

    Args:
        frame: Table exposing ``.columns`` and ``sort_values`` (presumably a
            pandas DataFrame of stage rows).

    Returns:
        ``frame.sort_values(...)`` over the available keys, or ``frame``
        itself when none of the candidate columns exist.
    """
    sort_columns = [
        column
        for column in ("stage_index", "stage_order", "stage_id")
        if column in frame.columns
    ]
    return frame.sort_values(sort_columns) if sort_columns else frame
def _safe_int(value: object, default: int = 0) -> int:
    """Coerce ``value`` to ``int``, returning ``default`` when that fails.

    Args:
        value: Anything accepted by ``int()``; may also be ``None``, NaN, an
            infinity, or a non-numeric string.
        default: Result to use when ``value`` cannot be converted.

    Returns:
        ``int(value)`` on success, otherwise ``default``.
    """
    try:
        return int(value)
    # TypeError: None / unsupported types. ValueError: NaN or non-numeric text.
    # OverflowError: float infinities, which int() rejects separately.
    except (TypeError, ValueError, OverflowError):
        return default
# --- Page chrome: configuration, custom CSS, header banner, release notice ---
st.set_page_config(page_title="H2EPR-Bench Explorer", layout="wide")

# Styles for Streamlit metric cards and the three header classes used below.
st.markdown(
    """
<style>
div[data-testid="stMetric"] {
border: 1px solid #e5e7eb;
border-radius: 8px;
padding: 0.35rem 0.6rem;
background: #fbfbf8;
}
.h2epr-kicker {
color: #4b5563;
font-size: 0.92rem;
letter-spacing: 0;
margin-bottom: 0.25rem;
}
.h2epr-title {
font-size: 2.15rem;
font-weight: 760;
line-height: 1.12;
margin-bottom: 0.25rem;
}
.h2epr-subtitle {
color: #374151;
max-width: 920px;
margin-bottom: 0.75rem;
}
</style>
""",
    unsafe_allow_html=True,
)

# Header banner: kicker, title, subtitle (raw HTML so the classes above apply).
st.markdown('<div class="h2epr-kicker">H²EPR-Bench · public release explorer</div>', unsafe_allow_html=True)
st.markdown('<div class="h2epr-title">Event-process graph browser</div>', unsafe_allow_html=True)
st.markdown(
    '<div class="h2epr-subtitle">Browse public event metadata, stage rows, FinalCascade summaries, and Gantt-style timelines for the H²EPR-Bench release.</div>',
    unsafe_allow_html=True,
)
st.info(RELEASE_BOUNDARY_NOTICE)
# --- Load the public tables and keep a row-dict view of the catalog ---
catalog = load_catalog()
stages = load_stages()
summary = load_finalcascade_summary()
catalog_rows = _as_records(catalog)

# --- Sidebar: filter widgets and external links ---
with st.sidebar:
    st.header("Filter events")
    query = st.text_input("Search", placeholder="event name, ID, category, keyword")
    domains = st.multiselect("Domain", sorted(catalog["domain"].dropna().unique().tolist()))
    categories = st.multiselect("Category", sorted(catalog["event_category"].dropna().unique().tolist()))
    # _safe_int turns a NaN maximum (empty or all-missing column) into 0
    # instead of letting int() raise ValueError before the page renders.
    min_source_count = st.slider("Minimum sources", 0, _safe_int(catalog["source_count"].max()), 0)
    min_stage_count = st.slider("Minimum stages", 0, _safe_int(catalog["stage_count"].max()), 0)
    st.divider()
    st.link_button("Dataset repository", f"https://huggingface.co/datasets/{PUBLIC_DATASET_REPO}", use_container_width=True)
    st.link_button("Request Gold access", f"https://huggingface.co/datasets/{GOLD_COMPANION_REPO}", use_container_width=True)

# --- Apply the filters; nothing below can render without at least one event ---
filtered_rows = filter_catalog(
    catalog_rows,
    query=query,
    domains=domains,
    categories=categories,
    min_source_count=min_source_count,
    min_stage_count=min_stage_count,
)
if not filtered_rows:
    st.warning("No event matches the current filters.")
    st.stop()
# --- Event selection ---
# Labels are built from the full catalog so every selectable ID has one.
event_labels = {row["event_id"]: event_display_label(row) for row in catalog_rows}
requested_event_id = query_param_event_id(st.query_params)
selected_index = resolve_selected_event_index(filtered_rows, requested_event_id)
selected_event = st.selectbox(
    "Selected event",
    [row["event_id"] for row in filtered_rows],
    index=selected_index,
    format_func=lambda event_id: event_labels.get(event_id, event_id),
)

# Mirror the selection into the URL. This is deliberately best effort: a
# failure to update the query string must not take the page down.
try:
    st.query_params["event_id"] = selected_event
except Exception:
    pass

# --- Per-event slices used by the tabs ---
event_row = catalog[catalog["event_id"] == selected_event].iloc[0]
event_record = event_row.to_dict()
event_stages = _sort_stage_frame(stages[stages["event_id"] == selected_event])
summary_row = summary[summary["event_id"] == selected_event]

# A missing cell can surface as float NaN, which is truthy and would be
# stringified to "nan"; treat it as "no path" before building the links.
raw_gantt_path = event_record.get("gantt_html_path")
if isinstance(raw_gantt_path, float) and raw_gantt_path != raw_gantt_path:
    raw_gantt_path = None
event_links = build_event_links(selected_event, str(raw_gantt_path or ""))

st.caption(filter_summary_text(len(filtered_rows), len(catalog_rows)))
# --- Tab layout; the tabs are addressed by position below ---
tabs = st.tabs(["Catalog", "Event detail", "Timeline", "Stages", "FinalCascade JSON", "Access and boundary"])

# Catalog tab: the catalog rows that survived the sidebar filters.
with tabs[0]:
    st.subheader("Event catalog")
    visible_event_ids = [row["event_id"] for row in filtered_rows]
    visible_catalog = catalog[catalog["event_id"].isin(visible_event_ids)]
    st.dataframe(_select_columns(visible_catalog, CATALOG_COLUMNS), use_container_width=True, height=520)
# Event detail tab: headline, count metrics, profile table, links, summary.
with tabs[1]:
    st.subheader(event_name(event_record))
    st.write(event_description(event_record))

    # One metric card per (label, catalog column) pair, left to right.
    metric_fields = (
        ("Sources", "source_count"),
        ("Stages", "stage_count"),
        ("Episodes", "episode_count"),
        ("Participants", "participant_count"),
        ("Relations", "relation_count"),
    )
    for metric_slot, (metric_label, metric_field) in zip(st.columns(len(metric_fields)), metric_fields):
        metric_slot.metric(metric_label, _safe_int(event_row.get(metric_field, 0)))

    st.markdown("#### Event profile")
    profile_columns = [
        "event_id",
        "display_name",
        "domain",
        "event_category",
        "event_scope_label",
        "keywords",
        "event_boundary_time_status",
        "temporal_anchor_summary",
        "gold_reference_access_level",
        "finalcascade_access_level",
    ]
    st.dataframe(_select_columns(catalog[catalog["event_id"] == selected_event], profile_columns), use_container_width=True)

    link_cols = st.columns(4)
    link_cols[0].link_button("Open dataset", event_links["public_dataset"], use_container_width=True)
    link_cols[1].link_button("Gold access", event_links["gold_request"], use_container_width=True)
    link_cols[2].link_button("FinalCascade file", event_links["finalcascade_jsonl"], use_container_width=True)
    # The Gantt link is only offered when the link map contains that key.
    if "gantt_html" in event_links:
        link_cols[3].link_button("Gantt artifact", event_links["gantt_html"], use_container_width=True)

    if not summary_row.empty:
        st.markdown("#### Public FinalCascade summary")
        summary_columns = [
            "event_id",
            "stage_count",
            "episode_count",
            "participant_count",
            "transaction_count",
            "relation_count",
            "event_boundary_time_status",
            "known_action_time_anchor_count",
            "not_gold_warning",
        ]
        st.dataframe(_select_columns(summary_row, summary_columns), use_container_width=True)
# Timeline tab: chart built from the selected event's stage rows.
with tabs[2]:
    figure = build_timeline_figure(_as_records(event_stages), selected_event)
    if figure is None:
        st.warning("No public stage rows are available for this event.")
    else:
        st.plotly_chart(figure, use_container_width=True)

    # ``.get`` already yields None for an absent key. A missing cell can be
    # float NaN, which is truthy and would render as "nan", so exclude it.
    timeline_gantt_path = event_row.get("gantt_html_path")
    if isinstance(timeline_gantt_path, float) and timeline_gantt_path != timeline_gantt_path:
        timeline_gantt_path = None
    if timeline_gantt_path:
        st.markdown(f"Gantt HTML artifact path: `{timeline_gantt_path}`")
# Stages tab: the sorted stage rows for the selected event.
with tabs[3]:
    st.dataframe(event_stages, use_container_width=True, height=520)

# FinalCascade JSON tab: download button plus a collapsed inline viewer.
with tabs[4]:
    graph = load_event_graph(selected_event)
    # ensure_ascii=False keeps non-ASCII text readable in the downloaded file.
    graph_json = json.dumps(graph, ensure_ascii=False, indent=2)
    st.download_button(
        "Download selected public FinalCascade JSON",
        data=graph_json,
        file_name=f"{selected_event}_finalcascade_public.json",
        mime="application/json",
    )
    st.json(graph, expanded=False)
# Access and boundary tab: static notes on what this Explorer does and does
# not load, with links to the public and gated dataset repositories.
with tabs[5]:
    st.markdown(
        f"""
### Release boundary
- Public dataset repo: [`{PUBLIC_DATASET_REPO}`](https://huggingface.co/datasets/{PUBLIC_DATASET_REPO})
- Manual-gated Gold companion: [`{GOLD_COMPANION_REPO}`](https://huggingface.co/datasets/{GOLD_COMPANION_REPO})
- This Explorer loads public event metadata, public stages, public sanitized FinalCascade records, and public visualization paths.
- It does not load gated Gold references.
- Public FinalCascade and Gantt views are supplementary inspection assets, not official scoring references.
"""
    )