# Hugging Face file-viewer residue captured with this file (not program code):
#   wandler67's picture
#   Improve H2EPR Explorer event navigation and timeline UX
#   62cba3a verified
#   Raw
#   History Blame Contribute Delete
#   8.23 kB
from __future__ import annotations
import json
from pathlib import Path
import sys
APP_ROOT = Path(__file__).resolve().parent
sys.path.insert(0, str(APP_ROOT / "src"))
import streamlit as st
from h2epr_explorer.constants import (
CATALOG_COLUMNS,
GOLD_COMPANION_REPO,
PUBLIC_DATASET_REPO,
RELEASE_BOUNDARY_NOTICE,
)
from h2epr_explorer.data_loader import load_catalog, load_event_graph, load_finalcascade_summary, load_stages
from h2epr_explorer.filters import event_description, event_display_label, event_name, filter_catalog
from h2epr_explorer.navigation import (
build_event_links,
filter_summary_text,
query_param_event_id,
resolve_selected_event_index,
)
from h2epr_explorer.render_gantt import build_timeline_figure
def _as_records(frame):
    """Return the rows of *frame* as a list of ``{column: value}`` dicts.

    Thin wrapper around ``frame.to_dict(orient="records")``; *frame* is
    presumably a pandas DataFrame (anything exposing that method works).
    """
    return frame.to_dict(orient="records")
def _select_columns(frame, columns):
    """Restrict *frame* to the requested *columns* that it actually has.

    Columns are kept in the order given by *columns*; names missing from
    ``frame.columns`` are skipped. When none of the requested columns is
    present, the original frame is returned unchanged instead of an empty
    selection.
    """
    present = [column for column in columns if column in frame.columns]
    return frame[present] if present else frame
def _sort_stage_frame(frame):
    """Sort stage rows by whichever known ordering columns are present.

    The candidate keys are ``stage_index``, ``stage_order`` and ``stage_id``,
    applied in that priority. If *frame* has none of them it is returned
    as-is (no copy, no reordering).
    """
    sort_columns = [
        column
        for column in ("stage_index", "stage_order", "stage_id")
        if column in frame.columns
    ]
    return frame.sort_values(sort_columns) if sort_columns else frame
def _safe_int(value, default=0):
    """Convert *value* to ``int``, falling back to *default* on failure.

    Args:
        value: Anything ``int()`` may accept (number, numeric string, ...).
        default: Returned when the conversion is not possible.

    Returns:
        ``int(value)``, or *default* when ``int()`` raises ``TypeError``
        (e.g. ``None``), ``ValueError`` (e.g. ``"abc"`` or a float NaN) or
        ``OverflowError`` (a float infinity).
    """
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        # OverflowError covers int(float("inf")), which would otherwise
        # escape and abort rendering of the metric row.
        return default
# Page chrome: wide layout, a small inline stylesheet, then the title banner
# and the release-boundary notice.
st.set_page_config(layout="wide", page_title="H2EPR-Bench Explorer")

# Kept unindented on purpose: the text is passed through Markdown verbatim.
_PAGE_CSS = """
<style>
div[data-testid="stMetric"] {
border: 1px solid #e5e7eb;
border-radius: 8px;
padding: 0.35rem 0.6rem;
background: #fbfbf8;
}
.h2epr-kicker {
color: #4b5563;
font-size: 0.92rem;
letter-spacing: 0;
margin-bottom: 0.25rem;
}
.h2epr-title {
font-size: 2.15rem;
font-weight: 760;
line-height: 1.12;
margin-bottom: 0.25rem;
}
.h2epr-subtitle {
color: #374151;
max-width: 920px;
margin-bottom: 0.75rem;
}
</style>
"""
st.markdown(_PAGE_CSS, unsafe_allow_html=True)

# Banner rows are emitted top to bottom: kicker, title, subtitle.
_BANNER_ROWS = (
    '<div class="h2epr-kicker">H²EPR-Bench · public release explorer</div>',
    '<div class="h2epr-title">Event-process graph browser</div>',
    '<div class="h2epr-subtitle">Browse public event metadata, stage rows, FinalCascade summaries, and Gantt-style timelines for the H²EPR-Bench release.</div>',
)
for _banner_html in _BANNER_ROWS:
    st.markdown(_banner_html, unsafe_allow_html=True)

st.info(RELEASE_BOUNDARY_NOTICE)
# Load the public tables once per script run; everything below reads from them.
catalog = load_catalog()
stages = load_stages()
summary = load_finalcascade_summary()
catalog_rows = _as_records(catalog)

# Sidebar: free-text search, categorical filters, minimum-count sliders, and
# links to the dataset repositories.
with st.sidebar:
    st.header("Filter events")
    query = st.text_input("Search", placeholder="event name, ID, category, keyword")
    domains = st.multiselect("Domain", sorted(catalog["domain"].dropna().unique().tolist()))
    categories = st.multiselect("Category", sorted(catalog["event_category"].dropna().unique().tolist()))
    # Slider upper bounds come from the largest value present in the catalog.
    min_source_count = st.slider("Minimum sources", 0, int(catalog["source_count"].max()), 0)
    min_stage_count = st.slider("Minimum stages", 0, int(catalog["stage_count"].max()), 0)
    st.divider()
    st.link_button("Dataset repository", f"https://huggingface.co/datasets/{PUBLIC_DATASET_REPO}", use_container_width=True)
    st.link_button("Request Gold access", f"https://huggingface.co/datasets/{GOLD_COMPANION_REPO}", use_container_width=True)

filtered_rows = filter_catalog(
    catalog_rows,
    query=query,
    domains=domains,
    categories=categories,
    min_source_count=min_source_count,
    min_stage_count=min_stage_count,
)

# Nothing to show: warn and halt this script run before any event lookup.
if not filtered_rows:
    st.warning("No event matches the current filters.")
    st.stop()
# Labels are built from the full catalog so any event ID can be rendered.
event_labels = {row["event_id"]: event_display_label(row) for row in catalog_rows}

# Pre-select the event named in the URL query string, as resolved by
# resolve_selected_event_index against the filtered rows.
requested_event_id = query_param_event_id(st.query_params)
selected_index = resolve_selected_event_index(filtered_rows, requested_event_id)
selected_event = st.selectbox(
    "Selected event",
    [row["event_id"] for row in filtered_rows],
    index=selected_index,
    format_func=lambda event_id: event_labels.get(event_id, event_id),
)

# Mirror the selection back into the URL. This is deliberately best-effort:
# a failure to write the query parameter must not prevent the page rendering.
try:
    st.query_params["event_id"] = selected_event
except Exception:
    pass

# Per-event views used by the tabs below.
event_row = catalog[catalog["event_id"] == selected_event].iloc[0]
event_record = event_row.to_dict()
event_stages = _sort_stage_frame(stages[stages["event_id"] == selected_event])
summary_row = summary[summary["event_id"] == selected_event]
event_links = build_event_links(selected_event, str(event_record.get("gantt_html_path") or ""))

st.caption(filter_summary_text(len(filtered_rows), len(catalog_rows)))
tabs = st.tabs(["Catalog", "Event detail", "Timeline", "Stages", "FinalCascade JSON", "Access and boundary"])

# Tab 0: the catalog restricted to the events that passed the sidebar filters.
with tabs[0]:
    st.subheader("Event catalog")
    filtered_ids = [row["event_id"] for row in filtered_rows]
    st.dataframe(
        _select_columns(catalog[catalog["event_id"].isin(filtered_ids)], CATALOG_COLUMNS),
        use_container_width=True,
        height=520,
    )
# Tab 1: headline metrics, profile row, external links and the public
# FinalCascade summary for the selected event.
with tabs[1]:
    st.subheader(event_name(event_record))
    st.write(event_description(event_record))

    # Counts go through _safe_int so missing or non-numeric cells render as 0.
    c1, c2, c3, c4, c5 = st.columns(5)
    c1.metric("Sources", _safe_int(event_row.get("source_count", 0)))
    c2.metric("Stages", _safe_int(event_row.get("stage_count", 0)))
    c3.metric("Episodes", _safe_int(event_row.get("episode_count", 0)))
    c4.metric("Participants", _safe_int(event_row.get("participant_count", 0)))
    c5.metric("Relations", _safe_int(event_row.get("relation_count", 0)))

    st.markdown("#### Event profile")
    profile_columns = [
        "event_id",
        "display_name",
        "domain",
        "event_category",
        "event_scope_label",
        "keywords",
        "event_boundary_time_status",
        "temporal_anchor_summary",
        "gold_reference_access_level",
        "finalcascade_access_level",
    ]
    st.dataframe(_select_columns(catalog[catalog["event_id"] == selected_event], profile_columns), use_container_width=True)

    link_cols = st.columns(4)
    link_cols[0].link_button("Open dataset", event_links["public_dataset"], use_container_width=True)
    link_cols[1].link_button("Gold access", event_links["gold_request"], use_container_width=True)
    link_cols[2].link_button("FinalCascade file", event_links["finalcascade_jsonl"], use_container_width=True)
    # The Gantt link is optional: only shown when build_event_links supplied it.
    if "gantt_html" in event_links:
        link_cols[3].link_button("Gantt artifact", event_links["gantt_html"], use_container_width=True)

    # The summary section is skipped when the event has no summary row.
    if not summary_row.empty:
        st.markdown("#### Public FinalCascade summary")
        summary_columns = [
            "event_id",
            "stage_count",
            "episode_count",
            "participant_count",
            "transaction_count",
            "relation_count",
            "event_boundary_time_status",
            "known_action_time_anchor_count",
            "not_gold_warning",
        ]
        st.dataframe(_select_columns(summary_row, summary_columns), use_container_width=True)
# Tab 2: timeline figure for the selected event, plus the artifact path
# when the catalog row carries one.
with tabs[2]:
    figure = build_timeline_figure(_as_records(event_stages), selected_event)
    if figure is None:
        st.warning("No public stage rows are available for this event.")
    else:
        st.plotly_chart(figure, use_container_width=True)

    # NOTE(review): placed at tab level (shown with or without a figure);
    # confirm against the upstream file, whose indentation was lost.
    # .get() returns None for a missing key, so one lookup covers both the
    # "column absent" and "value empty" cases.
    gantt_path = event_row.get("gantt_html_path")
    if gantt_path:
        st.markdown(f"Gantt HTML artifact path: `{gantt_path}`")
# Tab 3: the selected event's stage rows, already ordered by _sort_stage_frame.
with tabs[3]:
    st.dataframe(event_stages, use_container_width=True, height=520)

# Tab 4: the event graph returned by load_event_graph, as a download and as
# a collapsed JSON viewer.
with tabs[4]:
    graph = load_event_graph(selected_event)
    st.download_button(
        "Download selected public FinalCascade JSON",
        # ensure_ascii=False keeps non-ASCII text readable in the download.
        data=json.dumps(graph, ensure_ascii=False, indent=2),
        file_name=f"{selected_event}_finalcascade_public.json",
        mime="application/json",
    )
    st.json(graph, expanded=False)
# Tab 5: static notes on what this Explorer does and does not load.
with tabs[5]:
    # The Markdown text stays at column 0: leading spaces inside the literal
    # would become part of the rendered text.
    st.markdown(
        f"""
### Release boundary
- Public dataset repo: [`{PUBLIC_DATASET_REPO}`](https://huggingface.co/datasets/{PUBLIC_DATASET_REPO})
- Manual-gated Gold companion: [`{GOLD_COMPANION_REPO}`](https://huggingface.co/datasets/{GOLD_COMPANION_REPO})
- This Explorer loads public event metadata, public stages, public sanitized FinalCascade records, and public visualization paths.
- It does not load gated Gold references.
- Public FinalCascade and Gantt views are supplementary inspection assets, not official scoring references.
"""
    )