Spaces:

AgenticFinLab
/

H2EPR-Bench-Explorer

Sleeping

App Files Files Community

H2EPR-Bench-Explorer / app.py

wandler67

Improve H2EPR Explorer event navigation and timeline UX

62cba3a verified 8 days ago

Raw

History Blame Contribute Delete

8.23 kB

	from __future__ import annotations

	import json
	from pathlib import Path
	import sys

	# Directory that contains this script, as an absolute, symlink-resolved path.
	APP_ROOT = Path(__file__).resolve().parent
	# Put the sibling "src" directory first on the import path before the
	# h2epr_explorer imports that follow; presumably the package lives there --
	# confirm against the repository layout.
	sys.path.insert(0, str(APP_ROOT / "src"))

	import streamlit as st

	from h2epr_explorer.constants import (
	CATALOG_COLUMNS,
	GOLD_COMPANION_REPO,
	PUBLIC_DATASET_REPO,
	RELEASE_BOUNDARY_NOTICE,
	)
	from h2epr_explorer.data_loader import load_catalog, load_event_graph, load_finalcascade_summary, load_stages
	from h2epr_explorer.filters import event_description, event_display_label, event_name, filter_catalog
	from h2epr_explorer.navigation import (
	build_event_links,
	filter_summary_text,
	query_param_event_id,
	resolve_selected_event_index,
	)
	from h2epr_explorer.render_gantt import build_timeline_figure


	def _as_records(frame):
	return frame.to_dict(orient="records")


	def _select_columns(frame, columns):
	present = [column for column in columns if column in frame.columns]
	return frame[present] if present else frame


	def _sort_stage_frame(frame):
	sort_columns = [column for column in ("stage_index", "stage_order", "stage_id") if column in frame.columns]
	return frame.sort_values(sort_columns) if sort_columns else frame


	def _safe_int(value, default=0):
	try:
	return int(value)
	except (TypeError, ValueError):
	return default


	st.set_page_config(layout="wide", page_title="H2EPR-Bench Explorer")

	# Page-wide CSS: a bordered card look for Streamlit metric elements plus the
	# three h2epr-* text classes used by the header snippets rendered below.
	_page_css = """
	<style>
	div[data-testid="stMetric"] {
	border: 1px solid #e5e7eb;
	border-radius: 8px;
	padding: 0.35rem 0.6rem;
	background: #fbfbf8;
	}
	.h2epr-kicker {
	color: #4b5563;
	font-size: 0.92rem;
	letter-spacing: 0;
	margin-bottom: 0.25rem;
	}
	.h2epr-title {
	font-size: 2.15rem;
	font-weight: 760;
	line-height: 1.12;
	margin-bottom: 0.25rem;
	}
	.h2epr-subtitle {
	color: #374151;
	max-width: 920px;
	margin-bottom: 0.75rem;
	}
	</style>
	"""
	st.markdown(_page_css, unsafe_allow_html=True)

	# Header: kicker line, title and subtitle, each emitted as raw HTML in order.
	_header_blocks = (
	    '<div class="h2epr-kicker">H²EPR-Bench · public release explorer</div>',
	    '<div class="h2epr-title">Event-process graph browser</div>',
	    '<div class="h2epr-subtitle">Browse public event metadata, stage rows, FinalCascade summaries, and Gantt-style timelines for the H²EPR-Bench release.</div>',
	)
	for _header_html in _header_blocks:
	    st.markdown(_header_html, unsafe_allow_html=True)

	# Release-boundary notice, shown as an info box below the header.
	st.info(RELEASE_BOUNDARY_NOTICE)

	# Load the three public tables used throughout the page.
	catalog = load_catalog()
	stages = load_stages()
	summary = load_finalcascade_summary()

	# Row-dict view of the catalog; this is what filter_catalog receives.
	catalog_rows = _as_records(catalog)

	with st.sidebar:
	    st.header("Filter events")
	    query = st.text_input("Search", placeholder="event name, ID, category, keyword")

	    # Multiselect options are the distinct non-null values, sorted.
	    domain_options = sorted(catalog["domain"].dropna().unique().tolist())
	    domains = st.multiselect("Domain", domain_options)
	    category_options = sorted(catalog["event_category"].dropna().unique().tolist())
	    categories = st.multiselect("Category", category_options)

	    # Slider upper bounds come from the largest counts in the catalog.
	    max_source_count = int(catalog["source_count"].max())
	    min_source_count = st.slider("Minimum sources", 0, max_source_count, 0)
	    max_stage_count = int(catalog["stage_count"].max())
	    min_stage_count = st.slider("Minimum stages", 0, max_stage_count, 0)

	    st.divider()
	    dataset_url = f"https://huggingface.co/datasets/{PUBLIC_DATASET_REPO}"
	    st.link_button("Dataset repository", dataset_url, use_container_width=True)
	    gold_url = f"https://huggingface.co/datasets/{GOLD_COMPANION_REPO}"
	    st.link_button("Request Gold access", gold_url, use_container_width=True)

	filtered_rows = filter_catalog(
	    catalog_rows,
	    query=query,
	    domains=domains,
	    categories=categories,
	    min_source_count=min_source_count,
	    min_stage_count=min_stage_count,
	)

	# With no matching event there is nothing to select: warn and end this run.
	if not filtered_rows:
	    st.warning("No event matches the current filters.")
	    st.stop()

	# Labels are built for every catalog row, not only the filtered ones.
	event_labels = {record["event_id"]: event_display_label(record) for record in catalog_rows}
	requested_event_id = query_param_event_id(st.query_params)
	selected_index = resolve_selected_event_index(filtered_rows, requested_event_id)
	filtered_event_ids = [record["event_id"] for record in filtered_rows]


	def _format_event(event_id):
	    """Return the display label for ``event_id``, or the raw ID if unknown."""
	    return event_labels.get(event_id, event_id)


	selected_event = st.selectbox(
	    "Selected event",
	    filtered_event_ids,
	    index=selected_index,
	    format_func=_format_event,
	)

	# Mirror the selection into the URL query string. This is best-effort: any
	# failure to write the parameter is ignored so the page still renders.
	try:
	    st.query_params["event_id"] = selected_event
	except Exception:
	    pass

	# Per-event slices of the three tables, shared by the tabs further down.
	is_selected = catalog["event_id"] == selected_event
	event_row = catalog[is_selected].iloc[0]
	event_record = event_row.to_dict()
	event_stages = _sort_stage_frame(stages[stages["event_id"] == selected_event])
	summary_row = summary[summary["event_id"] == selected_event]
	gantt_html_path = str(event_record.get("gantt_html_path") or "")
	event_links = build_event_links(selected_event, gantt_html_path)

	st.caption(filter_summary_text(len(filtered_rows), len(catalog_rows)))

	tab_titles = ["Catalog", "Event detail", "Timeline", "Stages", "FinalCascade JSON", "Access and boundary"]
	tabs = st.tabs(tab_titles)

	# Catalog tab: the catalog rows whose event_id survived the sidebar filters,
	# narrowed to CATALOG_COLUMNS where those columns exist.
	with tabs[0]:
	    st.subheader("Event catalog")
	    visible_ids = [record["event_id"] for record in filtered_rows]
	    visible_catalog = catalog[catalog["event_id"].isin(visible_ids)]
	    st.dataframe(_select_columns(visible_catalog, CATALOG_COLUMNS), use_container_width=True, height=520)

	# Event detail tab: name, description, count metrics, profile row, links and
	# (when present) the public FinalCascade summary row.
	with tabs[1]:
	    st.subheader(event_name(event_record))
	    st.write(event_description(event_record))

	    # One metric per count column; _safe_int turns missing or unparsable
	    # values into 0.
	    metric_specs = (
	        ("Sources", "source_count"),
	        ("Stages", "stage_count"),
	        ("Episodes", "episode_count"),
	        ("Participants", "participant_count"),
	        ("Relations", "relation_count"),
	    )
	    for metric_col, (metric_label, metric_key) in zip(st.columns(5), metric_specs):
	        metric_col.metric(metric_label, _safe_int(event_row.get(metric_key, 0)))

	    st.markdown("#### Event profile")
	    profile_columns = [
	        "event_id",
	        "display_name",
	        "domain",
	        "event_category",
	        "event_scope_label",
	        "keywords",
	        "event_boundary_time_status",
	        "temporal_anchor_summary",
	        "gold_reference_access_level",
	        "finalcascade_access_level",
	    ]
	    selected_catalog_rows = catalog[catalog["event_id"] == selected_event]
	    st.dataframe(_select_columns(selected_catalog_rows, profile_columns), use_container_width=True)

	    # The first three link buttons are always shown; the fourth column is
	    # used only when a "gantt_html" link exists.
	    link_cols = st.columns(4)
	    link_specs = (
	        ("Open dataset", "public_dataset"),
	        ("Gold access", "gold_request"),
	        ("FinalCascade file", "finalcascade_jsonl"),
	    )
	    for link_col, (link_label, link_key) in zip(link_cols, link_specs):
	        link_col.link_button(link_label, event_links[link_key], use_container_width=True)
	    if "gantt_html" in event_links:
	        link_cols[3].link_button("Gantt artifact", event_links["gantt_html"], use_container_width=True)

	    if not summary_row.empty:
	        st.markdown("#### Public FinalCascade summary")
	        summary_columns = [
	            "event_id",
	            "stage_count",
	            "episode_count",
	            "participant_count",
	            "transaction_count",
	            "relation_count",
	            "event_boundary_time_status",
	            "known_action_time_anchor_count",
	            "not_gold_warning",
	        ]
	        st.dataframe(_select_columns(summary_row, summary_columns), use_container_width=True)

	# Timeline tab: a Plotly figure built from the selected event's stage rows.
	with tabs[2]:
	figure = build_timeline_figure(_as_records(event_stages), selected_event)
	# A None figure is reported to the user as "no public stage rows".
	if figure is None:
	st.warning("No public stage rows are available for this event.")
	else:
	st.plotly_chart(figure, use_container_width=True)
	# NOTE(review): indentation is flattened in this copy of the file, so it is
	# not visible whether the artifact-path line below belongs to the else
	# branch or runs for every event -- confirm before re-indenting.
	# NOTE(review): a NaN in gantt_html_path would be truthy here and print as
	# `nan`; confirm the loader normalises missing paths.
	if "gantt_html_path" in event_row and event_row.get("gantt_html_path"):
	st.markdown(f"Gantt HTML artifact path: `{event_row.get('gantt_html_path')}`")

	# Stages tab: the stage rows of the selected event.
	with tabs[3]:
	    st.dataframe(event_stages, height=520, use_container_width=True)

	# FinalCascade JSON tab: download button plus a collapsed JSON viewer.
	with tabs[4]:
	    graph = load_event_graph(selected_event)
	    graph_json = json.dumps(graph, ensure_ascii=False, indent=2)
	    download_name = f"{selected_event}_finalcascade_public.json"
	    st.download_button(
	        "Download selected public FinalCascade JSON",
	        data=graph_json,
	        file_name=download_name,
	        mime="application/json",
	    )
	    st.json(graph, expanded=False)

	# Access and boundary tab: static notes on what this Explorer loads. The
	# markdown text keeps its original left margin so the rendered string is
	# unchanged.
	with tabs[5]:
	    st.markdown(
	        f"""
	### Release boundary

	- Public dataset repo: [`{PUBLIC_DATASET_REPO}`](https://huggingface.co/datasets/{PUBLIC_DATASET_REPO})
	- Manual-gated Gold companion: [`{GOLD_COMPANION_REPO}`](https://huggingface.co/datasets/{GOLD_COMPANION_REPO})
	- This Explorer loads public event metadata, public stages, public sanitized FinalCascade records, and public visualization paths.
	- It does not load gated Gold references.
	- Public FinalCascade and Gantt views are supplementary inspection assets, not official scoring references.
	"""
	    )