from __future__ import annotations
import json
import logging
import sys
from pathlib import Path
# Directory that contains this script (symlinks resolved).
APP_ROOT = Path(__file__).resolve().parent
# Put `<APP_ROOT>/src` at the front of the import path before the
# `h2epr_explorer` imports below run (presumably that is where the package
# lives -- confirm against the repository layout).
sys.path.insert(0, str(APP_ROOT / "src"))
import streamlit as st
from h2epr_explorer.constants import (
CATALOG_COLUMNS,
GOLD_COMPANION_REPO,
PUBLIC_DATASET_REPO,
RELEASE_BOUNDARY_NOTICE,
)
from h2epr_explorer.data_loader import load_catalog, load_event_graph, load_finalcascade_summary, load_stages
from h2epr_explorer.filters import event_description, event_display_label, event_name, filter_catalog
from h2epr_explorer.navigation import (
build_event_links,
filter_summary_text,
query_param_event_id,
resolve_selected_event_index,
)
from h2epr_explorer.render_gantt import build_timeline_figure
def _as_records(frame):
return frame.to_dict(orient="records")
def _select_columns(frame, columns):
present = [column for column in columns if column in frame.columns]
return frame[present] if present else frame
def _sort_stage_frame(frame):
sort_columns = [column for column in ("stage_index", "stage_order", "stage_id") if column in frame.columns]
return frame.sort_values(sort_columns) if sort_columns else frame
def _safe_int(value, default=0):
try:
return int(value)
except (TypeError, ValueError):
return default
# Page-level configuration; this is the first Streamlit call in the script.
st.set_page_config(page_title="H2EPR-Bench Explorer", layout="wide")
# Inject custom CSS: a bordered card look for metric widgets, plus the
# kicker/title/subtitle text classes used by the header markup below.
st.markdown(
"""
<style>
div[data-testid="stMetric"] {
border: 1px solid #e5e7eb;
border-radius: 8px;
padding: 0.35rem 0.6rem;
background: #fbfbf8;
}
.h2epr-kicker {
color: #4b5563;
font-size: 0.92rem;
letter-spacing: 0;
margin-bottom: 0.25rem;
}
.h2epr-title {
font-size: 2.15rem;
font-weight: 760;
line-height: 1.12;
margin-bottom: 0.25rem;
}
.h2epr-subtitle {
color: #374151;
max-width: 920px;
margin-bottom: 0.75rem;
}
</style>
""",
unsafe_allow_html=True,
)
# Page header rendered as raw HTML so the CSS classes above apply.
st.markdown('<div class="h2epr-kicker">H²EPR-Bench · public release explorer</div>', unsafe_allow_html=True)
st.markdown('<div class="h2epr-title">Event-process graph browser</div>', unsafe_allow_html=True)
st.markdown(
'<div class="h2epr-subtitle">Browse public event metadata, stage rows, FinalCascade summaries, and Gantt-style timelines for the H²EPR-Bench release.</div>',
unsafe_allow_html=True,
)
# Show the release-boundary notice on every run, above all tabs.
st.info(RELEASE_BOUNDARY_NOTICE)
# Load the public tables. The catalog is also flattened into row dictionaries,
# which is the form passed to the filter and navigation helpers.
catalog = load_catalog()
stages = load_stages()
summary = load_finalcascade_summary()
catalog_rows = _as_records(catalog)

# Sidebar: free-text search, facet pickers, minimum-count sliders, repo links.
with st.sidebar:
    st.header("Filter events")
    query = st.text_input("Search", placeholder="event name, ID, category, keyword")

    # Facet options are the distinct non-null values found in the catalog.
    domain_options = sorted(catalog["domain"].dropna().unique().tolist())
    domains = st.multiselect("Domain", domain_options)

    category_options = sorted(catalog["event_category"].dropna().unique().tolist())
    categories = st.multiselect("Category", category_options)

    # Slider upper bounds track the largest count present in the catalog.
    source_count_ceiling = int(catalog["source_count"].max())
    min_source_count = st.slider("Minimum sources", 0, source_count_ceiling, 0)

    stage_count_ceiling = int(catalog["stage_count"].max())
    min_stage_count = st.slider("Minimum stages", 0, stage_count_ceiling, 0)

    st.divider()
    dataset_url = f"https://huggingface.co/datasets/{PUBLIC_DATASET_REPO}"
    st.link_button("Dataset repository", dataset_url, use_container_width=True)
    gold_url = f"https://huggingface.co/datasets/{GOLD_COMPANION_REPO}"
    st.link_button("Request Gold access", gold_url, use_container_width=True)

# Apply every sidebar control to the catalog rows in one call.
active_filters = {
    "query": query,
    "domains": domains,
    "categories": categories,
    "min_source_count": min_source_count,
    "min_stage_count": min_stage_count,
}
filtered_rows = filter_catalog(catalog_rows, **active_filters)

# Nothing to select from: tell the user and end this script run here.
if not filtered_rows:
    st.warning("No event matches the current filters.")
    st.stop()
# Display labels are built for the whole catalog, keyed by event ID.
event_labels = {row["event_id"]: event_display_label(row) for row in catalog_rows}

# Pre-select the event named in the URL query parameters; the helper resolves
# it to a position within the currently filtered rows.
requested_event_id = query_param_event_id(st.query_params)
selected_index = resolve_selected_event_index(filtered_rows, requested_event_id)
selected_event = st.selectbox(
    "Selected event",
    [row["event_id"] for row in filtered_rows],
    index=selected_index,
    format_func=lambda event_id: event_labels.get(event_id, event_id),
)

# Mirror the selection back into the URL. This stays best-effort (a failure
# must not break the page), but the failure is logged instead of being
# silently discarded.
try:
    st.query_params["event_id"] = selected_event
except Exception:
    logging.getLogger(__name__).warning(
        "Could not write event_id=%r to the URL query parameters",
        selected_event,
        exc_info=True,
    )

# Per-event slices of the loaded tables.
event_row = catalog[catalog["event_id"] == selected_event].iloc[0]
event_record = event_row.to_dict()
event_stages = _sort_stage_frame(stages[stages["event_id"] == selected_event])
summary_row = summary[summary["event_id"] == selected_event]

# A missing cell may come back as float NaN, which is truthy: `NaN or ""`
# would keep the NaN and str() would turn it into the literal path "nan".
# Treat None/NaN as "no artifact"; every other value is handled as before.
gantt_html_path = event_record.get("gantt_html_path")
gantt_path_missing = gantt_html_path is None or (
    isinstance(gantt_html_path, float) and gantt_html_path != gantt_html_path  # NaN != NaN
)
event_links = build_event_links(
    selected_event,
    "" if gantt_path_missing else str(gantt_html_path or ""),
)

st.caption(filter_summary_text(len(filtered_rows), len(catalog_rows)))
# Main content area: one tab per view of the selected event / release.
tabs = st.tabs(["Catalog", "Event detail", "Timeline", "Stages", "FinalCascade JSON", "Access and boundary"])

# Tab 0: the catalog restricted to the events that passed the sidebar filters.
with tabs[0]:
    st.subheader("Event catalog")
    visible_event_ids = [row["event_id"] for row in filtered_rows]
    visible_catalog = catalog[catalog["event_id"].isin(visible_event_ids)]
    st.dataframe(
        _select_columns(visible_catalog, CATALOG_COLUMNS),
        use_container_width=True,
        height=520,
    )
# Tab 1: headline counts, profile row, outbound links and the public summary.
with tabs[1]:
    st.subheader(event_name(event_record))
    st.write(event_description(event_record))

    # One metric card per headline count, laid out left to right.
    headline_counts = (
        ("Sources", "source_count"),
        ("Stages", "stage_count"),
        ("Episodes", "episode_count"),
        ("Participants", "participant_count"),
        ("Relations", "relation_count"),
    )
    for metric_column, (metric_label, count_field) in zip(st.columns(5), headline_counts):
        metric_column.metric(metric_label, _safe_int(event_row.get(count_field, 0)))

    st.markdown("#### Event profile")
    profile_columns = [
        "event_id",
        "display_name",
        "domain",
        "event_category",
        "event_scope_label",
        "keywords",
        "event_boundary_time_status",
        "temporal_anchor_summary",
        "gold_reference_access_level",
        "finalcascade_access_level",
    ]
    profile_frame = catalog[catalog["event_id"] == selected_event]
    st.dataframe(_select_columns(profile_frame, profile_columns), use_container_width=True)

    # Link row; the Gantt button only appears when a link was built for it.
    dataset_col, gold_col, cascade_col, gantt_col = st.columns(4)
    dataset_col.link_button("Open dataset", event_links["public_dataset"], use_container_width=True)
    gold_col.link_button("Gold access", event_links["gold_request"], use_container_width=True)
    cascade_col.link_button("FinalCascade file", event_links["finalcascade_jsonl"], use_container_width=True)
    if "gantt_html" in event_links:
        gantt_col.link_button("Gantt artifact", event_links["gantt_html"], use_container_width=True)

    # The summary table is shown only when the event has a summary row.
    if not summary_row.empty:
        st.markdown("#### Public FinalCascade summary")
        summary_columns = [
            "event_id",
            "stage_count",
            "episode_count",
            "participant_count",
            "transaction_count",
            "relation_count",
            "event_boundary_time_status",
            "known_action_time_anchor_count",
            "not_gold_warning",
        ]
        st.dataframe(_select_columns(summary_row, summary_columns), use_container_width=True)
# Tab 2: timeline chart built from the selected event's stage rows.
with tabs[2]:
    figure = build_timeline_figure(_as_records(event_stages), selected_event)
    if figure is None:
        st.warning("No public stage rows are available for this event.")
    else:
        st.plotly_chart(figure, use_container_width=True)

    # Show the artifact path only when there really is one. `.get` already
    # returns None for an absent field, and a missing cell may be float NaN,
    # which is truthy and would otherwise be rendered as the text "nan".
    gantt_artifact_path = event_row.get("gantt_html_path")
    gantt_path_is_nan = (
        isinstance(gantt_artifact_path, float)
        and gantt_artifact_path != gantt_artifact_path  # NaN != NaN
    )
    if gantt_artifact_path and not gantt_path_is_nan:
        st.markdown(f"Gantt HTML artifact path: `{gantt_artifact_path}`")
# Tab 3: the selected event's stage rows as a table.
with tabs[3]:
    stage_table_options = {"use_container_width": True, "height": 520}
    st.dataframe(event_stages, **stage_table_options)

# Tab 4: the selected event's graph, downloadable and viewable as JSON.
with tabs[4]:
    graph = load_event_graph(selected_event)
    graph_json = json.dumps(graph, ensure_ascii=False, indent=2)
    download_name = f"{selected_event}_finalcascade_public.json"
    st.download_button(
        "Download selected public FinalCascade JSON",
        data=graph_json,
        file_name=download_name,
        mime="application/json",
    )
    st.json(graph, expanded=False)
# Tab 5: static description of what this Explorer does and does not load.
with tabs[5]:
    # The markdown text starts at column 0 on purpose: indenting it would
    # change the string that is rendered.
    boundary_markdown = f"""
### Release boundary
- Public dataset repo: [`{PUBLIC_DATASET_REPO}`](https://huggingface.co/datasets/{PUBLIC_DATASET_REPO})
- Manual-gated Gold companion: [`{GOLD_COMPANION_REPO}`](https://huggingface.co/datasets/{GOLD_COMPANION_REPO})
- This Explorer loads public event metadata, public stages, public sanitized FinalCascade records, and public visualization paths.
- It does not load gated Gold references.
- Public FinalCascade and Gantt views are supplementary inspection assets, not official scoring references.
"""
    st.markdown(boundary_markdown)