"""VynFi × pm4py: Interactive Process Mining Demo"""
import streamlit as st
import pandas as pd
from collections import Counter
# Page chrome: browser-tab title and icon, wide layout, then the visible
# heading and a caption linking to the data provider.
# NOTE(review): the "π" glyph below looks like the leftover first byte of a
# mis-decoded emoji. The intended emoji cannot be recovered from this file, so
# it is left untouched here -- confirm and restore it.
st.set_page_config(page_title="VynFi Process Mining", page_icon="π", layout="wide")
# "×" restores the multiplication sign that had been garbled to "Γ".
st.title("π VynFi × pm4py: Process Mining Demo")
st.caption("Synthetic supply-chain event log from [VynFi](https://vynfi.com)")
@st.cache_data
def load_data():
    """Load the event table used by every tab of the demo.

    Fetches the ``events`` configuration (``train`` split) of the
    ``VynFi/vynfi-supply-chain-ocel`` dataset, parses the ``timestamp``
    column, discards rows whose timestamp could not be parsed, and renames
    the key columns to short display names.

    Returns:
        A pandas DataFrame in which ``case_id``, ``activity_name`` and
        ``timestamp`` have been renamed to ``case_id_pm``, ``activity`` and
        ``ts``.
    """
    # Imported here rather than at module level, as in the original layout.
    from datasets import load_dataset

    events = load_dataset(
        "VynFi/vynfi-supply-chain-ocel", "events", split="train"
    ).to_pandas()

    # Unparseable values become NaT instead of raising.
    events["timestamp"] = pd.to_datetime(events["timestamp"], errors="coerce")

    # Safe names without colons for display; the pm4py-specific names are
    # applied later, only where pm4py is actually called.
    display_names = {
        "case_id": "case_id_pm",
        "activity_name": "activity",
        "timestamp": "ts",
    }
    # Rows with NaT timestamps are dropped because pm4py can't handle them.
    return events.dropna(subset=["timestamp"]).rename(columns=display_names)
# Load the event table once; every section below reads from `df`.
df = load_data()

# Sidebar: headline numbers describing the dataset.
sidebar = st.sidebar
sidebar.header("Dataset")
for metric_label, metric_value in (
    ("Events", f"{len(df):,}"),
    ("Activities", df["activity"].nunique()),
    ("Cases", df["case_id_pm"].nunique()),
):
    sidebar.metric(metric_label, metric_value)

# One tab per view of the log; the tab objects are used as context managers.
tab_labels = ["Process Model", "Variants", "Statistics", "Raw Data"]
tab1, tab2, tab3, tab4 = st.tabs(tab_labels)
with tab1:
    st.subheader("Directly-Follows Graph")
    try:
        # Imported inside the try so that an import failure is reported
        # through the warning below instead of stopping the script.
        import pm4py

        # Map the display column names onto the column names pm4py uses.
        pm4py_columns = {
            "case_id_pm": "case:concept:name",
            "activity": "concept:name",
            "ts": "time:timestamp",
        }
        event_log = pm4py.convert_to_event_log(df.rename(columns=pm4py_columns))
        dfg, start_activities, end_activities = pm4py.discover_dfg(event_log)

        from pm4py.visualization.dfg import visualizer as dfg_vis

        # Frequency-annotated rendering, requested as SVG.
        frequency_variant = dfg_vis.Variants.FREQUENCY
        vis_params = frequency_variant.value.Parameters
        gviz = dfg_vis.apply(
            dfg,
            log=event_log,
            variant=frequency_variant,
            parameters={
                vis_params.START_ACTIVITIES: start_activities,
                vis_params.END_ACTIVITIES: end_activities,
                vis_params.FORMAT: "svg",
            },
        )
        svg_markup = dfg_vis.serialize(gviz).decode("utf-8")
        st.image(svg_markup, use_container_width=True)
    except Exception as exc:
        # Best effort: any failure in this tab is shown to the user and the
        # remaining tabs keep working.
        st.warning(f"Could not render DFG: {exc}")
        st.info("Try the Variants or Statistics tabs instead.")
with tab2:
    st.subheader("Process Variants")

    # A variant is the ordered tuple of activities of one case. The sort is
    # stable so that events sharing a timestamp keep their original row
    # order; with the default (unstable) sort such ties could be reordered
    # arbitrarily and change which variant a case is counted under.
    ordered_events = df.sort_values("ts", kind="stable")
    variants = {
        cid: tuple(grp["activity"].tolist())
        for cid, grp in ordered_events.groupby("case_id_pm")
    }

    vc = Counter(variants.values())
    total = len(variants)
    st.metric("Unique Variants", len(vc))

    # The 20 most frequent variants with absolute and relative frequency.
    # `total` is only used as a divisor when at least one variant exists.
    # " → " restores the arrow separator that had been garbled to "β".
    rows = [
        {
            "Trace": " → ".join(trace),
            "Count": count,
            "Frequency": f"{count/total*100:.1f}%",
        }
        for trace, count in vc.most_common(20)
    ]
    st.dataframe(pd.DataFrame(rows), use_container_width=True, hide_index=True)

    if vc:
        # The most common variant is reported as the "happy path".
        hp_trace, hp_count = vc.most_common(1)[0]
        st.info(
            f"**Happy path**: {' → '.join(hp_trace)} "
            f"({hp_count} cases, {hp_count/total*100:.1f}%)"
        )
with tab3:
    st.subheader("Activity Frequency")
    # Number of events per activity, as a two-column frame for the chart.
    activity_counts = (
        df["activity"]
        .value_counts()
        .reset_index()
        .set_axis(["Activity", "Count"], axis=1)
    )
    st.bar_chart(activity_counts, x="Activity", y="Count")

    st.subheader("Events Over Time")
    if "ts" in df.columns:
        # Number of events per weekly bin of the `ts` column.
        events_per_week = (
            df.set_index("ts")
            .resample("W")
            .size()
            .reset_index()
            .set_axis(["Week", "Events"], axis=1)
        )
        st.line_chart(events_per_week, x="Week", y="Events")
with tab4:
    st.subheader("Raw Event Data")
    # Only the first 200 rows of the event table are shown.
    preview_rows = df.head(200)
    st.dataframe(preview_rows, use_container_width=True)
# Footer: a rule followed by links to the data provider, pm4py and the dataset.
# The "·" separators restore middle dots that had been mis-decoded as "Β·".
st.divider()
st.caption(
    "[VynFi](https://vynfi.com) · "
    "[pm4py](https://pm4py.fit.fraunhofer.de/) · "
    "[Dataset](https://huggingface.co/datasets/VynFi/vynfi-supply-chain-ocel)"
)
|