import streamlit as st
import json
import pandas as pd
import altair as alt
# ---------------------------
# CONFIG
# ---------------------------
# Page-level settings are issued before any other Streamlit output.
st.set_page_config(layout="wide")
st.title("Dota Toxic Messages Dashboard")

# Location of the JSON dataset read by the loader below.
FILE_PATH = "src/data_final_version.json"
# ---------------------------
# LOAD DATA
# ---------------------------
@st.cache_data
def load_data() -> pd.DataFrame:
    """Parse the JSON file at ``FILE_PATH`` and return it as a DataFrame.

    The function is wrapped in ``st.cache_data``, so Streamlit may serve a
    cached result instead of re-reading the file on each rerun.
    """
    with open(FILE_PATH, mode="r", encoding="utf-8") as source:
        return pd.DataFrame(json.load(source))


df = load_data()
# ---------------------------
# CLEAN DATA
# ---------------------------
# Replace missing values in these three columns with an explicit placeholder
# label (note the lower-case label for the classification column).
for column, placeholder in (
    ("lobby_type", "Unknown"),
    ("region", "Unknown"),
    ("message_classified", "unknown"),
):
    df[column] = df[column].fillna(placeholder)
# ---------------------------
# SIDEBAR FILTERS
# ---------------------------
st.sidebar.header("Filters")

# Every distinct value is offered and pre-selected, so nothing is filtered
# out until the user deselects something.
region_choices = df["region"].unique()
regions = st.sidebar.multiselect(
    "Region", options=region_choices, default=region_choices
)

lobby_choices = df["lobby_type"].unique()
lobbies = st.sidebar.multiselect(
    "Lobby Type", options=lobby_choices, default=lobby_choices
)

# Keep only the rows whose region AND lobby type are both selected.
region_selected = df["region"].isin(regions)
lobby_selected = df["lobby_type"].isin(lobbies)
df_filtered = df[region_selected & lobby_selected]
# ---------------------------
# KPI
# ---------------------------
# Headline numbers for the current filter selection, shown side by side.
col1, col2, col3 = st.columns(3)
col1.metric("Total Messages", len(df_filtered))
col2.metric("Unique Matches", df_filtered["match_id"].nunique())

# When the filters exclude every row, the mean of the empty column is NaN;
# show a readable placeholder instead of a literal "nan" in the metric.
avg_duration = df_filtered["duration_game"].mean()
col3.metric(
    "Avg Duration",
    "N/A" if pd.isna(avg_duration) else round(avg_duration, 1),
)
# ---------------------------
# CHART 1 - Toxic categories
# ---------------------------
st.subheader("Toxic Message Distribution")

# Horizontal bars: row count per message category, ordered by descending count.
count_axis = alt.X("count()", title="Count")
category_axis = alt.Y("message_classified", sort="-x", title="Category")
chart1 = (
    alt.Chart(df_filtered)
    .mark_bar()
    .encode(x=count_axis, y=category_axis, color="message_classified")
)
st.altair_chart(chart1, use_container_width=True)
# ---------------------------
# CHART 2 - By Lobby Type
# ---------------------------
st.subheader("Toxicity by Lobby Type")

# Vertical bars: row count per lobby type, coloured by lobby type.
lobby_encoding = {"x": "lobby_type", "y": "count()", "color": "lobby_type"}
chart2 = alt.Chart(df_filtered).mark_bar().encode(**lobby_encoding)
st.altair_chart(chart2, use_container_width=True)
# ---------------------------
# CHART 3 - By Region
# ---------------------------
st.subheader("Toxicity by Region")

# Vertical bars: row count per region, ordered by descending count.
region_axis = alt.X("region", sort="-y")
chart3 = (
    alt.Chart(df_filtered)
    .mark_bar()
    .encode(x=region_axis, y="count()", color="region")
)
st.altair_chart(chart3, use_container_width=True)
# ---------------------------
# RAW DATA (optional)
# ---------------------------
# Expander panel holding the filtered rows as an interactive table.
raw_panel = st.expander("Show raw data")
raw_panel.dataframe(df_filtered)