# Spaces: ND18 / DT
# Sleeping
# File size: 3,969 Bytes

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import streamlit as st

# ======================
# PAGE CONFIG
# ======================
# Page-level settings are applied first, ahead of every other Streamlit call
# in this script.
page_settings = {"page_title": "MOBA Toxicity Analysis", "layout": "wide"}
st.set_page_config(**page_settings)

st.title("Toxicity in MOBA Matches")

# Short introduction shown directly under the title.
intro_text = """
    This dashboard explores the relationship between toxic chat messages
    and match outcomes in MOBA games (Dota 2).
    """
st.markdown(intro_text)

# Up-front caveat about the size of the underlying sample.
sample_size_caveat = (
    "This analysis is based on a limited sample of matches. "
    "For some game modes or lobby types, results may be missing "
    "or not statistically reliable."
)
st.warning(sample_size_caveat)

# ======================
# LOAD DATA
# ======================
@st.cache_data
def load_data():
    """Read the chat dataset from the Excel workbook.

    The function is decorated with ``st.cache_data``, so Streamlit caches
    the result instead of re-reading the workbook on every script rerun.

    Returns:
        The DataFrame produced by ``pd.read_excel`` for
        ``src/Final_Dataset_Dota.xlsx`` (path relative to the working
        directory of the app).
    """
    dataset_path = "src/Final_Dataset_Dota.xlsx"
    dataset = pd.read_excel(dataset_path)
    return dataset

df = load_data()

# ======================
# TOXICITY DEFINITION
# ======================
# Classifier labels that count as toxic. The list already carries some labels
# in two casings ("retarded"/"Retarded", "End Fast"/"end fast") but not others
# ("Bitch", "report"), so the comparison below is done case-insensitively to
# treat every label the same way.
negative_labels = [
    "piece of shit", "retarded", "Retarded",
    "Bitch", "Clowns", "report",
    "End Fast", "end fast"
]
_negative_labels_folded = {label.casefold() for label in negative_labels}

# astype(str) keeps the .str accessor usable even when the column holds
# missing or non-string values; those become strings such as "nan", which
# are not in the label set and therefore flagged False.
_labels_folded = df["Message_Clean_Classified"].astype(str).str.casefold()
df["is_negative"] = _labels_folded.isin(_negative_labels_folded)

# Presumably `time` is in seconds, making `time_min` minutes — TODO confirm
# against the dataset.
df["time_min"] = df["time"] / 60

# NOTE(review): "Win"/"Lose" is derived from `radiant_win`, i.e. it looks like
# the outcome is expressed from the Radiant side — confirm this is the
# intended reading. Values other than True/False map to NaN.
df["outcome"] = df["radiant_win"].map({True: "Win", False: "Lose"})

# ======================
# FILTERS (SAFE)
# ======================
# Sidebar widgets: each multiselect offers the sorted, distinct, non-null
# values of its column. Nothing is pre-selected.
st.sidebar.header("Context filters")

game_mode_options = sorted(df["game_mode"].dropna().unique())
game_mode_filter = st.sidebar.multiselect("Game mode", game_mode_options)

lobby_type_options = sorted(df["lobby_type"].dropna().unique())
lobby_type_filter = st.sidebar.multiselect("Lobby type", lobby_type_options)

# Work on a copy; a filter is applied only when its selection is non-empty,
# so an empty selection means "no restriction" for that column.
df_f = df.copy()
selections_by_column = (
    ("game_mode", game_mode_filter),
    ("lobby_type", lobby_type_filter),
)
for filter_column, chosen_values in selections_by_column:
    if chosen_values:
        df_f = df_f[df_f[filter_column].isin(chosen_values)]

# A combination of selections can match no rows at all; in that case tell
# the user and stop the script before any chart is drawn.
if df_f.empty:
    st.warning("No data available for the selected filters.")
    st.stop()

# ======================
# GRAPH 1 – TOXICITY vs MATCH OUTCOME
# ======================
st.subheader("Toxicity vs Match Outcome")

# One row per (match_id, outcome) pair holding the number of messages flagged
# toxic. Summing the boolean flag counts the True values.
toxicity_per_match = (
    df_f.groupby(["match_id", "outcome"])["is_negative"]
    .sum()
    .reset_index(name="toxic_messages")
)

# Box plot of the per-match toxic message counts, split by outcome.
fig1, ax1 = plt.subplots(figsize=(6, 4))
sns.boxplot(
    data=toxicity_per_match,
    x="outcome",
    y="toxic_messages",
    ax=ax1
)
ax1.set_xlabel("Match outcome")
ax1.set_ylabel("Number of toxic messages")

st.pyplot(fig1)
# pyplot keeps every figure it creates registered until it is closed. The
# script reruns on each interaction, so without this the figures pile up in
# memory for the lifetime of the server process.
plt.close(fig1)

# ======================
# GRAPH 2 – TOXICITY BY GAME PHASE
# ======================
# Section heading displayed above the game-phase bar chart.
st.subheader("Toxicity by Game Phase")

def game_phase(t, early_end=10, mid_end=25):
    """Classify a match time into a game phase label.

    The script applies this to the ``time_min`` column, so ``t`` is expected
    to be in minutes.

    Args:
        t: Time of the message. Values below ``early_end`` (including
            negative ones) are early game.
        early_end: Exclusive upper bound of the early game. Defaults to 10.
        mid_end: Exclusive upper bound of the mid game. Defaults to 25.

    Returns:
        ``"Early game"``, ``"Mid game"`` or ``"Late game"``; ``None`` when
        ``t`` is missing (NaN/None).
    """
    # Every comparison against NaN is False, so without this guard a missing
    # timestamp would fall through to the final branch and be counted as
    # "Late game".
    if pd.isna(t):
        return None
    if t < early_end:
        return "Early game"
    if t < mid_end:
        return "Mid game"
    return "Late game"

# Copy before adding a column so the assignment does not target a slice of
# the original frame.
df_f = df_f.copy()
df_f["phase"] = df_f["time_min"].apply(game_phase)

# Chronological order of the labels returned by game_phase. Grouping alone
# would sort them alphabetically (Early, Late, Mid).
phase_order = ["Early game", "Mid game", "Late game"]

# Number of toxic messages per phase. Reindexing puts the phases in
# chronological order and keeps a phase with no toxic messages as an explicit
# zero instead of dropping it from the chart.
phase_stats = (
    df_f[df_f["is_negative"]]
    .groupby("phase")
    .size()
    .reindex(phase_order, fill_value=0)
    .rename_axis("phase")
    .reset_index(name="toxic_messages")
)

fig2, ax2 = plt.subplots(figsize=(6, 4))
sns.barplot(
    data=phase_stats,
    x="phase",
    y="toxic_messages",
    order=phase_order,
    ax=ax2
)
ax2.set_xlabel("Game phase")
ax2.set_ylabel("Number of toxic messages")

st.pyplot(fig2)
# Close the figure after rendering; pyplot otherwise keeps it alive across
# Streamlit reruns and memory grows with every interaction.
plt.close(fig2)

# ======================
# GRAPH 3 – TOXICITY HEATMAP BY REGION
# ======================
st.subheader("Toxicity by Region")

# Per-region totals computed in a single pass over all messages. Starting
# from every region (rather than only those that have toxic messages) keeps
# regions with zero toxic messages in the result at a rate of 0 instead of
# silently dropping them.
# `is_negative` is a boolean flag without missing values, so "sum" counts
# the toxic messages and "count" counts all messages of the region.
region_stats = (
    df_f.groupby("region")["is_negative"]
    .agg(toxic_messages="sum", total_messages="count")
    .reset_index()
)
region_stats["toxicity_rate"] = (
    region_stats["toxic_messages"] / region_stats["total_messages"]
)

# Single-column frame indexed by region: one heatmap cell per region.
heatmap_data = region_stats.set_index("region")[["toxicity_rate"]]

if heatmap_data.empty:
    # No row of the current selection carries a region; a heatmap cannot be
    # drawn from an empty frame.
    st.info("No region information available for the selected filters.")
else:
    fig3, ax3 = plt.subplots(figsize=(4, 6))
    sns.heatmap(
        heatmap_data,
        cmap="Reds",
        linewidths=0.5,
        cbar_kws={"label": "Toxicity rate"},
        ax=ax3
    )

    ax3.set_xlabel("")
    ax3.set_ylabel("Region")

    st.pyplot(fig3)
    # Close the figure after rendering; pyplot otherwise keeps it alive
    # across Streamlit reruns and memory grows with every interaction.
    plt.close(fig3)

# ======================
# FOOTER
# ======================
# Markdown thematic break ("---") rendered as the last element of the page.
st.markdown("---")