Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,12 +1,20 @@
|
|
| 1 |
-
# app.py — Titanic Data Adventure
|
|
|
|
|
|
|
|
|
|
| 2 |
import gradio as gr
|
| 3 |
import pandas as pd
|
| 4 |
import numpy as np
|
| 5 |
import os
|
| 6 |
import plotly.express as px
|
|
|
|
| 7 |
from sklearn.model_selection import train_test_split
|
| 8 |
-
from sklearn.preprocessing import LabelEncoder
|
|
|
|
|
|
|
| 9 |
from sklearn.ensemble import RandomForestClassifier
|
|
|
|
|
|
|
| 10 |
|
| 11 |
# =========================
|
| 12 |
# Data laden en voorbereiden
|
|
@@ -39,27 +47,20 @@ def load_data(path="Titanic-Dataset.csv"):
|
|
| 39 |
|
| 40 |
df = load_data()
|
| 41 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
# =========================
|
| 43 |
-
#
|
| 44 |
# =========================
|
| 45 |
-
def
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
for c in X.select_dtypes("object").columns:
|
| 51 |
-
le = LabelEncoder()
|
| 52 |
-
X[c] = le.fit_transform(X[c])
|
| 53 |
-
|
| 54 |
-
X_train, X_test, y_train, y_test = train_test_split(
|
| 55 |
-
X, y, test_size=0.25, random_state=42, stratify=y
|
| 56 |
-
)
|
| 57 |
-
model = RandomForestClassifier(n_estimators=300, random_state=42)
|
| 58 |
-
model.fit(X_train, y_train)
|
| 59 |
-
acc = model.score(X_test, y_test)
|
| 60 |
-
return model, acc
|
| 61 |
|
| 62 |
-
|
| 63 |
|
| 64 |
# =========================
|
| 65 |
# Plots (licht, informatief)
|
|
@@ -76,33 +77,6 @@ def make_plot(fig, title):
|
|
| 76 |
)
|
| 77 |
return fig
|
| 78 |
|
| 79 |
-
def plot_survival_vs_age(dfx):
    """Scatter every passenger by age (x) against survival outcome (y).

    The numeric ``survived`` column is mapped to two categorical labels so the
    y-axis shows exactly two rows. Points are coloured by ``sex`` and carry the
    passenger attributes as hover data; ``name``, ``ticket`` and ``cabin`` are
    added to the hover only when those columns exist in ``dfx``.

    Args:
        dfx: passenger DataFrame; it is copied, so the caller's frame is not
            modified.

    Returns:
        Whatever ``make_plot`` returns for the finished figure and its title.
    """
    frame = dfx.copy()
    frame["overleving"] = frame["survived"].map({0: "Niet overleefd", 1: "Overleefd"})

    # Always-present hover fields first, optional identity fields afterwards.
    hover_fields = [
        "pclass", "sex", "age", "sibsp", "parch",
        "family_size", "fare", "embarked", "overleving",
    ]
    for optional in ["name", "ticket", "cabin"]:
        if optional in frame.columns:
            hover_fields.append(optional)

    axis_labels = {"age": "Leeftijd (jaar)", "overleving": "Overleving"}
    sex_palette = {"Male": "#A3B1C6", "Female": "#1B4B91"}
    scatter = px.scatter(
        frame,
        x="age",
        y="overleving",  # categorical y: two tidy rows
        color="sex",
        hover_data=hover_fields,
        labels=axis_labels,
        color_discrete_map=sex_palette,
        render_mode="auto",
    )

    # Round, semi-transparent markers with a thin white outline.
    outline = dict(width=0.6, color="white")
    marker_style = dict(symbol="circle", size=9, opacity=0.7, line=outline)
    scatter.update_traces(mode="markers", marker=marker_style)

    # Pin the row order so "Niet overleefd" is always the first category.
    scatter.update_yaxes(
        categoryorder="array",
        categoryarray=["Niet overleefd", "Overleefd"],
        title=None,
    )
    return make_plot(
        scatter,
        "Overleving (y) versus Leeftijd (x) — ronde bolletjes (hover voor details)",
    )
|
| 105 |
-
|
| 106 |
def plot_leeftijdsverdeling(dfx):
|
| 107 |
f = px.histogram(
|
| 108 |
dfx, x="age", color="status", nbins=30, barmode="overlay", opacity=0.75,
|
|
@@ -126,31 +100,123 @@ def plot_fare_vs_klasse(dfx):
|
|
| 126 |
return make_plot(f, "Ticketprijs per klasse (met overleving)")
|
| 127 |
|
| 128 |
# =========================
|
| 129 |
-
#
|
| 130 |
# =========================
|
| 131 |
-
def
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
|
| 137 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 138 |
|
| 139 |
# =========================
|
| 140 |
# Interactieve voorspelling + avontuur-tekst
|
| 141 |
# =========================
|
| 142 |
def predict_and_story(pclass, sex, age, sibsp, parch, fare, embarked):
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 146 |
|
| 147 |
-
X_row = [[int(pclass), sex_enc, float(age), int(sibsp), int(parch), float(fare), embarked_enc, family_size]]
|
| 148 |
prob = float(MODEL.predict_proba(X_row)[0,1])
|
| 149 |
pct = prob * 100
|
| 150 |
|
| 151 |
klasse_txt = {1:"eerste", 2:"tweede", 3:"derde"}.get(int(pclass), "onbekende")
|
| 152 |
haven_txt = {"C":"Cherbourg","Q":"Queenstown","S":"Southampton"}.get(embarked, "een onbekende haven")
|
| 153 |
-
rol_txt = "vrouw" if
|
| 154 |
|
| 155 |
if pct >= 75:
|
| 156 |
tone = "Je kansen zijn uitzonderlijk goed."
|
|
@@ -170,7 +236,7 @@ def predict_and_story(pclass, sex, age, sibsp, parch, fare, embarked):
|
|
| 170 |
|
| 171 |
**Situatie:** Je bent een **{rol_txt}** in de **{klasse_txt} klasse**, ingescheept in **{haven_txt}**.
|
| 172 |
Je bent **{int(age)}** jaar oud, reist met **{int(sibsp)}** broer(s)/zus(sen) en **{int(parch)}** ouder(s)/kind(eren).
|
| 173 |
-
Je ticket kostte **£{float(fare):.2f}** en je **familiegrootte** is **{
|
| 174 |
|
| 175 |
**Analyse:** {tone} Het model weegt o.a. klasse, geslacht, leeftijd en familieomvang mee—patronen in de historische data.
|
| 176 |
|
|
@@ -181,34 +247,13 @@ Je voelt de houten reling koud onder je hand. {ending}
|
|
| 181 |
return story
|
| 182 |
|
| 183 |
# =========================
|
| 184 |
-
# Introductietekst (aangepast
|
| 185 |
# =========================
|
| 186 |
INTRO_MD = """
|
| 187 |
# 🛳️ Titanic Data Adventure
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
**
|
| 191 |
-
De RMS *Titanic* vertrekt richting New York: een drijvend paleis, gevuld met verwachtingen. Aan boord: industriëlen in avondkleding, jonge gezinnen met één koffer, bemanningsleden die elke dag routine tot ritueel verheffen. De zee is kalm; de toekomst lijkt maakbaar.
|
| 192 |
-
|
| 193 |
-
Meer dan een eeuw later kijken wij mee — niet met verrekijkers of logboeken, maar met **data**. Elk record in deze dataset is een menselijk verhaal. Door de gegevens te verkennen, begrijpen we beter **wie overleefde — en waarom**.
|
| 194 |
-
|
| 195 |
-
---
|
| 196 |
-
|
| 197 |
-
## Wat je in dit dashboard gaat zien
|
| 198 |
-
- **Overleving versus leeftijd (2D)** — elk **rond bolletje** is één passagier.
|
| 199 |
-
*X-as = leeftijd*, *Y-as = overleving* (twee rijen: *Niet overleefd* en *Overleefd*).
|
| 200 |
-
**Kleur = geslacht**, en **hover** toont details (klasse, familieomvang, vertrekhaven, prijs en – als aanwezig – naam/ticket/cabin).
|
| 201 |
-
- **Leeftijdsverdeling** — overlappende histogrammen tonen verschillen tussen overlevers en niet-overlevers.
|
| 202 |
-
- **Geslachtsverdeling** — verhoudingen mannen/vrouwen in de dataset.
|
| 203 |
-
- **Fare per klasse** — prijsverschillen en spreiding, gekoppeld aan overleving.
|
| 204 |
-
- **Jouw scenario** — stel je eigen kenmerken in, bereken je kans en lees een korte scène uit die nacht.
|
| 205 |
-
|
| 206 |
-
---
|
| 207 |
-
|
| 208 |
-
## Wat een model wél en niet doet
|
| 209 |
-
- ✅ **Herkennen van patronen** in combinaties (bijv. *geslacht + klasse + leeftijd*).
|
| 210 |
-
- ✅ **Schatten, geen zekerheid** — het geeft **kansen**, geen waarheden.
|
| 211 |
-
- ❌ Geen moreel oordeel of individuele lotsbeschikking: context buiten de data blijft onzichtbaar.
|
| 212 |
"""
|
| 213 |
|
| 214 |
# =========================
|
|
@@ -231,14 +276,13 @@ h1, h2, h3, h4 { color: #1B4B91; }
|
|
| 231 |
}
|
| 232 |
.kpi .value { font-size:1.6rem; font-weight:800; color:#1B4B91; }
|
| 233 |
.kpi .label { font-size:.9rem; color:#3F557A; }
|
| 234 |
-
.scroll-md { max-height: 520px; overflow-y: auto; padding-right: 8px; }
|
| 235 |
"""
|
| 236 |
|
| 237 |
with gr.Blocks(css=CUSTOM_CSS, theme=gr.themes.Default(primary_hue="blue")) as demo:
|
| 238 |
-
# Intro
|
| 239 |
with gr.Row():
|
| 240 |
with gr.Column(scale=2, min_width=420):
|
| 241 |
-
gr.Markdown(INTRO_MD, elem_classes=["intro-card"
|
| 242 |
with gr.Column(scale=1, min_width=320):
|
| 243 |
hero_path = get_hero_image_path()
|
| 244 |
if hero_path:
|
|
@@ -246,6 +290,12 @@ with gr.Blocks(css=CUSTOM_CSS, theme=gr.themes.Default(primary_hue="blue")) as d
|
|
| 246 |
else:
|
| 247 |
gr.Markdown("⚠️ **Geen afbeelding gevonden.** Plaats `titanic_bg.png` of `titanic_bg.jpg` in de root.")
|
| 248 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 249 |
# KPI's
|
| 250 |
with gr.Row():
|
| 251 |
gr.HTML(f"<div class='kpi'><div class='value'>{len(df):,}</div><div class='label'>Totaal passagiers</div></div>")
|
|
@@ -253,13 +303,12 @@ with gr.Blocks(css=CUSTOM_CSS, theme=gr.themes.Default(primary_hue="blue")) as d
|
|
| 253 |
gr.HTML(f"<div class='kpi'><div class='value'>{df['survived'].mean()*100:.1f}%</div><div class='label'>% Overleefd</div></div>")
|
| 254 |
gr.HTML(f"<div class='kpi'><div class='value'>{', '.join(map(str, sorted(df['pclass'].unique())))}</div><div class='label'>Klassen</div></div>")
|
| 255 |
|
| 256 |
-
#
|
| 257 |
gr.Markdown("## 📊 Verken de data", elem_classes=["panel"])
|
| 258 |
with gr.Row():
|
| 259 |
-
g1 = gr.Plot(label="Overleving vs Leeftijd (2D)")
|
| 260 |
g2 = gr.Plot(label="Leeftijdsverdeling per status")
|
| 261 |
-
with gr.Row():
|
| 262 |
g3 = gr.Plot(label="Geslachtsverdeling")
|
|
|
|
| 263 |
g4 = gr.Plot(label="Ticketprijs per klasse")
|
| 264 |
|
| 265 |
# Interactieve voorspelling
|
|
@@ -277,16 +326,24 @@ with gr.Blocks(css=CUSTOM_CSS, theme=gr.themes.Default(primary_hue="blue")) as d
|
|
| 277 |
btn = gr.Button("🎲 Bereken én vertel mijn verhaal", variant="primary")
|
| 278 |
story_out = gr.Markdown()
|
| 279 |
|
| 280 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 281 |
def load_graphs():
|
| 282 |
return (
|
| 283 |
-
plot_survival_vs_age(df), # NIEUW: ronde bolletjes, Y=overleving
|
| 284 |
plot_leeftijdsverdeling(df),
|
| 285 |
plot_geslacht(df),
|
| 286 |
plot_fare_vs_klasse(df),
|
| 287 |
)
|
|
|
|
| 288 |
|
| 289 |
-
|
| 290 |
btn.click(
|
| 291 |
predict_and_story,
|
| 292 |
inputs=[ui_pclass, ui_sex, ui_age, ui_sibsp, ui_parch, ui_fare, ui_emb],
|
|
|
|
| 1 |
+
# app.py — Titanic Data Adventure
|
| 2 |
+
# Wit thema • vaste layout • training zichtbaar bij opstart
|
| 3 |
+
# 2D-projectie (t-SNE/PCA fallback): elk punt = een passagier (hover voor details)
|
| 4 |
+
|
| 5 |
import gradio as gr
|
| 6 |
import pandas as pd
|
| 7 |
import numpy as np
|
| 8 |
import os
|
| 9 |
import plotly.express as px
|
| 10 |
+
|
| 11 |
from sklearn.model_selection import train_test_split
|
| 12 |
+
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler
|
| 13 |
+
from sklearn.compose import ColumnTransformer
|
| 14 |
+
from sklearn.pipeline import Pipeline
|
| 15 |
from sklearn.ensemble import RandomForestClassifier
|
| 16 |
+
from sklearn.manifold import TSNE
|
| 17 |
+
from sklearn.decomposition import PCA
|
| 18 |
|
| 19 |
# =========================
|
| 20 |
# Data laden en voorbereiden
|
|
|
|
| 47 |
|
| 48 |
df = load_data()
|
| 49 |
|
| 50 |
+
# ============== Globale modelstaat (gevuld bij opstart) ==============
|
| 51 |
+
MODEL = None
|
| 52 |
+
MODEL_ACC = None
|
| 53 |
+
|
| 54 |
# =========================
|
| 55 |
+
# Hero-afbeelding pad bepalen
|
| 56 |
# =========================
|
| 57 |
+
def get_hero_image_path():
    """Return the first existing hero-image file name, or ``None``.

    The candidates ``titanic_bg.png``, ``titanic_bg.jpg`` and
    ``titanic_bg.jpeg`` are checked in that order, relative to the current
    working directory.

    Returns:
        The matching file name as a string, or ``None`` when no candidate is a
        regular file.
    """
    for name in ("titanic_bg.png", "titanic_bg.jpg", "titanic_bg.jpeg"):
        # isfile (not exists): a directory that happens to carry one of these
        # names must not be reported as an image.
        if os.path.isfile(name):
            return name
    return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
|
| 63 |
+
HERO_PATH = get_hero_image_path()
|
| 64 |
|
| 65 |
# =========================
|
| 66 |
# Plots (licht, informatief)
|
|
|
|
| 77 |
)
|
| 78 |
return fig
|
| 79 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
def plot_leeftijdsverdeling(dfx):
|
| 81 |
f = px.histogram(
|
| 82 |
dfx, x="age", color="status", nbins=30, barmode="overlay", opacity=0.75,
|
|
|
|
| 100 |
return make_plot(f, "Ticketprijs per klasse (met overleving)")
|
| 101 |
|
| 102 |
# =========================
|
| 103 |
+
# Training + 2D-projectie met voortgang
|
| 104 |
# =========================
|
| 105 |
+
def _build_preprocessor(num_cols, cat_cols):
    """Return a new, unfitted transformer: scale numerics, one-hot encode categoricals."""
    return ColumnTransformer(
        transformers=[
            ("num", StandardScaler(), num_cols),
            ("cat", OneHotEncoder(handle_unknown="ignore"), cat_cols),
        ]
    )


def _project_to_2d(matrix):
    """Reduce ``matrix`` to two columns; return ``(embedding, method_name)``."""
    dense = matrix.toarray() if hasattr(matrix, "toarray") else matrix
    tsne_kwargs = dict(
        n_components=2,
        # t-SNE requires perplexity < number of samples.
        perplexity=min(30, max(1, dense.shape[0] - 1)),
        learning_rate="auto",
        init="random",
        random_state=42,
    )
    try:
        # The iteration-count argument was renamed from ``n_iter`` to
        # ``max_iter`` in newer scikit-learn releases; try the new name first
        # so a mere rename does not silently push us onto the PCA fallback.
        try:
            tsne = TSNE(max_iter=600, **tsne_kwargs)
        except TypeError:
            tsne = TSNE(n_iter=600, **tsne_kwargs)
        return tsne.fit_transform(dense), "t-SNE"
    except Exception:
        # Deliberate best-effort fallback (e.g. too little memory or an
        # unsupported option); the method name is surfaced in the UI status.
        pca = PCA(n_components=2, random_state=42)
        return pca.fit_transform(dense), "PCA"


def train_and_embed(progress=gr.Progress(track_tqdm=True)):
    """Train the survival model and stream a 2-D passenger projection.

    Registered as a load callback of the app. This is a generator: every
    ``yield`` emits ``(status_markdown, figure)`` so the UI can update while
    work is in progress. The first two yields carry an empty placeholder
    figure; the last one carries the finished scatter plot.

    Side effects:
        Sets the module globals ``MODEL`` (fitted pipeline) and ``MODEL_ACC``
        (accuracy on the held-out 25 % split).

    Args:
        progress: Gradio progress tracker, called with a fraction and a
            description at each stage.

    Yields:
        Tuples of ``(status text, plotly figure)``.
    """
    global MODEL, MODEL_ACC

    # Empty figure shown until the projection is ready.
    placeholder = make_plot(px.scatter(x=[], y=[]), "Initialiseren…")

    progress(0.05, desc="📦 Data laden…")
    status = "📦 Data geladen. Aantal passagiers: **{}**".format(len(df))
    yield status, placeholder

    # Feature selection shared by the model and the embedding.
    progress(0.20, desc="🔧 Voorbewerking…")
    features = ["pclass", "sex", "age", "sibsp", "parch", "fare", "embarked", "family_size"]
    X = df[features].copy()
    y = df["survived"].astype(int)

    cat_cols = ["sex", "embarked"]
    num_cols = [c for c in features if c not in cat_cols]

    # ---- Train the model ----
    progress(0.55, desc="🤖 Model trainen (RandomForest)…")
    model = Pipeline(steps=[
        ("prep", _build_preprocessor(num_cols, cat_cols)),
        ("clf", RandomForestClassifier(n_estimators=300, random_state=42)),
    ])
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.25, random_state=42, stratify=y
    )
    model.fit(X_train, y_train)
    acc = model.score(X_test, y_test)
    MODEL = model
    MODEL_ACC = acc
    status = f"✅ Model getraind: **RandomForest**, nauwkeurigheid: **{acc:.2%}**"
    yield status, placeholder

    # ---- 2-D embedding ----
    progress(0.75, desc="🗺️ 2D-projectie berekenen (t-SNE)…")
    # Fit a SEPARATE preprocessor on all rows for the projection. Re-fitting
    # the pipeline's own "prep" step here would change the transformer inside
    # MODEL after its classifier has already been trained.
    X_all = _build_preprocessor(num_cols, cat_cols).fit_transform(X)
    emb, method = _project_to_2d(X_all)

    # Assemble the plotting frame: coordinates plus hover attributes.
    dvis = pd.DataFrame({"x": emb[:, 0], "y": emb[:, 1]})
    dvis["Geslacht"] = df["sex"].values
    dvis["Overleving"] = df["status"].values
    dvis["Klasse"] = df["pclass"].values
    dvis["Leeftijd"] = df["age"].values
    dvis["Fare (£)"] = df["fare"].values
    dvis["Familie"] = df["family_size"].values
    for source_col, label in (("name", "Naam"), ("ticket", "Ticket"), ("cabin", "Cabin")):
        if source_col in df.columns:
            dvis[label] = df[source_col].values

    hover_order = [
        "Naam", "Geslacht", "Leeftijd", "Familie", "Fare (£)",
        "Klasse", "Overleving", "Ticket", "Cabin",
    ]
    fig = px.scatter(
        dvis,
        x="x", y="y",
        color="Overleving",
        symbol="Klasse",
        hover_data=[c for c in hover_order if c in dvis.columns],
        color_discrete_map={"Overleefd": "#1B4B91", "Niet overleefd": "#A3B1C6"},
        opacity=0.78,
    )
    # NOTE: this forces every trace to a circle, so the per-class symbols
    # requested above only affect trace grouping, not the marker shape.
    fig.update_traces(marker=dict(symbol="circle", size=8, line=dict(width=0.6, color="white")))
    fig = make_plot(fig, f"2D projectie ({method}) — elk bolletje is een passagier")

    progress(1.0, desc="Klaar ✅")
    status = f"✅ Model getraind (**{acc:.2%}**). 2D-projectie ({method}) gereed — beweeg met je muis over de bolletjes voor details."
    yield status, fig
|
| 193 |
|
| 194 |
# =========================
|
| 195 |
# Interactieve voorspelling + avontuur-tekst
|
| 196 |
# =========================
|
| 197 |
def predict_and_story(pclass, sex, age, sibsp, parch, fare, embarked):
|
| 198 |
+
global MODEL, MODEL_ACC
|
| 199 |
+
if MODEL is None:
|
| 200 |
+
return "⏳ Het model is nog niet klaar met initialiseren. Probeer het zo nog eens."
|
| 201 |
+
|
| 202 |
+
# Maak invoer-DataFrame; MODEL bevat ColumnTransformer in de pipeline
|
| 203 |
+
X_row = pd.DataFrame([{
|
| 204 |
+
"pclass": int(pclass),
|
| 205 |
+
"sex": sex,
|
| 206 |
+
"age": float(age),
|
| 207 |
+
"sibsp": int(sibsp),
|
| 208 |
+
"parch": int(parch),
|
| 209 |
+
"fare": float(fare),
|
| 210 |
+
"embarked": embarked,
|
| 211 |
+
"family_size": int(sibsp) + int(parch) + 1
|
| 212 |
+
}])
|
| 213 |
|
|
|
|
| 214 |
prob = float(MODEL.predict_proba(X_row)[0,1])
|
| 215 |
pct = prob * 100
|
| 216 |
|
| 217 |
klasse_txt = {1:"eerste", 2:"tweede", 3:"derde"}.get(int(pclass), "onbekende")
|
| 218 |
haven_txt = {"C":"Cherbourg","Q":"Queenstown","S":"Southampton"}.get(embarked, "een onbekende haven")
|
| 219 |
+
rol_txt = "vrouw" if sex.lower().startswith("v") else "man"
|
| 220 |
|
| 221 |
if pct >= 75:
|
| 222 |
tone = "Je kansen zijn uitzonderlijk goed."
|
|
|
|
| 236 |
|
| 237 |
**Situatie:** Je bent een **{rol_txt}** in de **{klasse_txt} klasse**, ingescheept in **{haven_txt}**.
|
| 238 |
Je bent **{int(age)}** jaar oud, reist met **{int(sibsp)}** broer(s)/zus(sen) en **{int(parch)}** ouder(s)/kind(eren).
|
| 239 |
+
Je ticket kostte **£{float(fare):.2f}** en je **familiegrootte** is **{int(sibsp)+int(parch)+1}**.
|
| 240 |
|
| 241 |
**Analyse:** {tone} Het model weegt o.a. klasse, geslacht, leeftijd en familieomvang mee—patronen in de historische data.
|
| 242 |
|
|
|
|
| 247 |
return story
|
| 248 |
|
| 249 |
# =========================
|
| 250 |
+
# Introductietekst (kort, aangepast)
|
| 251 |
# =========================
|
| 252 |
INTRO_MD = """
|
| 253 |
# 🛳️ Titanic Data Adventure
|
| 254 |
+
**Links** zie je de training live starten. We bouwen een model dat patronen uit 1912 leert.
|
| 255 |
+
Daaronder verschijnt een **2D-projectie**: elk **bolletje is één passagier** — beweeg erover voor details.
|
| 256 |
+
**Rechts** staat een visual van het schip ter context. Scrol daarna door voor meer grafieken en jouw persoonlijke scenario.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 257 |
"""
|
| 258 |
|
| 259 |
# =========================
|
|
|
|
| 276 |
}
|
| 277 |
.kpi .value { font-size:1.6rem; font-weight:800; color:#1B4B91; }
|
| 278 |
.kpi .label { font-size:.9rem; color:#3F557A; }
|
|
|
|
| 279 |
"""
|
| 280 |
|
| 281 |
with gr.Blocks(css=CUSTOM_CSS, theme=gr.themes.Default(primary_hue="blue")) as demo:
|
| 282 |
+
# Intro + foto
|
| 283 |
with gr.Row():
|
| 284 |
with gr.Column(scale=2, min_width=420):
|
| 285 |
+
gr.Markdown(INTRO_MD, elem_classes=["intro-card"])
|
| 286 |
with gr.Column(scale=1, min_width=320):
|
| 287 |
hero_path = get_hero_image_path()
|
| 288 |
if hero_path:
|
|
|
|
| 290 |
else:
|
| 291 |
gr.Markdown("⚠️ **Geen afbeelding gevonden.** Plaats `titanic_bg.png` of `titanic_bg.jpg` in de root.")
|
| 292 |
|
| 293 |
+
# Training & 2D-projectie
|
| 294 |
+
with gr.Column(elem_classes=["panel"]):
|
| 295 |
+
gr.Markdown("## 🔧 Initialisatie & Modeltraining")
|
| 296 |
+
status_md = gr.Markdown("⏳ Start…")
|
| 297 |
+
train_plot = gr.Plot(label="2D projectie — elk bolletje is een passagier")
|
| 298 |
+
|
| 299 |
# KPI's
|
| 300 |
with gr.Row():
|
| 301 |
gr.HTML(f"<div class='kpi'><div class='value'>{len(df):,}</div><div class='label'>Totaal passagiers</div></div>")
|
|
|
|
| 303 |
gr.HTML(f"<div class='kpi'><div class='value'>{df['survived'].mean()*100:.1f}%</div><div class='label'>% Overleefd</div></div>")
|
| 304 |
gr.HTML(f"<div class='kpi'><div class='value'>{', '.join(map(str, sorted(df['pclass'].unique())))}</div><div class='label'>Klassen</div></div>")
|
| 305 |
|
| 306 |
+
# Overige visualisaties
|
| 307 |
gr.Markdown("## 📊 Verken de data", elem_classes=["panel"])
|
| 308 |
with gr.Row():
|
|
|
|
| 309 |
g2 = gr.Plot(label="Leeftijdsverdeling per status")
|
|
|
|
| 310 |
g3 = gr.Plot(label="Geslachtsverdeling")
|
| 311 |
+
with gr.Row():
|
| 312 |
g4 = gr.Plot(label="Ticketprijs per klasse")
|
| 313 |
|
| 314 |
# Interactieve voorspelling
|
|
|
|
| 326 |
btn = gr.Button("🎲 Bereken én vertel mijn verhaal", variant="primary")
|
| 327 |
story_out = gr.Markdown()
|
| 328 |
|
| 329 |
+
# ================= Callbacks =================
|
| 330 |
+
# 1) Start training + 2D projectie (streamend via yields)
|
| 331 |
+
demo.load(
|
| 332 |
+
fn=train_and_embed,
|
| 333 |
+
inputs=[],
|
| 334 |
+
outputs=[status_md, train_plot]
|
| 335 |
+
)
|
| 336 |
+
|
| 337 |
+
# 2) Overige grafieken
|
| 338 |
def load_graphs():
|
| 339 |
return (
|
|
|
|
| 340 |
plot_leeftijdsverdeling(df),
|
| 341 |
plot_geslacht(df),
|
| 342 |
plot_fare_vs_klasse(df),
|
| 343 |
)
|
| 344 |
+
demo.load(load_graphs, [], [g2, g3, g4])
|
| 345 |
|
| 346 |
+
# 3) Interactieve voorspelling
|
| 347 |
btn.click(
|
| 348 |
predict_and_story,
|
| 349 |
inputs=[ui_pclass, ui_sex, ui_age, ui_sibsp, ui_parch, ui_fare, ui_emb],
|