Marcel0123 committed on
Commit
b9ea7ea
·
verified ·
1 Parent(s): 6873476

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +240 -106
app.py CHANGED
@@ -1,32 +1,123 @@
 
 
1
  import gradio as gr
2
  import pandas as pd
 
 
3
  import plotly.express as px
 
4
  from sklearn.model_selection import train_test_split
 
 
 
5
  from sklearn.ensemble import RandomForestClassifier
6
- from sklearn.preprocessing import LabelEncoder
7
-
8
- # -------------------------
9
- # DATA INLADEN
10
- # -------------------------
11
- titanic = pd.read_csv("Titanic-Dataset.csv")
12
-
13
- # Verwerk dataset
14
- titanic = titanic.dropna(subset=["Age", "Sex", "Pclass", "Survived"])
15
- titanic["Sex"] = titanic["Sex"].map({"male": 0, "female": 1})
16
- X = titanic[["Pclass", "Sex", "Age", "SibSp", "Parch", "Fare"]]
17
- y = titanic["Survived"]
18
-
19
- # -------------------------
20
- # MODEL TRAINEN
21
- # -------------------------
22
- X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
23
- model = RandomForestClassifier(random_state=42)
24
- model.fit(X_train, y_train)
25
- accuracy = model.score(X_test, y_test)
26
-
27
- # -------------------------
28
- # TITELS & TEKSTEN
29
- # -------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  INTRO_MD = """
31
  # 🛳️ Titanic Data Adventure
32
  ### Een datagedreven reis door hoop, hiërarchie en toeval
@@ -52,6 +143,9 @@ In deze applicatie duiken we opnieuw die nacht in – niet met reddingsvesten, m
52
  die het menselijk verhaal achter de ramp zichtbaar maken.
53
  """
54
 
 
 
 
55
  EXPLAIN_MD_SIDE = """
56
  ### 📘 Wat je ziet
57
  Bij het opstarten traint de computer een **RandomForest-model** dat leert wie op de Titanic **overleefde** – en waarom.
@@ -68,91 +162,131 @@ Elk **bolletje** is één persoon. Met **PCA** brengen we veel kenmerken terug n
68
  Dichter bij elkaar = vergelijkbare profielen. **Hover** voor details.
69
  """
70
 
71
- SCENARIO_INTRO = """
72
- > Stel je voor: het is april 1912.
73
- > De nacht is helder, de zee kalm, de lichten van de Titanic glinsteren als sterren op het water.
74
- > Maar wat als jij aan boord was?
75
- > Kies jouw plek, leeftijd en omstandigheden — en ontdek hoe groot jouw kans was om het avontuur te overleven.
76
- >
77
- > De cijfers vertellen het verhaal.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  """
79
 
80
- # -------------------------
81
- # VISUALISATIES
82
- # -------------------------
83
- fig_age = px.histogram(titanic, x="Age", color="Survived",
84
- color_discrete_map={0: "#8d99ae", 1: "#0077b6"},
85
- nbins=30, title="Leeftijdsverdeling naar overleving")
 
 
 
 
 
 
 
 
 
 
 
 
 
86
 
87
- # 2D PCA-achtige projectie met willekeurige jitter
88
- import numpy as np
89
- np.random.seed(42)
90
- titanic["x"] = np.random.randn(len(titanic))
91
- titanic["y"] = np.random.randn(len(titanic))
92
- fig_scatter = px.scatter(
93
- titanic, x="x", y="y",
94
- color=titanic["Survived"].map({0: "Niet overleefd", 1: "Overleefd"}),
95
- hover_data=["Sex", "Age", "Pclass", "Fare"],
96
- title=f"Model getraind (RandomForest) — nauwkeurigheid: {accuracy*100:.2f}%",
97
- color_discrete_map={"Niet overleefd": "#adb5bd", "Overleefd": "#0077b6"},
98
- opacity=0.75
99
- )
100
- fig_scatter.update_traces(marker=dict(size=8, line=dict(width=0.5, color='white')))
101
-
102
- # -------------------------
103
- # INTERACTIEF SCENARIO
104
- # -------------------------
105
- def predict_survival(pclass, sex, age, sibsp, parch, fare):
106
- data = pd.DataFrame([[pclass, sex, age, sibsp, parch, fare]],
107
- columns=["Pclass", "Sex", "Age", "SibSp", "Parch", "Fare"])
108
- prediction = model.predict(data)[0]
109
- prob = model.predict_proba(data)[0][prediction]
110
- result = "🟦 Overleefd" if prediction == 1 else "⬜ Niet overleefd"
111
- text = f"{result}\n\nVoorspelde kans: {prob*100:.1f}%"
112
- return text
113
-
114
- # -------------------------
115
- # INTERFACE
116
- # -------------------------
117
- with gr.Blocks(css="body {background-color: white;}") as demo:
118
- gr.Markdown("<h1 style='text-align:center; color:#003366;'>Titanic Data Adventure</h1>")
119
-
120
  with gr.Row():
121
- with gr.Column(scale=1):
122
- gr.Image("titanic_bg.png", show_label=False)
123
- with gr.Column(scale=1):
124
- gr.Markdown(INTRO_MD)
125
-
126
- gr.Markdown("---")
127
-
128
  with gr.Row():
129
- with gr.Column(scale=1):
130
- gr.Plot(fig_scatter)
131
- with gr.Column(scale=1):
132
- gr.Markdown(EXPLAIN_MD_SIDE)
133
-
134
- gr.Markdown("---")
135
- gr.Markdown("## 🔮 Jouw scenario — bereken je overlevingskans en lees je scène")
136
- gr.Markdown(SCENARIO_INTRO)
137
-
138
  with gr.Row():
139
- pclass = gr.Dropdown([1, 2, 3], label="Klasse (1 = luxe, 3 = economy)")
140
- sex = gr.Radio(["man", "vrouw"], label="Geslacht", value="man")
141
- age = gr.Slider(0, 80, value=30, label="Leeftijd")
142
- sibsp = gr.Slider(0, 5, value=0, label="Aantal broers/zussen of echtgeno(o)t(e)")
143
- parch = gr.Slider(0, 5, value=0, label="Aantal ouders/kinderen")
144
- fare = gr.Slider(0, 500, value=50, label="Ticketprijs (£)")
145
-
146
- sex_map = {"man": 0, "vrouw": 1}
147
- btn = gr.Button("🚢 Bereken mijn overlevingskans")
148
- output = gr.Textbox(label="Resultaat", lines=2)
149
-
150
- btn.click(fn=lambda p, s, a, si, pa, f: predict_survival(p, sex_map[s], a, si, pa, f),
151
- inputs=[pclass, sex, age, sibsp, parch, fare],
152
- outputs=output)
153
-
154
- # -------------------------
155
- # LAUNCH
156
- # -------------------------
157
- if __name__ == "__main__":
158
- demo.launch()
 
 
 
 
 
 
1
+ # app.py — Titanic Data Adventure (met uitgebreide introductie naast foto)
2
+
3
  import gradio as gr
4
  import pandas as pd
5
+ import numpy as np
6
+ import os
7
  import plotly.express as px
8
+
9
  from sklearn.model_selection import train_test_split
10
+ from sklearn.preprocessing import OneHotEncoder, StandardScaler
11
+ from sklearn.compose import ColumnTransformer
12
+ from sklearn.pipeline import Pipeline
13
  from sklearn.ensemble import RandomForestClassifier
14
+ from sklearn.decomposition import PCA
15
+
16
+ # ======================================================
17
+ # DATA LADEN
18
+ # ======================================================
19
# Columns (lower-cased) that the rest of the app relies on.
REQUIRED = {"survived", "pclass", "sex", "age", "sibsp", "parch", "fare", "embarked"}


def load_data(path="Titanic-Dataset.csv"):
    """Load the Titanic CSV and return a cleaned DataFrame.

    Column names are lower-cased and stripped. Missing values are imputed
    per column: the median for numeric columns, the most frequent value for
    everything else. Two derived columns are added, ``family_size``
    (``sibsp + parch + 1``) and ``status`` (Dutch label for ``survived``).
    ``sex`` is title-cased and ``embarked`` upper-cased.

    Args:
        path: location of the CSV file.

    Returns:
        The cleaned pandas DataFrame.

    Raises:
        FileNotFoundError: if *path* does not exist.
        ValueError: if any column listed in ``REQUIRED`` is missing.
    """
    if not os.path.exists(path):
        # Report the path that was actually requested, not a fixed file name.
        raise FileNotFoundError(f"{path} niet gevonden.")
    data = pd.read_csv(path)
    data.columns = [c.lower().strip() for c in data.columns]
    missing = REQUIRED - set(data.columns)
    if missing:
        raise ValueError(f"Ontbrekende kolommen: {', '.join(sorted(missing))}")

    for col in data.columns:
        series = data[col]
        if not series.isna().any():
            continue
        if pd.api.types.is_numeric_dtype(series):
            fill_value = series.median()
        else:
            # Covers object, string and categorical columns alike; comparing
            # the dtype with 'O' would send the latter two to median().
            modes = series.mode()
            if modes.empty:
                # Column is entirely missing: nothing sensible to impute.
                continue
            fill_value = modes.iloc[0]
        data[col] = series.fillna(fill_value)

    data["family_size"] = data["sibsp"] + data["parch"] + 1
    data["status"] = data["survived"].map({0: "Niet overleefd", 1: "Overleefd"})
    data["sex"] = data["sex"].astype(str).str.title()
    data["embarked"] = data["embarked"].astype(str).str.upper()
    return data
37
+
38
# Cleaned passenger table, loaded once at import time and read by the
# training, plotting and KPI code further down.
df = load_data()
# Set by train_and_embed_solid(): the fitted pipeline and its hold-out
# accuracy. Both stay None until training has run.
MODEL = None
MODEL_ACC = None
41
+
42
+ # ======================================================
43
+ # HULPFUNCTIES
44
+ # ======================================================
45
def hero_path():
    """Return the file name of the first hero image found, or None.

    Candidates are looked up relative to the current working directory, in
    order of preference: PNG, JPG, JPEG.
    """
    for name in ("titanic_bg.png", "titanic_bg.jpg", "titanic_bg.jpeg"):
        # isfile rather than exists: a directory that happens to carry the
        # image name must not be handed on as an image.
        if os.path.isfile(name):
            return name
    return None
50
+
51
def make_plot(fig, title):
    """Apply the app's shared look to *fig* and return that same figure.

    Sets the title, transparent backgrounds, font colours, margins and a
    horizontal legend anchored above the plot area on the right.
    """
    legend_style = {
        "orientation": "h",
        "yanchor": "bottom",
        "y": 1.02,
        "xanchor": "right",
        "x": 1,
    }
    layout = {
        "title": title,
        "paper_bgcolor": "rgba(255,255,255,0)",
        "plot_bgcolor": "rgba(255,255,255,0)",
        "font": {"color": "#0B1C3F"},
        "title_font": {"size": 18, "color": "#1B4B91"},
        "margin": {"l": 40, "r": 40, "t": 50, "b": 40},
        "legend": legend_style,
    }
    fig.update_layout(**layout)
    return fig
62
+
63
+ # ======================================================
64
+ # MODELTRAINING + 2D VISUALISATIE
65
+ # ======================================================
66
def train_and_embed_solid():
    """Train the survival model and build the 2D PCA scatter plot.

    Fits a preprocessing + RandomForest pipeline on a stratified 75/25
    split of the module-level ``df``, stores the fitted pipeline in
    ``MODEL`` and its hold-out accuracy in ``MODEL_ACC``, then projects all
    passengers onto two PCA components for plotting.

    Returns:
        (status, fig): a Markdown status string and the Plotly figure.
    """
    global MODEL, MODEL_ACC
    features = ["pclass", "sex", "age", "sibsp", "parch", "fare", "embarked", "family_size"]
    X = df[features].copy()
    y = df["survived"].astype(int)

    cat_cols = ["sex", "embarked"]
    num_cols = [c for c in features if c not in cat_cols]

    pre = ColumnTransformer([
        ("num", StandardScaler(), num_cols),
        ("cat", OneHotEncoder(handle_unknown="ignore"), cat_cols),
    ])

    pipe = Pipeline([
        ("prep", pre),
        ("clf", RandomForestClassifier(n_estimators=300, random_state=42)),
    ])

    Xtr, Xte, ytr, yte = train_test_split(X, y, test_size=0.25, random_state=42, stratify=y)
    pipe.fit(Xtr, ytr)
    MODEL = pipe
    MODEL_ACC = pipe.score(Xte, yte)

    # `pre` is the very object the pipeline fitted above. Only transform
    # here: calling fit_transform would refit it on all rows and change the
    # scaling the classifier was trained and scored with.
    Z = pre.transform(X)
    Z = Z.toarray() if hasattr(Z, "toarray") else Z
    emb = PCA(n_components=2, random_state=42).fit_transform(Z)

    dvis = pd.DataFrame({"x": emb[:, 0], "y": emb[:, 1]})
    dvis["Overleving"] = df["status"].values
    dvis["Geslacht"] = df["sex"].values
    dvis["Klasse"] = df["pclass"].values
    dvis["Leeftijd"] = df["age"].values
    dvis["Fare (£)"] = df["fare"].values
    dvis["Familie"] = df["family_size"].values
    # Optional identifying columns are shown on hover when the CSV has them.
    for c in ["name", "ticket", "cabin"]:
        if c in df.columns:
            dvis[c.capitalize()] = df[c].values

    fig = px.scatter(
        dvis, x="x", y="y",
        color="Overleving", symbol="Klasse",
        hover_data=[col for col in dvis.columns if col not in ["x", "y"]],
        color_discrete_map={"Overleefd": "#1B4B91", "Niet overleefd": "#A3B1C6"},
        opacity=0.8,
    )
    # Every trace is forced to a circle marker, overriding the per-class symbols.
    fig.update_traces(marker=dict(symbol="circle", size=8, line=dict(width=0.6, color="white")))
    fig = make_plot(fig, "2D-projectie (PCA) — elk bolletje is een passagier")

    status = f"✅ Model getraind (RandomForest) — nauwkeurigheid: **{MODEL_ACC:.2%}**. 2D-projectie gereed; hover voor details."
    return status, fig
117
+
118
+ # ======================================================
119
+ # TEKST VOOR INTRODUCTIE (UITGEBREID)
120
+ # ======================================================
121
  INTRO_MD = """
122
  # 🛳️ Titanic Data Adventure
123
  ### Een datagedreven reis door hoop, hiërarchie en toeval
 
143
  die het menselijk verhaal achter de ramp zichtbaar maken.
144
  """
145
 
146
+ # ======================================================
147
+ # UITLEGTEKST NAAST DE 2D-PLOT
148
+ # ======================================================
149
  EXPLAIN_MD_SIDE = """
150
  ### 📘 Wat je ziet
151
  Bij het opstarten traint de computer een **RandomForest-model** dat leert wie op de Titanic **overleefde** – en waarom.
 
162
  Dichter bij elkaar = vergelijkbare profielen. **Hover** voor details.
163
  """
164
 
165
+ # ======================================================
166
+ # OVERIGE GRAFIEKEN
167
+ # ======================================================
168
def plot_age_hist(dfx):
    """Return an overlaid age histogram of *dfx*, coloured by ``status``."""
    palette = {"Overleefd": "#1B4B91", "Niet overleefd": "#A3B1C6"}
    figure = px.histogram(
        dfx,
        x="age",
        color="status",
        nbins=30,
        barmode="overlay",
        opacity=0.75,
        color_discrete_map=palette,
    )
    return make_plot(figure, "Leeftijdsverdeling per overlevingsstatus")
172
+
173
def plot_gender(dfx):
    """Return a donut chart of the ``sex`` distribution in *dfx*."""
    palette = {"Male": "#A3B1C6", "Female": "#1B4B91"}
    figure = px.pie(
        dfx,
        names="sex",
        color="sex",
        color_discrete_map=palette,
        hole=0.35,
    )
    return make_plot(figure, "Verdeling geslacht (alle passagiers)")
177
+
178
def plot_fare_box(dfx):
    """Return box plots of ``fare`` per ``pclass`` in *dfx*, split by ``status``."""
    palette = {"Overleefd": "#1B4B91", "Niet overleefd": "#A3B1C6"}
    figure = px.box(
        dfx,
        x="pclass",
        y="fare",
        color="status",
        color_discrete_map=palette,
    )
    return make_plot(figure, "Ticketprijs per klasse (met overleving)")
182
+
183
+ # ======================================================
184
+ # INTERACTIEVE VOORSPELLING
185
+ # ======================================================
186
def predict_and_story(pclass, sex, age, sibsp, parch, fare, embarked):
    """Predict the survival chance for one scenario and narrate it.

    Args:
        pclass: ticket class (1, 2 or 3).
        sex: UI label ("Man"/"Vrouw"); the dataset labels "Male"/"Female"
            are accepted as well.
        age: age in years.
        sibsp: number of siblings/spouses aboard.
        parch: number of parents/children aboard.
        fare: ticket price.
        embarked: port code ("C", "Q" or "S", case-insensitive).

    Returns:
        A Markdown string with the predicted probability and a short story,
        or a "still initialising" notice while ``MODEL`` is None.
    """
    if MODEL is None:
        return "⏳ Het model initialiseert nog. Probeer het zo nog eens."

    pclass, sibsp, parch = int(pclass), int(sibsp), int(parch)
    age, fare = float(age), float(fare)
    family_size = sibsp + parch + 1

    # The model is trained on the dataset's labels ("Male"/"Female"), not on
    # the Dutch UI labels. Passing "Man"/"Vrouw" through unchanged would be
    # an unknown category, which the encoder silently drops, so the sex
    # feature would have no effect on the prediction.
    is_female = str(sex).strip().lower().startswith(("v", "f"))
    model_sex = "Female" if is_female else "Male"
    # Training data stores the port code upper-cased.
    port = str(embarked).strip().upper()

    X_row = pd.DataFrame([{
        "pclass": pclass, "sex": model_sex, "age": age,
        "sibsp": sibsp, "parch": parch, "fare": fare,
        "embarked": port, "family_size": family_size,
    }])
    prob = float(MODEL.predict_proba(X_row)[0, 1])
    pct = prob * 100

    # Fall back to the raw value instead of raising on an unexpected
    # class or port.
    klasse_txt = {1: "eerste", 2: "tweede", 3: "derde"}.get(pclass, str(pclass))
    haven_txt = {"C": "Cherbourg", "Q": "Queenstown", "S": "Southampton"}.get(port, port)
    rol_txt = "vrouw" if is_female else "man"

    if pct >= 75:
        tone, ending = ("Je kansen zijn uitzonderlijk goed.",
                        "Je bereikt de sloep; het schip helt achter je, maar je leeft.")
    elif pct >= 50:
        tone, ending = ("Je kansen zijn behoorlijk goed.",
                        "In de chaos vind je een plek in een halfgevulde sloep.")
    elif pct >= 25:
        tone, ending = ("De kansen zijn fifty-fifty.",
                        "Op het laatste moment spring je; de nacht is lang, maar de horizon gloeit.")
    else:
        tone, ending = ("Het ziet er somber uit.",
                        "Je klampt je vast terwijl de oceaan meedogenloos wordt.")

    return (
        f"### 🔮 Jouw overlevingskans: **{pct:.1f}%**\n"
        "\n"
        f"**Situatie:** {rol_txt}, **{klasse_txt} klasse**, inscheping **{haven_txt}** — "
        f"leeftijd **{int(age)}**, familie **{sibsp}+{parch}** (totaal {family_size}), "
        f"ticket **£{fare:.2f}**.\n"
        "\n"
        f"**Analyse:** {tone} Het model weegt o.a. klasse, geslacht, leeftijd en familieomvang mee.\n"
        "\n"
        f"**Avontuur:** De nacht is stil; fluiten, geroep, voetstappen. {ending}\n"
    )
218
+
219
+ # ======================================================
220
+ # UI + LAYOUT
221
+ # ======================================================
222
+ CUSTOM_CSS = """
223
+ body { background:#FFFFFF; color:#0B1C3F; }
224
+ .gradio-container { background:#FFFFFF; }
225
+ h1, h2, h3, h4 { color:#1B4B91; }
226
+ .panel, .intro-card { background:#F9FBFF; border:1px solid #E0E6F3; border-radius:12px; padding:16px; }
227
+ .hero-img img { border-radius:12px; border:1px solid #E0E6F3; }
228
+ .kpi { display:flex; flex-direction:column; align-items:center; justify-content:center;
229
+ background:#FFFFFF; border:1px solid #E0E6F3; border-radius:12px; padding:14px; }
230
+ .kpi .value { font-size:1.6rem; font-weight:800; color:#1B4B91; }
231
+ .kpi .label { font-size:.9rem; color:#3F557A; }
232
+ .explain-card { background:#EAF0FF; border-radius:12px; padding:18px; border:1px solid #D5E0FA; }
233
  """
234
 
235
+ with gr.Blocks(css=CUSTOM_CSS, theme=gr.themes.Default(primary_hue="blue")) as demo:
236
+ # Header-intro + foto
237
+ with gr.Row():
238
+ with gr.Column(scale=2, min_width=420):
239
+ gr.Markdown(INTRO_MD, elem_classes=["intro-card"])
240
+ with gr.Column(scale=1, min_width=320):
241
+ hp = hero_path()
242
+ if hp: gr.Image(value=hp, interactive=False, show_label=False, elem_classes=["hero-img"])
243
+ else: gr.Markdown("⚠️ **Geen afbeelding gevonden.** Plaats `titanic_bg.png` of `titanic_bg.jpg` in de root.")
244
+
245
+ # Panel: status + 2D-plot links en uitleg rechts
246
+ with gr.Column(elem_classes=["panel"]):
247
+ gr.Markdown("## 🔧 Initialisatie & Modeltraining")
248
+ status_md = gr.Markdown("⏳ Initialiseren…")
249
+ with gr.Row():
250
+ with gr.Column(scale=2, min_width=420):
251
+ train_plot = gr.Plot(label="2D-projectie — elk bolletje is een passagier")
252
+ with gr.Column(scale=1, min_width=320):
253
+ gr.Markdown(EXPLAIN_MD_SIDE, elem_classes=["explain-card"])
254
 
255
+ # KPIs
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
256
  with gr.Row():
257
+ gr.HTML(f"<div class='kpi'><div class='value'>{len(df):,}</div><div class='label'>Totaal passagiers</div></div>")
258
+ gr.HTML(f"<div class='kpi'><div class='value'>{int(df['survived'].sum()):,}</div><div class='label'>Overlevenden</div></div>")
259
+ gr.HTML(f"<div class='kpi'><div class='value'>{df['survived'].mean()*100:.1f}%</div><div class='label'>% Overleefd</div></div>")
260
+ gr.HTML(f"<div class='kpi'><div class='value'>{', '.join(map(str, sorted(df['pclass'].unique())))}</div><div class='label'>Klassen</div></div>")
261
+
262
+ # Overige visualisaties
263
+ gr.Markdown("## 📊 Verken de data", elem_classes=["panel"])
264
  with gr.Row():
265
+ g2 = gr.Plot(label="Leeftijdsverdeling per status")
266
+ g3 = gr.Plot(label="Geslachtsverdeling")
 
 
 
 
 
 
 
267
  with gr.Row():
268
+ g4 = gr.Plot(label="Ticketprijs per klasse")
269
+
270
+ # Interactieve voorspelling
271
+ with gr.Column(elem_classes=["panel"]):
272
+ gr.Markdown("## 🔮 Jouw scenario — bereken je overlevingskans en lees je scène")
273
+ with gr.Row():
274
+ ui_pclass = gr.Slider(1, 3, value=2, step=1, label="Klasse (1=1e, 3=3e)")
275
+ ui_sex = gr.Radio(["Man","Vrouw"], value="Man", label="Geslacht")
276
+ ui_age = gr.Slider(0, 80, value=30, label="Leeftijd")
277
+ with gr.Row():
278
+ ui_sibsp = gr.Slider(0, 8, value=1, step=1, label="Broers/Zussen aan boord")
279
+ ui_parch = gr.Slider(0, 6, value=0, step=1, label="Ouders/Kinder(en) aan boord")
280
+ ui_fare = gr.Slider(0, 600, value=50, label="Ticketprijs (£)")
281
+ ui_emb = gr.Radio(["C","Q","S"], value="S", label="Vertrekhaven")
282
+ btn = gr.Button("🎲 Bereken én vertel mijn verhaal", variant="primary")
283
+ story_out = gr.Markdown()
284
+
285
+ # Loads & acties
286
+ demo.load(fn=train_and_embed_solid, inputs=[], outputs=[status_md, train_plot])
287
+ demo.load(lambda: (plot_age_hist(df), plot_gender(df), plot_fare_box(df)), inputs=[], outputs=[g2, g3, g4])
288
+ btn.click(predict_and_story,
289
+ inputs=[ui_pclass, ui_sex, ui_age, ui_sibsp, ui_parch, ui_fare, ui_emb],
290
+ outputs=story_out)
291
+
292
# Start the server only when run as a script, so importing this module
# (tests, tooling) does not block on a running app.
if __name__ == "__main__":
    demo.launch()