File size: 15,740 Bytes
50bab12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a9a4415
50bab12
 
 
 
 
 
 
 
a9a4415
50bab12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a9a4415
 
 
 
 
 
50bab12
 
 
f20f878
58c3ee5
f20f878
 
58c3ee5
 
50bab12
58c3ee5
a9a4415
58c3ee5
 
a9a4415
50bab12
 
 
a9a4415
50bab12
 
a9a4415
50bab12
a9a4415
3744991
a9a4415
 
6b59dec
a9a4415
50bab12
 
 
 
 
 
 
a9a4415
3efa3c5
 
 
 
50bab12
 
 
 
 
e63502a
 
 
50bab12
 
 
 
e63502a
 
 
 
50bab12
 
 
 
 
 
a9a4415
 
 
5b1b6c8
a9a4415
5b1b6c8
 
 
 
50bab12
 
 
 
 
a9a4415
 
50bab12
 
 
e63502a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ebd976a
 
 
 
 
 
 
 
 
 
 
50bab12
 
 
 
 
 
 
 
 
 
 
 
 
 
58c3ee5
50bab12
58c3ee5
 
f20f878
58c3ee5
 
 
50bab12
dd0a6a7
3efa3c5
 
58c3ee5
dd0a6a7
 
 
3efa3c5
 
dd0a6a7
50bab12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ebd976a
 
 
 
 
8b81992
 
 
 
 
 
 
 
 
 
 
 
50bab12
8b81992
50bab12
8b81992
 
 
 
50bab12
8b81992
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ebd976a
 
50bab12
8b81992
 
 
50bab12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ebd976a
 
 
 
 
8b81992
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50bab12
8b81992
 
 
 
 
 
 
 
 
 
 
 
 
 
50bab12
 
 
 
 
 
 
 
 
a9a4415
50bab12
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
import marimo

# marimo version string stored with this notebook file.
__generated_with = "0.23.2"
# Application object; every cell below registers itself through @app.cell.
app = marimo.App(
    width="full",
    app_title="Ethiopia Coffee Agroforests — Biodiversity vs Yield",
)


@app.cell
def _():
    # Libraries used throughout the notebook; they are handed to the other
    # cells through the return tuple below.
    from pathlib import Path

    import altair as alt
    import marimo as mo
    import numpy as np
    import pandas as pd

    return Path, alt, mo, np, pd


# Title cell: notebook heading, group/persona line and a short summary, as Markdown.
@app.cell
def _(mo):
    mo.md(r"""
    # Ethiopia Coffee Agroforests — Biodiversity vs Yield

    **Remote Group 2** · Alperen Aydos, Arif Erkovan, Melih Yilmaz
    Persona: *Sofia Almeida — biodiversity conservation activist*

    Three linked custom visualisations exploring the trade-off between coffee yield and
    plant biodiversity across 60 agroforest sites in the Ge and Go regions of Ethiopia.
    """)
    return


@app.cell
def _(Path, np, pd):
    # --- Data loading ---
    # Point DATA_PATH to your real CSV (a relative path is resolved against the
    # current working directory). If the file is missing, a synthetic dataset
    # that matches the described structure is generated so the notebook still runs.
    DATA_PATH = Path("data/coffee_sites.csv")

    if DATA_PATH.exists():
        df = pd.read_csv(DATA_PATH)
    else:
        # Announce the fallback so synthetic numbers are never mistaken for the
        # real survey data.
        import warnings as _warnings

        _warnings.warn(
            f"{DATA_PATH} not found - falling back to a synthetic dataset."
        )
        # Fixed seed: the synthetic sites are identical on every run.
        rng = np.random.default_rng(42)
        n_ge, n_go = 30, 30
        ge = pd.DataFrame({
            "site_id": [f"Ge{i+1:02d}" for i in range(n_ge)],
            "region": "Ge",
            "mean_yield": rng.normal(886, 300, n_ge).clip(60, 2400),
            "woody_richness": rng.normal(22, 5, n_ge).clip(5, 40).round().astype(int),
            "herb_richness": rng.normal(48, 10, n_ge).clip(15, 90).round().astype(int),
            "bryophyte_richness": rng.normal(13, 4, n_ge).clip(2, 30).round().astype(int),
            "dominance": rng.uniform(0.7, 0.99, n_ge),
        })
        go = pd.DataFrame({
            "site_id": [f"Go{i+1:02d}" for i in range(n_go)],
            "region": "Go",
            "mean_yield": rng.normal(1158, 350, n_go).clip(60, 2400),
            "woody_richness": rng.normal(14, 4, n_go).clip(3, 30).round().astype(int),
            "herb_richness": rng.normal(35, 9, n_go).clip(10, 75).round().astype(int),
            "bryophyte_richness": rng.normal(8, 3, n_go).clip(2, 25).round().astype(int),
            "dominance": rng.uniform(0.92, 0.996, n_go),
        })
        df = pd.concat([ge, go], ignore_index=True)

    # Derived columns are computed once, here, for either data source; a CSV
    # that already provides them keeps its own values.
    if "total_richness" not in df.columns:
        df["total_richness"] = df.woody_richness + df.herb_richness + df.bryophyte_richness
    if "yield_rank" not in df.columns:
        # method="first" breaks ties by row order, so every site gets its own rank.
        df["yield_rank"] = df.mean_yield.rank(method="first").astype(int)

    # Rows end up ordered from lowest to highest yield rank.
    df = df.sort_values("yield_rank").reset_index(drop=True)
    return (df,)


@app.cell
def _(df, mo):
    # --- Global controls (used by all three visuals) ---
    region_filter = mo.ui.multiselect(
        options=["Ge", "Go"], value=["Ge", "Go"], label="Region"
    )

    # Snap the slider bounds OUTWARDS to the 0.01 grid: floor the minimum and
    # ceil the maximum. Rounding to nearest (e.g. min 0.716 -> 0.72, or
    # max 0.9949 -> 0.99) would place the default range inside the data range,
    # and the between() filter downstream would silently drop the extreme sites.
    _dom_lo = float((df.dominance.min() * 100) // 1) / 100
    _dom_hi = -float((-df.dominance.max() * 100) // 1) / 100
    dominance_range = mo.ui.range_slider(
        start=_dom_lo,
        stop=_dom_hi,
        step=0.01,
        value=(_dom_lo, _dom_hi),
        label="Coffee dominance",
        show_value=True,
    )
    show_trend = mo.ui.checkbox(value=True, label="Show trend line (Visual 2)")

    def _site_sort_key(site_id):
        # Natural order for IDs such as "Ge07": two-character prefix first, then
        # the numeric suffix. IDs whose suffix is not an integer are placed
        # after the numeric ones of the same prefix instead of raising
        # ValueError and breaking the whole control bar.
        prefix, suffix = site_id[:2], site_id[2:]
        try:
            return (prefix, 0, int(suffix), "")
        except ValueError:
            return (prefix, 1, 0, suffix)

    site_options = sorted(df.site_id.unique(), key=_site_sort_key)
    site_picker = mo.ui.dropdown(
        # The first entry is a placeholder meaning "no site chosen".
        options=["(pick a site)"] + site_options,
        value="(pick a site)",
        label="Selected site",
    )

    mo.hstack(
        [region_filter, dominance_range, show_trend, site_picker],
        justify="start", gap=2,
    )
    return dominance_range, region_filter, show_trend, site_picker


@app.cell
def _(df, dominance_range, pd, region_filter):
    # Apply the shared region and dominance controls, then tag every remaining
    # site with its dominance bucket.
    filtered = df[
        df.region.isin(region_filter.value)
        & df.dominance.between(dominance_range.value[0], dominance_range.value[1])
    ].copy()
    DOMINANCE_BINS = [0, 0.80, 0.90, 0.95, 1.01]
    DOMINANCE_LABELS = ["<0.80", "0.80-0.90", "0.90-0.95", ">0.95"]
    # Bins are right-closed, so without include_lowest a dominance of exactly 0
    # falls outside the first bin and becomes the string "nan" after astype(str).
    filtered["dominance_bucket"] = pd.cut(
        filtered["dominance"],
        bins=DOMINANCE_BINS,
        labels=DOMINANCE_LABELS,
        include_lowest=True,
    ).astype(str)
    return DOMINANCE_LABELS, filtered


# Markdown heading and usage instructions for Visual 1.
@app.cell
def _(mo):
    mo.md(r"""
    ## Visual 1 — Win-win scatter with linked species breakdown

    Yield × total species richness, coloured by region, sized by coffee dominance
    bucket. Median lines split the plot into four quadrants. Use the **"Selected
    site" dropdown** at the top to drill into a site's species composition (shown
    below the scatter); **click a bucket** in the dominance legend to filter the
    scatter to that management-intensity band.
    """)
    return


@app.cell
def _(DOMINANCE_LABELS, alt, df, filtered, mo, pd_DataFrame):
    # Axis domains come from the full dataset (not `filtered`), with some
    # headroom, so they are independent of the current filter settings.
    _x_limit = float(df.mean_yield.max()) * 1.05
    _y_limit = float(df.total_richness.max()) * 1.1

    _x_enc = alt.X(
        "mean_yield:Q", title="Mean yield (kg/ha)",
        scale=alt.Scale(domain=[0, _x_limit]),
    )
    _y_enc = alt.Y(
        "total_richness:Q", title="Total species richness",
        scale=alt.Scale(domain=[0, _y_limit]),
    )
    _color_enc = alt.Color(
        "region:N",
        scale=alt.Scale(domain=["Ge", "Go"], range=["#1f77b4", "#ff7f0e"]),
        legend=alt.Legend(title="Region"),
    )
    _size_enc = alt.Size(
        "dominance_bucket:N",
        scale=alt.Scale(domain=DOMINANCE_LABELS, range=[50, 80, 120, 180]),
        sort=DOMINANCE_LABELS,
        legend=alt.Legend(
            title="Coffee dominance",
            symbolFillColor="#555",
            symbolStrokeWidth=0,
            symbolOpacity=1,
        ),
    )
    _tooltip = [
        "site_id", "region",
        alt.Tooltip("mean_yield:Q", format=".0f", title="Yield (kg/ha)"),
        "total_richness", "woody_richness", "herb_richness", "bryophyte_richness",
        alt.Tooltip("dominance:Q", format=".3f"),
        alt.Tooltip("dominance_bucket:N", title="Dominance bucket"),
    ]

    # One circle per site that passed the filters.
    _points = (
        alt.Chart(filtered)
        .properties(width=620, height=380)
        .mark_circle(opacity=0.75, stroke="white", strokeWidth=0.5)
        .encode(
            x=_x_enc, y=_y_enc, color=_color_enc, size=_size_enc, tooltip=_tooltip
        )
    )

    def _median_rule(**position):
        # Dashed grey rule at a constant x or y position.
        return alt.Chart(filtered).mark_rule(
            strokeDash=[4, 4], color="gray"
        ).encode(**position)

    def _corner_labels(rows, align, dx):
        # Bold quadrant captions anchored at the given data coordinates.
        return alt.Chart(pd_DataFrame(rows)).mark_text(
            fontSize=11, fontWeight="bold", color="#555", align=align, dx=dx, dy=0
        ).encode(x="x:Q", y="y:Q", text="label:N")

    if len(filtered) == 0:
        # When the user's slider+region filter excludes every site, building a
        # mo.ui.altair_chart with chart_selection / legend_selection on an empty
        # dataframe causes the marimo kernel to return 500 (the Vega selection
        # cannot be registered against zero rows). Render a placeholder chart
        # with no selections so the dropdown still works downstream.
        chart1 = mo.ui.altair_chart(_points)
    else:
        # Medians of the filtered sites split the plot into four quadrants.
        _x_mid = float(filtered.mean_yield.median())
        _y_mid = float(filtered.total_richness.median())
        _vertical = _median_rule(x=alt.datum(_x_mid))
        _horizontal = _median_rule(y=alt.datum(_y_mid))

        # Captions sit at the corners of the filtered data's bounding box.
        _x_lo = float(filtered.mean_yield.min())
        _x_hi = float(filtered.mean_yield.max())
        _y_lo = float(filtered.total_richness.min())
        _y_hi = float(filtered.total_richness.max())
        _right = _corner_labels(
            [
                {"x": _x_hi, "y": _y_hi, "label": "Win-win"},
                {"x": _x_hi, "y": _y_lo, "label": "Concern"},
            ],
            align="right", dx=-4,
        )
        _left = _corner_labels(
            [
                {"x": _x_lo, "y": _y_hi, "label": "Low yield / rich"},
                {"x": _x_lo, "y": _y_lo, "label": "Low yield / poor"},
            ],
            align="left", dx=4,
        )

        _layered = _points + _vertical + _horizontal
        _layered = _layered + _right + _left
        chart1 = mo.ui.altair_chart(
            _layered, chart_selection="point",
            legend_selection=["region", "dominance_bucket"],
        )
    chart1
    return (chart1,)


@app.cell
def _(pd):
    def pd_DataFrame(x):
        """Build a pandas DataFrame from *x*; lets the scatter cell create frames inline."""
        frame = pd.DataFrame(x)
        return frame

    return (pd_DataFrame,)


@app.cell
def _(alt, chart1, df, mo, pd, site_picker):
    # --- Linked species breakdown for the selected site(s) ---
    # A real choice in the dropdown takes precedence; otherwise the rows
    # currently selected in the scatter (chart1.value) are used.
    _choice = site_picker.value
    _from_dropdown = bool(_choice) and _choice != "(pick a site)"
    _rows = df[df.site_id == _choice] if _from_dropdown else chart1.value
    _n_rows = 0 if _rows is None else len(_rows)

    if _n_rows == 0:
        # Nothing chosen anywhere: show a hint instead of a chart.
        detail = mo.md(
            "*Pick a site from the **\"Selected site\" dropdown** at the top to see its "
            "species composition.*"
        )
    elif _n_rows > 1:
        # The breakdown is drawn for a single site only.
        detail = mo.md(
            f"*{_n_rows} sites currently match the active filters. "
            "Pick one from the **\"Selected site\" dropdown** at the top to see its species "
            "composition.*"
        )
    else:
        site = _rows.iloc[0]
        _groups = ["Woody", "Herbaceous", "Bryophyte"]
        _counts = pd.DataFrame({
            "group": _groups,
            "count": [site.woody_richness, site.herb_richness, site.bryophyte_richness],
        })
        _group_color = alt.Color(
            "group:N",
            scale=alt.Scale(
                domain=_groups,
                range=["#2ca02c", "#d62728", "#1f77b4"],
            ),
            legend=None,
        )
        _title = (
            f"Site {site.site_id} · {site.region} · yield {site.mean_yield:.0f} kg/ha "
            f"· total richness {int(site.total_richness)}"
        )
        # One horizontal bar per species group.
        _bars = alt.Chart(_counts).mark_bar().encode(
            x=alt.X("count:Q", title="Species count"),
            y=alt.Y("group:N", sort=_groups, title=None),
            color=_group_color,
            tooltip=["group", "count"],
        ).properties(width=450, height=140, title=_title)
        detail = mo.ui.altair_chart(_bars)
    detail
    return


# Markdown heading and description for Visual 2.
@app.cell
def _(mo):
    mo.md(r"""
    ## Visual 2 — Small multiples: yield × richness per species group

    Three panels sharing the same x-axis (yield). The slopes differ dramatically between
    groups: woody responds strongly to yield pressure, bryophytes almost not at all.
    """)
    return


@app.cell
def _(alt, filtered, mo, pd, show_trend):
    # Visual 2: one yield-vs-richness panel per species group, with an optional
    # per-group regression line.
    if len(filtered) > 0:
        _group_names = {
            "woody_richness": "Woody",
            "herb_richness": "Herbaceous",
            "bryophyte_richness": "Bryophyte",
        }
        # Long format: one row per (site, species group).
        _tidy = pd.melt(
            filtered,
            id_vars=["site_id", "region", "mean_yield"],
            value_vars=["woody_richness", "herb_richness", "bryophyte_richness"],
            var_name="group", value_name="richness",
        )
        _tidy["group"] = _tidy["group"].map(_group_names)

        # Hovering a point selects its site_id, which marks that site in every panel.
        _hover = alt.selection_point(fields=["site_id"], on="mouseover", empty=False)

        _axes = alt.Chart(_tidy).encode(
            x=alt.X("mean_yield:Q", title="Mean yield (kg/ha)"),
            y=alt.Y("richness:Q", title="Richness"),
        )

        _region_color = alt.Color(
            "region:N",
            scale=alt.Scale(domain=["Ge", "Go"], range=["#1f77b4", "#ff7f0e"]),
        )
        _dots = _axes.mark_circle(size=70, opacity=0.7).encode(
            color=_region_color,
            opacity=alt.condition(_hover, alt.value(1.0), alt.value(0.35)),
            stroke=alt.condition(_hover, alt.value("black"), alt.value(None)),
            tooltip=["site_id", "region", "mean_yield", "richness"],
        ).add_params(_hover)

        # The trend line is added only when requested and when the yields span
        # a non-empty range.
        _panel = _dots
        if show_trend.value and len(_tidy) >= 2:
            _x_lo = float(_tidy.mean_yield.min())
            _x_hi = float(_tidy.mean_yield.max())
            if _x_hi > _x_lo:
                _fit = _axes.transform_regression(
                    "mean_yield", "richness", groupby=["group"], extent=[_x_lo, _x_hi]
                ).mark_line(color="black", strokeDash=[4, 2])
                _panel = _dots + _fit

        small_multiples = _panel.properties(width=240, height=240).facet(
            column=alt.Column("group:N", sort=["Woody", "Herbaceous", "Bryophyte"], title=None)
        ).resolve_scale(y="independent")
    else:
        small_multiples = mo.md(
            "*No sites match the current filters — adjust the dominance range above.*"
        )
    small_multiples
    return


# Markdown heading and reading guide for Visual 3.
@app.cell
def _(mo):
    mo.md(r"""
    ## Visual 3 — Stacked glyph array sorted by yield (novel design)

    One glyph per site, arranged left-to-right by yield rank (lowest → highest).
    Glyph height encodes total species richness; colours encode the three species groups.
    The expected *wedge* — tall on the left, shrinking to the right — is the visual
    signature of selective biodiversity loss.
    """)
    return


@app.cell
def _(alt, filtered, mo, pd):
    # Visual 3: one stacked bar ("glyph") per site along the yield rank, with a
    # thin yield line concatenated above it.
    if len(filtered) > 0:
        _group_names = {
            "woody_richness": "Woody",
            "herb_richness": "Herbaceous",
            "bryophyte_richness": "Bryophyte",
        }
        # Long format: one row per (site, species group).
        _tidy = pd.melt(
            filtered,
            id_vars=["site_id", "region", "mean_yield", "yield_rank"],
            value_vars=["woody_richness", "herb_richness", "bryophyte_richness"],
            var_name="group", value_name="count",
        )
        _tidy["group"] = _tidy["group"].map(_group_names)

        _group_color = alt.Color(
            "group:N",
            scale=alt.Scale(
                domain=["Woody", "Herbaceous", "Bryophyte"],
                range=["#2ca02c", "#d62728", "#1f77b4"],
            ),
            legend=alt.Legend(title="Species group"),
        )
        _bar_tooltip = [
            "site_id", "region", "yield_rank",
            alt.Tooltip("mean_yield:Q", format=".0f"),
            "group", "count",
        ]
        # Stacked bars; the x axis is drawn without labels or ticks.
        _bars = alt.Chart(_tidy).mark_bar(size=10).encode(
            x=alt.X("yield_rank:O", title="Site rank (low → high yield)",
                    axis=alt.Axis(labels=False, ticks=False)),
            y=alt.Y("count:Q", stack="zero", title="Species richness"),
            color=_group_color,
            order=alt.Order("group:N", sort="ascending"),
            tooltip=_bar_tooltip,
        ).properties(width=720, height=240)

        # Yield profile over the same rank axis, placed on top.
        _profile = alt.Chart(filtered).mark_line(color="gray", strokeWidth=1).encode(
            x=alt.X("yield_rank:O", axis=None),
            y=alt.Y("mean_yield:Q", title="Yield (kg/ha)"),
        ).properties(width=720, height=60)

        combined = alt.vconcat(_profile, _bars).resolve_scale(x="shared")
    else:
        combined = mo.md(
            "*No sites match the current filters — adjust the dominance range above.*"
        )
    combined
    return


# Closing Markdown cell with notes for reviewers.
@app.cell
def _(mo):
    mo.md(r"""
    ---
    ### Notes for reviewers
    - All three visuals share the region filter and dominance slider at the top.
    - Visual 1 and Visual 3 are linked through site identity: selecting a point in
      Visual 1 highlights its species breakdown; site ordering in Visual 3 preserves
      the identity of each point so you can cross-reference.
    - The full design rationale, including the NUF scoring of all ten diverge sketches,
      is in the accompanying Part 3 report.
    """)
    return


# Call app.run() only when this file is executed directly as a script.
if __name__ == "__main__":
    app.run()