import marimo

__generated_with = "0.23.2"
app = marimo.App(
    width="full",
    app_title="Ethiopia Coffee Agroforests — Biodiversity vs Yield",
)


@app.cell
def _():
    # Shared imports; every other cell receives these names as parameters.
    import marimo as mo
    import pandas as pd
    import altair as alt
    import numpy as np
    from pathlib import Path

    return Path, alt, mo, np, pd


@app.cell
def _(mo):
    mo.md(r"""
    # Ethiopia Coffee Agroforests — Biodiversity vs Yield

    **Remote Group 2** · Alperen Aydos, Arif Erkovan, Melih Yilmaz
    Persona: *Sofia Almeida — biodiversity conservation activist*

    Three linked custom visualisations exploring the trade-off between coffee yield and
    plant biodiversity across 60 agroforest sites in the Ge and Go regions of Ethiopia.
    """)
    return


@app.cell
def _(Path, np, pd):
    # --- Data loading ---
    # Point DATA_PATH to your real CSV. If the file is missing, a synthetic dataset
    # that matches the described structure is generated so the notebook still runs.
    DATA_PATH = Path("data/coffee_sites.csv")

    if DATA_PATH.exists():
        df = pd.read_csv(DATA_PATH)
    else:
        # Fixed seed so the synthetic fallback is reproducible between runs.
        rng = np.random.default_rng(42)
        n_ge, n_go = 30, 30
        ge = pd.DataFrame({
            "site_id": [f"Ge{i+1:02d}" for i in range(n_ge)],
            "region": "Ge",
            "mean_yield": rng.normal(886, 300, n_ge).clip(60, 2400),
            "woody_richness": rng.normal(22, 5, n_ge).clip(5, 40).round().astype(int),
            "herb_richness": rng.normal(48, 10, n_ge).clip(15, 90).round().astype(int),
            "bryophyte_richness": rng.normal(13, 4, n_ge).clip(2, 30).round().astype(int),
            "dominance": rng.uniform(0.7, 0.99, n_ge),
        })
        go = pd.DataFrame({
            "site_id": [f"Go{i+1:02d}" for i in range(n_go)],
            "region": "Go",
            "mean_yield": rng.normal(1158, 350, n_go).clip(60, 2400),
            "woody_richness": rng.normal(14, 4, n_go).clip(3, 30).round().astype(int),
            "herb_richness": rng.normal(35, 9, n_go).clip(10, 75).round().astype(int),
            "bryophyte_richness": rng.normal(8, 3, n_go).clip(2, 25).round().astype(int),
            "dominance": rng.uniform(0.92, 0.996, n_go),
        })
        df = pd.concat([ge, go], ignore_index=True)

    # Fail early with a readable message instead of an AttributeError further down
    # when a real CSV lacks one of the columns the visuals rely on.
    _REQUIRED_COLUMNS = [
        "site_id",
        "region",
        "mean_yield",
        "woody_richness",
        "herb_richness",
        "bryophyte_richness",
        "dominance",
    ]
    _missing = [c for c in _REQUIRED_COLUMNS if c not in df.columns]
    if _missing:
        raise ValueError(f"{DATA_PATH} is missing required column(s): {_missing}")

    # Derived columns are computed once, for both the CSV and the synthetic path,
    # and only when the input does not already provide them.
    if "total_richness" not in df.columns:
        df["total_richness"] = (
            df.woody_richness + df.herb_richness + df.bryophyte_richness
        )
    if "yield_rank" not in df.columns:
        df["yield_rank"] = df.mean_yield.rank(method="first").astype(int)

    df = df.sort_values("yield_rank").reset_index(drop=True)
    return (df,)


@app.cell
def _(df, mo, np):
    # --- Global controls (used by all three visuals) ---
    region_filter = mo.ui.multiselect(
        options=["Ge", "Go"], value=["Ge", "Go"], label="Region"
    )

    # Snap the slider bounds outward to the 0.01 step. Plain rounding could move
    # the lower bound above the true minimum (or the upper bound below the true
    # maximum), which silently dropped the extreme sites from the default view.
    _dom_lo = float(np.floor(df.dominance.min() * 100) / 100)
    _dom_hi = float(np.ceil(df.dominance.max() * 100) / 100)
    dominance_range = mo.ui.range_slider(
        start=_dom_lo,
        stop=_dom_hi,
        step=0.01,
        value=(_dom_lo, _dom_hi),
        label="Coffee dominance",
        show_value=True,
    )
    show_trend = mo.ui.checkbox(value=True, label="Show trend line (Visual 2)")

    def _site_sort_key(site_id):
        # Order by two-letter prefix, then by numeric suffix ("Ge2" before "Ge10").
        # Ids without a purely numeric suffix sort first within their prefix
        # instead of raising, so a real CSV with other id schemes still loads.
        prefix, suffix = site_id[:2], site_id[2:]
        number = int(suffix) if suffix.isdecimal() else -1
        return (prefix, number, site_id)

    site_options = sorted(df.site_id.unique(), key=_site_sort_key)
    site_picker = mo.ui.dropdown(
        options=["(pick a site)"] + site_options,
        value="(pick a site)",
        label="Selected site",
    )
    mo.hstack(
        [region_filter, dominance_range, show_trend, site_picker],
        justify="start",
        gap=2,
    )
    return dominance_range, region_filter, show_trend, site_picker


@app.cell
def _(df, dominance_range, pd, region_filter):
    # Apply the global region + dominance filters; all three visuals read `filtered`.
    filtered = df[
        df.region.isin(region_filter.value)
        & df.dominance.between(dominance_range.value[0], dominance_range.value[1])
    ].copy()

    # Bucket dominance into four bands used for point size in Visual 1.
    DOMINANCE_BINS = [0, 0.80, 0.90, 0.95, 1.01]
    DOMINANCE_LABELS = ["<0.80", "0.80-0.90", "0.90-0.95", ">0.95"]
    filtered["dominance_bucket"] = pd.cut(
        filtered["dominance"], bins=DOMINANCE_BINS, labels=DOMINANCE_LABELS
    ).astype(str)
    return DOMINANCE_LABELS, filtered


@app.cell
def _(mo):
    mo.md(r"""
    ## Visual 1 — Win-win scatter with linked species breakdown

    Yield × total species richness, coloured by region, sized by coffee dominance bucket.
    Median lines split the plot into four quadrants.
    Use the **"Selected site" dropdown** at the top to drill into a site's species composition (shown below the scatter);
    **click a bucket** in the dominance legend to filter the scatter to that management-intensity band.
    """)
    return


@app.cell
def _(DOMINANCE_LABELS, alt, df, filtered, mo, pd):
    # Axis domains come from the unfiltered data so the axes stay fixed while
    # the filters change which points are drawn.
    yield_max = float(df.mean_yield.max()) * 1.05
    richness_max = float(df.total_richness.max()) * 1.1

    base = alt.Chart(filtered).properties(width=620, height=380)
    points = base.mark_circle(opacity=0.75, stroke="white", strokeWidth=0.5).encode(
        x=alt.X(
            "mean_yield:Q",
            title="Mean yield (kg/ha)",
            scale=alt.Scale(domain=[0, yield_max]),
        ),
        y=alt.Y(
            "total_richness:Q",
            title="Total species richness",
            scale=alt.Scale(domain=[0, richness_max]),
        ),
        color=alt.Color(
            "region:N",
            scale=alt.Scale(domain=["Ge", "Go"], range=["#1f77b4", "#ff7f0e"]),
            legend=alt.Legend(title="Region"),
        ),
        size=alt.Size(
            "dominance_bucket:N",
            scale=alt.Scale(domain=DOMINANCE_LABELS, range=[50, 80, 120, 180]),
            sort=DOMINANCE_LABELS,
            legend=alt.Legend(
                title="Coffee dominance",
                symbolFillColor="#555",
                symbolStrokeWidth=0,
                symbolOpacity=1,
            ),
        ),
        tooltip=[
            "site_id",
            "region",
            alt.Tooltip("mean_yield:Q", format=".0f", title="Yield (kg/ha)"),
            "total_richness",
            "woody_richness",
            "herb_richness",
            "bryophyte_richness",
            alt.Tooltip("dominance:Q", format=".3f"),
            alt.Tooltip("dominance_bucket:N", title="Dominance bucket"),
        ],
    )

    scatter = points
    if not filtered.empty:
        # Dashed median rules split the plot into the four quadrants.
        median_yield = float(filtered.mean_yield.median())
        median_richness = float(filtered.total_richness.median())
        vline = alt.Chart(filtered).mark_rule(
            strokeDash=[4, 4], color="gray"
        ).encode(x=alt.datum(median_yield))
        hline = alt.Chart(filtered).mark_rule(
            strokeDash=[4, 4], color="gray"
        ).encode(y=alt.datum(median_richness))
        scatter = scatter + vline + hline

        # Quadrant captions are anchored at the corners of the filtered data extent.
        ymin = float(filtered.mean_yield.min())
        ymax = float(filtered.mean_yield.max())
        rmin = float(filtered.total_richness.min())
        rmax = float(filtered.total_richness.max())
        right_labels = alt.Chart(pd.DataFrame([
            {"x": ymax, "y": rmax, "label": "Win-win"},
            {"x": ymax, "y": rmin, "label": "Concern"},
        ])).mark_text(
            fontSize=11, fontWeight="bold", color="#555", align="right", dx=-4, dy=0
        ).encode(x="x:Q", y="y:Q", text="label:N")
        left_labels = alt.Chart(pd.DataFrame([
            {"x": ymin, "y": rmax, "label": "Low yield / rich"},
            {"x": ymin, "y": rmin, "label": "Low yield / poor"},
        ])).mark_text(
            fontSize=11, fontWeight="bold", color="#555", align="left", dx=4, dy=0
        ).encode(x="x:Q", y="y:Q", text="label:N")
        scatter = scatter + right_labels + left_labels

        chart1 = mo.ui.altair_chart(
            scatter,
            chart_selection="point",
            legend_selection=["region", "dominance_bucket"],
        )
    else:
        # When the user's slider+region filter excludes every site, building a
        # mo.ui.altair_chart with chart_selection / legend_selection on an empty
        # dataframe causes the marimo kernel to return 500 (the Vega selection
        # cannot be registered against zero rows). Render a placeholder chart
        # with no selections so the dropdown still works downstream.
        # Both flags default to True in marimo, so they must be disabled explicitly.
        chart1 = mo.ui.altair_chart(
            scatter, chart_selection=False, legend_selection=False
        )

    chart1
    return (chart1,)


@app.cell
def _(alt, chart1, df, mo, pd, site_picker):
    # --- Linked species breakdown for the selected site(s) ---
    # Prefer the dropdown picker; fall back to the scatter click selection.
    picked = site_picker.value
    if picked and picked != "(pick a site)":
        sel = df[df.site_id == picked]
    else:
        sel = chart1.value  # pandas DataFrame of currently selected rows

    if sel is None or len(sel) == 0:
        detail = mo.md(
            "*Pick a site from the **\"Selected site\" dropdown** at the top to see its "
            "species composition.*"
        )
    elif len(sel) > 1:
        detail = mo.md(
            f"*{len(sel)} sites currently match the active filters. "
            "Pick one from the **\"Selected site\" dropdown** at the top to see its species "
            "composition.*"
        )
    else:
        # Exactly one site: show its per-group species counts as a small bar chart.
        site = sel.iloc[0]
        long_df = pd.DataFrame({
            "group": ["Woody", "Herbaceous", "Bryophyte"],
            "count": [site.woody_richness, site.herb_richness, site.bryophyte_richness],
        })
        bar = alt.Chart(long_df).mark_bar().encode(
            x=alt.X("count:Q", title="Species count"),
            y=alt.Y("group:N", sort=["Woody", "Herbaceous", "Bryophyte"], title=None),
            color=alt.Color(
                "group:N",
                scale=alt.Scale(
                    domain=["Woody", "Herbaceous", "Bryophyte"],
                    range=["#2ca02c", "#d62728", "#1f77b4"],
                ),
                legend=None,
            ),
            tooltip=["group", "count"],
        ).properties(
            width=450,
            height=140,
            title=f"Site {site.site_id} · {site.region} · yield {site.mean_yield:.0f} kg/ha "
                  f"· total richness {int(site.total_richness)}"
        )
        detail = mo.ui.altair_chart(bar)

    detail
    return


@app.cell
def _(mo):
    mo.md(r"""
    ## Visual 2 — Small multiples: yield × richness per species group

    Three panels sharing the same x-axis (yield). The slopes differ dramatically between groups:
    woody responds strongly to yield pressure, bryophytes almost not at all.
    """)
    return


@app.cell
def _(alt, filtered, mo, pd, show_trend):
    if len(filtered) == 0:
        small_multiples = mo.md(
            "*No sites match the current filters — adjust the dominance range above.*"
        )
    else:
        # One row per (site, species group) so the chart can facet by group.
        long = pd.melt(
            filtered,
            id_vars=["site_id", "region", "mean_yield"],
            value_vars=["woody_richness", "herb_richness", "bryophyte_richness"],
            var_name="group",
            value_name="richness",
        )
        long["group"] = long["group"].map({
            "woody_richness": "Woody",
            "herb_richness": "Herbaceous",
            "bryophyte_richness": "Bryophyte",
        })

        # Hovering a point selects its site_id, which emphasises the same site
        # in every panel.
        panel_selection = alt.selection_point(
            fields=["site_id"], on="mouseover", empty=False
        )

        base_sm = alt.Chart(long).encode(
            x=alt.X("mean_yield:Q", title="Mean yield (kg/ha)"),
            y=alt.Y("richness:Q", title="Richness"),
        )
        pts = base_sm.mark_circle(size=70, opacity=0.7).encode(
            color=alt.Color(
                "region:N",
                scale=alt.Scale(domain=["Ge", "Go"], range=["#1f77b4", "#ff7f0e"]),
            ),
            opacity=alt.condition(panel_selection, alt.value(1.0), alt.value(0.35)),
            stroke=alt.condition(panel_selection, alt.value("black"), alt.value(None)),
            tooltip=["site_id", "region", "mean_yield", "richness"],
        ).add_params(panel_selection)

        # The regression line is only added when requested and when the data
        # spans a non-degenerate yield range.
        layer = pts
        if show_trend.value and len(long) >= 2:
            xmin = float(long.mean_yield.min())
            xmax = float(long.mean_yield.max())
            if xmax > xmin:
                trend = base_sm.transform_regression(
                    "mean_yield", "richness", groupby=["group"], extent=[xmin, xmax]
                ).mark_line(color="black", strokeDash=[4, 2])
                layer = pts + trend

        small_multiples = layer.properties(width=240, height=240).facet(
            column=alt.Column(
                "group:N", sort=["Woody", "Herbaceous", "Bryophyte"], title=None
            )
        ).resolve_scale(y="independent")

    small_multiples
    return


@app.cell
def _(mo):
    mo.md(r"""
    ## Visual 3 — Stacked glyph array sorted by yield (novel design)

    One glyph per site, arranged left-to-right by yield rank (lowest → highest).
    Glyph height encodes total species richness; colours encode the three species groups.
    The expected *wedge* — tall on the left, shrinking to the right — is the visual
    signature of selective biodiversity loss.
    """)
    return


@app.cell
def _(alt, filtered, mo, pd):
    if len(filtered) == 0:
        combined = mo.md(
            "*No sites match the current filters — adjust the dominance range above.*"
        )
    else:
        # One row per (site, species group) so the bars can be stacked by group.
        glyph_long = pd.melt(
            filtered,
            id_vars=["site_id", "region", "mean_yield", "yield_rank"],
            value_vars=["woody_richness", "herb_richness", "bryophyte_richness"],
            var_name="group",
            value_name="count",
        )
        group_map = {
            "woody_richness": "Woody",
            "herb_richness": "Herbaceous",
            "bryophyte_richness": "Bryophyte",
        }
        glyph_long["group"] = glyph_long["group"].map(group_map)

        glyphs = alt.Chart(glyph_long).mark_bar(size=10).encode(
            x=alt.X(
                "yield_rank:O",
                title="Site rank (low → high yield)",
                axis=alt.Axis(labels=False, ticks=False),
            ),
            y=alt.Y("count:Q", stack="zero", title="Species richness"),
            color=alt.Color(
                "group:N",
                scale=alt.Scale(
                    domain=["Woody", "Herbaceous", "Bryophyte"],
                    range=["#2ca02c", "#d62728", "#1f77b4"],
                ),
                legend=alt.Legend(title="Species group"),
            ),
            order=alt.Order("group:N", sort="ascending"),
            tooltip=[
                "site_id",
                "region",
                "yield_rank",
                alt.Tooltip("mean_yield:Q", format=".0f"),
                "group",
                "count",
            ],
        ).properties(width=720, height=240)

        # Thin yield profile above the glyphs, sharing the rank axis with them.
        yield_line = alt.Chart(filtered).mark_line(color="gray", strokeWidth=1).encode(
            x=alt.X("yield_rank:O", axis=None),
            y=alt.Y("mean_yield:Q", title="Yield (kg/ha)"),
        ).properties(width=720, height=60)

        combined = alt.vconcat(yield_line, glyphs).resolve_scale(x="shared")

    combined
    return


@app.cell
def _(mo):
    mo.md(r"""
    ---
    ### Notes for reviewers

    - All three visuals share the region filter and dominance slider at the top.
    - Visual 1 and Visual 3 are linked through site identity: selecting a point in Visual 1
      highlights its species breakdown; site ordering in Visual 3 preserves the identity
      of each point so you can cross-reference.
    - The full design rationale, including the NUF scoring of all ten diverge sketches,
      is in the accompanying Part 3 report.
    """)
    return


if __name__ == "__main__":
    app.run()