Spaces:
Sleeping
Sleeping
| import marimo | |
# marimo version stamp recorded in this notebook file.
__generated_with = "0.23.2"

# Notebook application object; it is run by the __main__ guard at the end of the file.
app = marimo.App(width="full", app_title="Ethiopia Coffee Agroforests — Biodiversity vs Yield")
def _():
    # Shared imports for the notebook. The other cells take these names as
    # function parameters (mo, pd, alt, np, Path) instead of importing them again.
    from pathlib import Path

    import altair as alt
    import marimo as mo
    import numpy as np
    import pandas as pd

    return Path, alt, mo, np, pd
def _(mo):
    # Title cell: notebook heading, authors, persona and a one-paragraph summary.
    _intro_md = r"""
    # Ethiopia Coffee Agroforests — Biodiversity vs Yield
    **Remote Group 2** · Alperen Aydos, Arif Erkovan, Melih Yilmaz
    Persona: *Sofia Almeida — biodiversity conservation activist*
    Three linked custom visualisations exploring the trade-off between coffee yield and
    plant biodiversity across 60 agroforest sites in the Ge and Go regions of Ethiopia.
    """
    mo.md(_intro_md)
    return
def _(Path, np, pd):
    # --- Data loading ---
    # Point DATA_PATH to your real CSV. If the file is missing, a synthetic dataset
    # that matches the described structure is generated so the notebook still runs.
    #
    # Returns a one-tuple (df,): one row per site, sorted by ascending yield_rank,
    # with the derived columns total_richness and yield_rank always present.
    DATA_PATH = Path("data/coffee_sites.csv")

    # Columns the other cells in this notebook read directly from df.
    _required_cols = [
        "site_id",
        "region",
        "mean_yield",
        "dominance",
        "woody_richness",
        "herb_richness",
        "bryophyte_richness",
    ]

    if DATA_PATH.exists():
        df = pd.read_csv(DATA_PATH)
        # Fail here with a readable message rather than with an AttributeError
        # in whichever chart cell happens to touch the missing column first.
        _missing = [col for col in _required_cols if col not in df.columns]
        if _missing:
            raise ValueError(
                f"{DATA_PATH} is missing required column(s): {', '.join(_missing)}"
            )
    else:
        # Fixed seed so the synthetic fallback is reproducible between runs.
        # The order of the rng calls below determines the generated values.
        rng = np.random.default_rng(42)
        n_ge, n_go = 30, 30
        ge = pd.DataFrame({
            "site_id": [f"Ge{i+1:02d}" for i in range(n_ge)],
            "region": "Ge",
            "mean_yield": rng.normal(886, 300, n_ge).clip(60, 2400),
            "woody_richness": rng.normal(22, 5, n_ge).clip(5, 40).round().astype(int),
            "herb_richness": rng.normal(48, 10, n_ge).clip(15, 90).round().astype(int),
            "bryophyte_richness": rng.normal(13, 4, n_ge).clip(2, 30).round().astype(int),
            "dominance": rng.uniform(0.7, 0.99, n_ge),
        })
        go = pd.DataFrame({
            "site_id": [f"Go{i+1:02d}" for i in range(n_go)],
            "region": "Go",
            "mean_yield": rng.normal(1158, 350, n_go).clip(60, 2400),
            "woody_richness": rng.normal(14, 4, n_go).clip(3, 30).round().astype(int),
            "herb_richness": rng.normal(35, 9, n_go).clip(10, 75).round().astype(int),
            "bryophyte_richness": rng.normal(8, 3, n_go).clip(2, 25).round().astype(int),
            "dominance": rng.uniform(0.92, 0.996, n_go),
        })
        df = pd.concat([ge, go], ignore_index=True)

    # Derived columns are filled in once, for both data sources. A CSV that
    # already provides them keeps its own values.
    if "total_richness" not in df.columns:
        df["total_richness"] = df.woody_richness + df.herb_richness + df.bryophyte_richness
    if "yield_rank" not in df.columns:
        # method="first" breaks ties by row order, so every site gets a distinct rank.
        df["yield_rank"] = df.mean_yield.rank(method="first").astype(int)

    df = df.sort_values("yield_rank").reset_index(drop=True)
    return (df,)
def _(df, mo):
    # --- Global controls (used by all three visuals) ---
    # Builds the region multiselect, the coffee-dominance range slider, the
    # trend-line checkbox and the site dropdown, shows them in one row, and
    # returns (dominance_range, region_filter, show_trend, site_picker).

    # Local import: this cell does not receive math as a parameter.
    import math as _math

    region_filter = mo.ui.multiselect(
        options=["Ge", "Go"], value=["Ge", "Go"], label="Region"
    )

    # Snap the slider bounds outward to the 0.01 step. Plain rounding could move
    # the lower bound above the smallest dominance (or the upper bound below the
    # largest), so the default range would exclude those sites from the filter.
    # round(..., 6) removes float noise such as 0.7 * 100 == 70.00000000000001.
    _dom_lo = _math.floor(round(float(df.dominance.min()) * 100, 6)) / 100
    _dom_hi = _math.ceil(round(float(df.dominance.max()) * 100, 6)) / 100
    dominance_range = mo.ui.range_slider(
        start=_dom_lo,
        stop=_dom_hi,
        step=0.01,
        value=(_dom_lo, _dom_hi),
        label="Coffee dominance",
        show_value=True,
    )

    show_trend = mo.ui.checkbox(value=True, label="Show trend line (Visual 2)")

    def _site_sort_key(site_id):
        # Order by two-character prefix, then by numeric suffix ("Ge2" before
        # "Ge10"). IDs without a purely numeric suffix sort after the numbered
        # ones by their text instead of raising ValueError.
        text = str(site_id)
        prefix, suffix = text[:2], text[2:]
        if suffix.isdigit():
            return (prefix, 0, int(suffix), text)
        return (prefix, 1, 0, text)

    site_options = sorted(df.site_id.unique(), key=_site_sort_key)
    site_picker = mo.ui.dropdown(
        options=["(pick a site)"] + site_options,
        value="(pick a site)",
        label="Selected site",
    )

    mo.hstack(
        [region_filter, dominance_range, show_trend, site_picker],
        justify="start", gap=2,
    )
    return dominance_range, region_filter, show_trend, site_picker
def _(df, dominance_range, pd, region_filter):
    # Apply the global region and dominance controls to df and label each
    # remaining site with a coffee-dominance bucket.
    # Returns (DOMINANCE_LABELS, filtered); filtered is a copy, so df is not mutated.
    _dom_lo, _dom_hi = dominance_range.value[0], dominance_range.value[1]
    _keep = df.region.isin(region_filter.value) & df.dominance.between(_dom_lo, _dom_hi)
    filtered = df[_keep].copy()

    DOMINANCE_BINS = [0, 0.80, 0.90, 0.95, 1.01]
    DOMINANCE_LABELS = ["<0.80", "0.80-0.90", "0.90-0.95", ">0.95"]
    # include_lowest=True puts a dominance of exactly 0 into the first bucket.
    # Without it the right-closed bins leave 0 unbinned, and astype(str) turns
    # that into the literal label "nan", which is not one of DOMINANCE_LABELS.
    filtered["dominance_bucket"] = pd.cut(
        filtered["dominance"],
        bins=DOMINANCE_BINS,
        labels=DOMINANCE_LABELS,
        include_lowest=True,
    ).astype(str)
    return DOMINANCE_LABELS, filtered
def _(mo):
    # Section heading and usage notes for Visual 1.
    _visual1_md = r"""
    ## Visual 1 — Win-win scatter with linked species breakdown
    Yield × total species richness, coloured by region, sized by coffee dominance
    bucket. Median lines split the plot into four quadrants. Use the **"Selected
    site" dropdown** at the top to drill into a site's species composition (shown
    below the scatter); **click a bucket** in the dominance legend to filter the
    scatter to that management-intensity band.
    """
    mo.md(_visual1_md)
    return
def _(DOMINANCE_LABELS, alt, df, filtered, mo, pd_DataFrame):
    # Visual 1: yield x total-richness scatter of the filtered sites, wrapped in a
    # marimo UI element and returned as (chart1,) so another cell can read the
    # selected rows through chart1.value.
    #
    # The axis domains are computed from the unfiltered df, so they do not move
    # when the filters change; the medians and quadrant labels follow `filtered`.
    yield_max = float(df.mean_yield.max()) * 1.05
    richness_max = float(df.total_richness.max()) * 1.1

    base = alt.Chart(filtered).properties(width=620, height=380)
    points = base.mark_circle(opacity=0.75, stroke="white", strokeWidth=0.5).encode(
        x=alt.X("mean_yield:Q", title="Mean yield (kg/ha)",
                scale=alt.Scale(domain=[0, yield_max])),
        y=alt.Y("total_richness:Q", title="Total species richness",
                scale=alt.Scale(domain=[0, richness_max])),
        color=alt.Color(
            "region:N",
            scale=alt.Scale(domain=["Ge", "Go"], range=["#1f77b4", "#ff7f0e"]),
            legend=alt.Legend(title="Region"),
        ),
        size=alt.Size(
            "dominance_bucket:N",
            scale=alt.Scale(domain=DOMINANCE_LABELS, range=[50, 80, 120, 180]),
            sort=DOMINANCE_LABELS,
            legend=alt.Legend(
                title="Coffee dominance",
                symbolFillColor="#555",
                symbolStrokeWidth=0,
                symbolOpacity=1,
            ),
        ),
        tooltip=[
            "site_id", "region",
            alt.Tooltip("mean_yield:Q", format=".0f", title="Yield (kg/ha)"),
            "total_richness", "woody_richness", "herb_richness", "bryophyte_richness",
            alt.Tooltip("dominance:Q", format=".3f"),
            alt.Tooltip("dominance_bucket:N", title="Dominance bucket"),
        ],
    )
    scatter = points

    if len(filtered) > 0:
        # Dashed median rules split the plot into four quadrants.
        median_yield = float(filtered.mean_yield.median())
        median_richness = float(filtered.total_richness.median())
        vline = alt.Chart(filtered).mark_rule(
            strokeDash=[4, 4], color="gray"
        ).encode(x=alt.datum(median_yield))
        hline = alt.Chart(filtered).mark_rule(
            strokeDash=[4, 4], color="gray"
        ).encode(y=alt.datum(median_richness))
        scatter = scatter + vline + hline

        # Quadrant captions are anchored at the corners of the filtered data's
        # bounding box: right-aligned at the max yield, left-aligned at the min.
        ymin = float(filtered.mean_yield.min())
        ymax = float(filtered.mean_yield.max())
        rmin = float(filtered.total_richness.min())
        rmax = float(filtered.total_richness.max())
        right_labels = alt.Chart(pd_DataFrame([
            {"x": ymax, "y": rmax, "label": "Win-win"},
            {"x": ymax, "y": rmin, "label": "Concern"},
        ])).mark_text(
            fontSize=11, fontWeight="bold", color="#555", align="right", dx=-4, dy=0
        ).encode(x="x:Q", y="y:Q", text="label:N")
        left_labels = alt.Chart(pd_DataFrame([
            {"x": ymin, "y": rmax, "label": "Low yield / rich"},
            {"x": ymin, "y": rmin, "label": "Low yield / poor"},
        ])).mark_text(
            fontSize=11, fontWeight="bold", color="#555", align="left", dx=4, dy=0
        ).encode(x="x:Q", y="y:Q", text="label:N")
        scatter = scatter + right_labels + left_labels

        chart1 = mo.ui.altair_chart(
            scatter, chart_selection="point",
            legend_selection=["region", "dominance_bucket"],
        )
    else:
        # When the user's slider+region filter excludes every site, building a
        # mo.ui.altair_chart with chart_selection / legend_selection on an empty
        # dataframe causes the marimo kernel to return 500 (the Vega selection
        # cannot be registered against zero rows). Render a placeholder chart
        # with no selections so the dropdown still works downstream.
        # Both selection kinds are enabled by default in mo.ui.altair_chart, so
        # they have to be switched off explicitly.
        chart1 = mo.ui.altair_chart(
            scatter, chart_selection=False, legend_selection=False
        )

    chart1
    return (chart1,)
def _(pd):
    # Exposes DataFrame construction as a named helper, so a cell that takes
    # pd_DataFrame as a parameter (but not pd) can build small frames inline.
    def pd_DataFrame(x):
        frame = pd.DataFrame(x)
        return frame

    return (pd_DataFrame,)
def _(alt, chart1, df, mo, pd, site_picker):
    # --- Linked species breakdown for the selected site(s) ---
    # The dropdown wins whenever it holds a real site; the scatter selection
    # (chart1.value) is consulted only while the placeholder entry is showing.
    picked = site_picker.value
    _dropdown_in_use = bool(picked) and picked != "(pick a site)"
    sel = df[df.site_id == picked] if _dropdown_in_use else chart1.value
    _n_selected = 0 if sel is None else len(sel)

    if _n_selected == 0:
        # Nothing chosen yet: show a hint instead of a chart.
        detail = mo.md(
            "*Pick a site from the **\"Selected site\" dropdown** at the top to see its "
            "species composition.*"
        )
    elif _n_selected > 1:
        # Several rows selected: ask the user to narrow down to one site.
        detail = mo.md(
            f"*{len(sel)} sites currently match the active filters. "
            "Pick one from the **\"Selected site\" dropdown** at the top to see its species "
            "composition.*"
        )
    else:
        # Exactly one site: horizontal bars, one per species group.
        site = sel.iloc[0]
        _groups = ["Woody", "Herbaceous", "Bryophyte"]
        long_df = pd.DataFrame({
            "group": ["Woody", "Herbaceous", "Bryophyte"],
            "count": [site.woody_richness, site.herb_richness, site.bryophyte_richness],
        })
        _group_colour = alt.Color(
            "group:N",
            scale=alt.Scale(domain=_groups, range=["#2ca02c", "#d62728", "#1f77b4"]),
            legend=None,
        )
        _title = (
            f"Site {site.site_id} · {site.region} · yield {site.mean_yield:.0f} kg/ha "
            f"· total richness {int(site.total_richness)}"
        )
        bar = (
            alt.Chart(long_df)
            .mark_bar()
            .encode(
                x=alt.X("count:Q", title="Species count"),
                y=alt.Y("group:N", sort=_groups, title=None),
                color=_group_colour,
                tooltip=["group", "count"],
            )
            .properties(width=450, height=140, title=_title)
        )
        detail = mo.ui.altair_chart(bar)

    detail
    return
def _(mo):
    # Section heading and short description for Visual 2.
    _visual2_md = r"""
    ## Visual 2 — Small multiples: yield × richness per species group
    Three panels sharing the same x-axis (yield). The slopes differ dramatically between
    groups: woody responds strongly to yield pressure, bryophytes almost not at all.
    """
    mo.md(_visual2_md)
    return
def _(alt, filtered, mo, pd, show_trend):
    # Visual 2: one yield-vs-richness panel per species group, built from the
    # filtered sites, with an optional per-group regression line.
    _group_order = ["Woody", "Herbaceous", "Bryophyte"]

    if len(filtered) > 0:
        # Reshape to one row per (site, species group).
        long = pd.melt(
            filtered,
            id_vars=["site_id", "region", "mean_yield"],
            value_vars=["woody_richness", "herb_richness", "bryophyte_richness"],
            var_name="group", value_name="richness",
        )
        _display_names = {
            "woody_richness": "Woody",
            "herb_richness": "Herbaceous",
            "bryophyte_richness": "Bryophyte",
        }
        long["group"] = long["group"].map(_display_names)

        # Hovering a point selects its site_id, so the same site is emphasised
        # in every panel.
        panel_selection = alt.selection_point(fields=["site_id"], on="mouseover", empty=False)
        base_sm = alt.Chart(long).encode(
            x=alt.X("mean_yield:Q", title="Mean yield (kg/ha)"),
            y=alt.Y("richness:Q", title="Richness"),
        )
        _region_colour = alt.Color(
            "region:N",
            scale=alt.Scale(domain=["Ge", "Go"], range=["#1f77b4", "#ff7f0e"]),
        )
        pts = base_sm.mark_circle(size=70, opacity=0.7).encode(
            color=_region_colour,
            opacity=alt.condition(panel_selection, alt.value(1.0), alt.value(0.35)),
            stroke=alt.condition(panel_selection, alt.value("black"), alt.value(None)),
            tooltip=["site_id", "region", "mean_yield", "richness"],
        ).add_params(panel_selection)

        # Start from the points alone; the trend line is layered on only when it
        # is requested and the yields span a non-zero range.
        layer = pts
        if show_trend.value and len(long) >= 2:
            xmin = float(long.mean_yield.min())
            xmax = float(long.mean_yield.max())
            if xmax > xmin:
                trend = base_sm.transform_regression(
                    "mean_yield", "richness", groupby=["group"], extent=[xmin, xmax]
                ).mark_line(color="black", strokeDash=[4, 2])
                layer = pts + trend

        _panel = layer.properties(width=240, height=240)
        small_multiples = _panel.facet(
            column=alt.Column("group:N", sort=_group_order, title=None)
        ).resolve_scale(y="independent")
    else:
        small_multiples = mo.md(
            "*No sites match the current filters — adjust the dominance range above.*"
        )

    small_multiples
    return
def _(mo):
    # Section heading and reading guide for Visual 3.
    _visual3_md = r"""
    ## Visual 3 — Stacked glyph array sorted by yield (novel design)
    One glyph per site, arranged left-to-right by yield rank (lowest → highest).
    Glyph height encodes total species richness; colours encode the three species groups.
    The expected *wedge* — tall on the left, shrinking to the right — is the visual
    signature of selective biodiversity loss.
    """
    mo.md(_visual3_md)
    return
def _(alt, filtered, mo, pd):
    # Visual 3: a thin yield line stacked above one stacked bar ("glyph") per
    # site, both laid out along yield_rank.
    if len(filtered) > 0:
        # One row per (site, species group) for the stacked bars.
        glyph_long = pd.melt(
            filtered,
            id_vars=["site_id", "region", "mean_yield", "yield_rank"],
            value_vars=["woody_richness", "herb_richness", "bryophyte_richness"],
            var_name="group", value_name="count",
        )
        group_map = {
            "woody_richness": "Woody",
            "herb_richness": "Herbaceous",
            "bryophyte_richness": "Bryophyte",
        }
        glyph_long["group"] = glyph_long["group"].map(group_map)

        _group_colour = alt.Color(
            "group:N",
            scale=alt.Scale(
                domain=["Woody", "Herbaceous", "Bryophyte"],
                range=["#2ca02c", "#d62728", "#1f77b4"],
            ),
            legend=alt.Legend(title="Species group"),
        )
        _rank_axis = alt.X(
            "yield_rank:O",
            title="Site rank (low → high yield)",
            axis=alt.Axis(labels=False, ticks=False),
        )
        _glyph_tooltip = [
            "site_id", "region", "yield_rank",
            alt.Tooltip("mean_yield:Q", format=".0f"),
            "group", "count",
        ]
        glyphs = (
            alt.Chart(glyph_long)
            .mark_bar(size=10)
            .encode(
                x=_rank_axis,
                y=alt.Y("count:Q", stack="zero", title="Species richness"),
                color=_group_colour,
                order=alt.Order("group:N", sort="ascending"),
                tooltip=_glyph_tooltip,
            )
            .properties(width=720, height=240)
        )

        # Companion strip showing the yield value behind each rank position.
        yield_line = (
            alt.Chart(filtered)
            .mark_line(color="gray", strokeWidth=1)
            .encode(
                x=alt.X("yield_rank:O", axis=None),
                y=alt.Y("mean_yield:Q", title="Yield (kg/ha)"),
            )
            .properties(width=720, height=60)
        )
        combined = alt.vconcat(yield_line, glyphs).resolve_scale(x="shared")
    else:
        combined = mo.md(
            "*No sites match the current filters — adjust the dominance range above.*"
        )

    combined
    return
def _(mo):
    # Closing notes addressed to reviewers of the notebook.
    _notes_md = r"""
    ---
    ### Notes for reviewers
    - All three visuals share the region filter and dominance slider at the top.
    - Visual 1 and Visual 3 are linked through site identity: selecting a point in
    Visual 1 highlights its species breakdown; site ordering in Visual 3 preserves
    the identity of each point so you can cross-reference.
    - The full design rationale, including the NUF scoring of all ten diverge sketches,
    is in the accompanying Part 3 report.
    """
    mo.md(_notes_md)
    return
# Run the notebook application when this file is executed as a script.
if __name__ == "__main__":
    app.run()