# remote-group2 / notebook.py
# melihorhanyilmaz
# Use if/else instead of early return in chart cells
# 8b81992
import marimo
# Version string stored under `__generated_with` (presumably the marimo
# release that wrote this file — confirm before editing by hand).
__generated_with = "0.23.2"
# Notebook application object: every cell below registers itself on it through
# the @app.cell decorator, and the main guard at the bottom calls app.run().
app = marimo.App(
    width="full",
    app_title="Ethiopia Coffee Agroforests — Biodiversity vs Yield",
)
@app.cell
def _():
    # Imports shared by the notebook. The names are returned so that the other
    # cells can receive them as parameters (mo, pd, alt, np, Path).
    from pathlib import Path

    import altair as alt
    import marimo as mo
    import numpy as np
    import pandas as pd

    return Path, alt, mo, np, pd
# Markdown cell: notebook title, team members, target persona and a short
# overview. Blank lines separate the entries because Markdown joins
# consecutive non-blank lines into a single paragraph.
@app.cell
def _(mo):
    mo.md(r"""
# Ethiopia Coffee Agroforests — Biodiversity vs Yield

**Remote Group 2** · Alperen Aydos, Arif Erkovan, Melih Yilmaz

Persona: *Sofia Almeida — biodiversity conservation activist*

Three linked custom visualisations exploring the trade-off between coffee yield and
plant biodiversity across 60 agroforest sites in the Ge and Go regions of Ethiopia.
""")
    return
@app.cell
def _(Path, np, pd):
    # --- Data loading ---
    # Point DATA_PATH to your real CSV. If the file is missing, a synthetic dataset
    # that matches the described structure is generated so the notebook still runs.
    #
    # Returns `df`: one row per site, sorted by `yield_rank`, with the derived
    # columns `total_richness` and `yield_rank` guaranteed to be present.
    # Raises ValueError when an existing CSV lacks a column the notebook reads.
    DATA_PATH = Path("data/coffee_sites.csv")

    if DATA_PATH.exists():
        df = pd.read_csv(DATA_PATH)
        # Fail here, with a readable message, rather than later inside a chart
        # cell with an attribute error on the missing column.
        _required = (
            "site_id",
            "region",
            "mean_yield",
            "woody_richness",
            "herb_richness",
            "bryophyte_richness",
            "dominance",
        )
        _missing = [_col for _col in _required if _col not in df.columns]
        if _missing:
            raise ValueError(
                f"{DATA_PATH} is missing required column(s): {', '.join(_missing)}"
            )
    else:
        # Fixed seed: the synthetic fallback is the same on every run. The
        # order of the rng calls below determines the generated values.
        rng = np.random.default_rng(42)
        n_ge, n_go = 30, 30
        ge = pd.DataFrame({
            "site_id": [f"Ge{i+1:02d}" for i in range(n_ge)],
            "region": "Ge",
            "mean_yield": rng.normal(886, 300, n_ge).clip(60, 2400),
            "woody_richness": rng.normal(22, 5, n_ge).clip(5, 40).round().astype(int),
            "herb_richness": rng.normal(48, 10, n_ge).clip(15, 90).round().astype(int),
            "bryophyte_richness": rng.normal(13, 4, n_ge).clip(2, 30).round().astype(int),
            "dominance": rng.uniform(0.7, 0.99, n_ge),
        })
        go = pd.DataFrame({
            "site_id": [f"Go{i+1:02d}" for i in range(n_go)],
            "region": "Go",
            "mean_yield": rng.normal(1158, 350, n_go).clip(60, 2400),
            "woody_richness": rng.normal(14, 4, n_go).clip(3, 30).round().astype(int),
            "herb_richness": rng.normal(35, 9, n_go).clip(10, 75).round().astype(int),
            "bryophyte_richness": rng.normal(8, 3, n_go).clip(2, 25).round().astype(int),
            "dominance": rng.uniform(0.92, 0.996, n_go),
        })
        df = pd.concat([ge, go], ignore_index=True)

    # Derived columns are filled in once for both branches; columns that a real
    # CSV already provides are kept untouched.
    if "total_richness" not in df.columns:
        df["total_richness"] = df.woody_richness + df.herb_richness + df.bryophyte_richness
    if "yield_rank" not in df.columns:
        # method="first" breaks ties by row order, so every site gets its own rank.
        df["yield_rank"] = df.mean_yield.rank(method="first").astype(int)

    df = df.sort_values("yield_rank").reset_index(drop=True)
    return (df,)
@app.cell
def _(df, mo):
    # --- Global controls (used by all three visuals) ---
    # Builds the region multiselect, the coffee-dominance range slider, the
    # trend-line checkbox and the site dropdown, and shows them in one row.
    region_filter = mo.ui.multiselect(
        options=["Ge", "Go"], value=["Ge", "Go"], label="Region"
    )

    # Snap the slider bounds OUTWARD to the 0.01 step: floor for the lower
    # bound, ceiling for the upper one. Rounding to the nearest step could put
    # a bound inside the data range, and the default slider value would then
    # silently exclude the most extreme sites from every visual.
    _dominance_lo = float((df.dominance.min() * 100) // 1) / 100
    _dominance_hi = float(-((-df.dominance.max() * 100) // 1)) / 100
    dominance_range = mo.ui.range_slider(
        start=_dominance_lo,
        stop=_dominance_hi,
        step=0.01,
        value=(_dominance_lo, _dominance_hi),
        label="Coffee dominance",
        show_value=True,
    )

    show_trend = mo.ui.checkbox(value=True, label="Show trend line (Visual 2)")

    def _site_sort_key(site_id):
        # Order by the two-character prefix, then by the numeric suffix. IDs
        # whose suffix is not an integer sort after the numeric ones of the
        # same prefix instead of aborting the cell with a ValueError.
        prefix, suffix = site_id[:2], site_id[2:]
        try:
            return (prefix, 0, int(suffix), site_id)
        except ValueError:
            return (prefix, 1, 0, site_id)

    site_options = sorted(df.site_id.unique(), key=_site_sort_key)
    site_picker = mo.ui.dropdown(
        options=["(pick a site)"] + site_options,
        value="(pick a site)",
        label="Selected site",
    )

    # Last expression of the cell: the row of controls is the cell's output.
    mo.hstack(
        [region_filter, dominance_range, show_trend, site_picker],
        justify="start", gap=2,
    )
    return dominance_range, region_filter, show_trend, site_picker
@app.cell
def _(df, dominance_range, pd, region_filter):
    # Apply the global region and dominance controls to the full table, then
    # label every remaining site with its coffee-dominance bucket.
    _bounds = dominance_range.value
    _in_region = df.region.isin(region_filter.value)
    _in_range = df.dominance.between(_bounds[0], _bounds[1])
    # .copy() so the new column below is written to an independent frame.
    filtered = df[_in_region & _in_range].copy()

    DOMINANCE_BINS = [0, 0.80, 0.90, 0.95, 1.01]
    DOMINANCE_LABELS = ["<0.80", "0.80-0.90", "0.90-0.95", ">0.95"]
    _bucket = pd.cut(
        filtered["dominance"], bins=DOMINANCE_BINS, labels=DOMINANCE_LABELS
    )
    # Stored as plain strings rather than as a categorical column.
    filtered["dominance_bucket"] = _bucket.astype(str)
    return DOMINANCE_LABELS, filtered
# Markdown cell: heading and reading/interaction guide for Visual 1 (the
# yield × total-richness scatter and its linked species breakdown).
@app.cell
def _(mo):
    mo.md(r"""
## Visual 1 — Win-win scatter with linked species breakdown
Yield × total species richness, coloured by region, sized by coffee dominance
bucket. Median lines split the plot into four quadrants. Use the **"Selected
site" dropdown** at the top to drill into a site's species composition (shown
below the scatter); **click a bucket** in the dominance legend to filter the
scatter to that management-intensity band.
""")
    return
@app.cell
def _(DOMINANCE_LABELS, alt, df, filtered, mo, pd_DataFrame):
    # --- Visual 1: yield × total-richness scatter ---
    # Returns `chart1`, the marimo-wrapped chart whose `.value` the species
    # breakdown cell reads.
    #
    # Axis domains are taken from the unfiltered `df`, so they do not depend on
    # the current region / dominance filters.
    yield_max = float(df.mean_yield.max()) * 1.05
    richness_max = float(df.total_richness.max()) * 1.1

    base = alt.Chart(filtered).properties(width=620, height=380)
    points = base.mark_circle(opacity=0.75, stroke="white", strokeWidth=0.5).encode(
        x=alt.X("mean_yield:Q", title="Mean yield (kg/ha)",
                scale=alt.Scale(domain=[0, yield_max])),
        y=alt.Y("total_richness:Q", title="Total species richness",
                scale=alt.Scale(domain=[0, richness_max])),
        color=alt.Color(
            "region:N",
            scale=alt.Scale(domain=["Ge", "Go"], range=["#1f77b4", "#ff7f0e"]),
            legend=alt.Legend(title="Region"),
        ),
        size=alt.Size(
            "dominance_bucket:N",
            scale=alt.Scale(domain=DOMINANCE_LABELS, range=[50, 80, 120, 180]),
            sort=DOMINANCE_LABELS,
            legend=alt.Legend(
                title="Coffee dominance",
                symbolFillColor="#555",
                symbolStrokeWidth=0,
                symbolOpacity=1,
            ),
        ),
        tooltip=[
            "site_id", "region",
            alt.Tooltip("mean_yield:Q", format=".0f", title="Yield (kg/ha)"),
            "total_richness", "woody_richness", "herb_richness", "bryophyte_richness",
            alt.Tooltip("dominance:Q", format=".3f"),
            alt.Tooltip("dominance_bucket:N", title="Dominance bucket"),
        ],
    )
    scatter = points

    if len(filtered) > 0:
        # Dashed rules at the medians of the filtered data split the plot into
        # four quadrants.
        median_yield = float(filtered.mean_yield.median())
        median_richness = float(filtered.total_richness.median())
        vline = alt.Chart(filtered).mark_rule(
            strokeDash=[4, 4], color="gray"
        ).encode(x=alt.datum(median_yield))
        hline = alt.Chart(filtered).mark_rule(
            strokeDash=[4, 4], color="gray"
        ).encode(y=alt.datum(median_richness))
        scatter = scatter + vline + hline

        # Quadrant captions sit at the corners of the filtered data's bounding
        # box: right-aligned at the maximum yield, left-aligned at the minimum.
        ymin = float(filtered.mean_yield.min())
        ymax = float(filtered.mean_yield.max())
        rmin = float(filtered.total_richness.min())
        rmax = float(filtered.total_richness.max())
        right_labels = alt.Chart(pd_DataFrame([
            {"x": ymax, "y": rmax, "label": "Win-win"},
            {"x": ymax, "y": rmin, "label": "Concern"},
        ])).mark_text(
            fontSize=11, fontWeight="bold", color="#555", align="right", dx=-4, dy=0
        ).encode(x="x:Q", y="y:Q", text="label:N")
        left_labels = alt.Chart(pd_DataFrame([
            {"x": ymin, "y": rmax, "label": "Low yield / rich"},
            {"x": ymin, "y": rmin, "label": "Low yield / poor"},
        ])).mark_text(
            fontSize=11, fontWeight="bold", color="#555", align="left", dx=4, dy=0
        ).encode(x="x:Q", y="y:Q", text="label:N")
        scatter = scatter + right_labels + left_labels

        chart1 = mo.ui.altair_chart(
            scatter, chart_selection="point",
            legend_selection=["region", "dominance_bucket"],
        )
    else:
        # When the user's slider+region filter excludes every site, building a
        # mo.ui.altair_chart with chart_selection / legend_selection on an empty
        # dataframe causes the marimo kernel to return 500 (the Vega selection
        # cannot be registered against zero rows). Render a placeholder chart
        # with no selections so the dropdown still works downstream.
        #
        # Both selection kinds are enabled by default in mo.ui.altair_chart, so
        # they must be switched off explicitly to really get "no selections".
        chart1 = mo.ui.altair_chart(
            scatter, chart_selection=False, legend_selection=False
        )

    # Last expression of the cell: the chart is the cell's output.
    chart1
    return (chart1,)
@app.cell
def _(pd):
    # helper so the cell above can build a DataFrame inline
    def pd_DataFrame(x):
        """Return ``pd.DataFrame(x)``; a pass-through to the pandas constructor."""
        return pd.DataFrame(x)
    # Returned so the scatter cell can take `pd_DataFrame` as a parameter.
    return (pd_DataFrame,)
@app.cell
def _(alt, chart1, df, mo, pd, site_picker):
    # --- Linked species breakdown for the selected site(s) ---
    # A real choice in the dropdown takes precedence; otherwise the rows
    # currently selected in the scatter (chart1.value) are used.
    picked = site_picker.value
    _dropdown_used = bool(picked) and picked != "(pick a site)"
    sel = df[df.site_id == picked] if _dropdown_used else chart1.value

    _n_selected = 0 if sel is None else len(sel)

    if _n_selected == 1:
        # Exactly one site: draw its three richness counts as horizontal bars.
        _GROUPS = ["Woody", "Herbaceous", "Bryophyte"]
        site = sel.iloc[0]
        long_df = pd.DataFrame({
            "group": _GROUPS,
            "count": [site.woody_richness, site.herb_richness, site.bryophyte_richness],
        })
        _group_colour = alt.Color(
            "group:N",
            scale=alt.Scale(
                domain=_GROUPS,
                range=["#2ca02c", "#d62728", "#1f77b4"],
            ),
            legend=None,
        )
        _title = (
            f"Site {site.site_id} · {site.region} · yield {site.mean_yield:.0f} kg/ha "
            f"· total richness {int(site.total_richness)}"
        )
        bar = alt.Chart(long_df).mark_bar().encode(
            x=alt.X("count:Q", title="Species count"),
            y=alt.Y("group:N", sort=_GROUPS, title=None),
            color=_group_colour,
            tooltip=["group", "count"],
        ).properties(width=450, height=140, title=_title)
        detail = mo.ui.altair_chart(bar)
    elif _n_selected == 0:
        # Nothing chosen yet: point the reader at the dropdown.
        detail = mo.md(
            "*Pick a site from the **\"Selected site\" dropdown** at the top to see its "
            "species composition.*"
        )
    else:
        # Several rows selected: ask the reader to narrow it down to one site.
        detail = mo.md(
            f"*{len(sel)} sites currently match the active filters. "
            "Pick one from the **\"Selected site\" dropdown** at the top to see its species "
            "composition.*"
        )

    # Last expression of the cell: chart or hint is the cell's output.
    detail
    return
# Markdown cell: heading and short description for Visual 2 (three panels,
# one per species group, sharing the yield x-axis).
@app.cell
def _(mo):
    mo.md(r"""
## Visual 2 — Small multiples: yield × richness per species group
Three panels sharing the same x-axis (yield). The slopes differ dramatically between
groups: woody responds strongly to yield pressure, bryophytes almost not at all.
""")
    return
@app.cell
def _(alt, filtered, mo, pd, show_trend):
    # --- Visual 2: yield × richness, one panel per species group ---
    if len(filtered) > 0:
        # Long format: one row per (site, species group).
        _GROUP_NAMES = {
            "woody_richness": "Woody",
            "herb_richness": "Herbaceous",
            "bryophyte_richness": "Bryophyte",
        }
        long = pd.melt(
            filtered,
            id_vars=["site_id", "region", "mean_yield"],
            value_vars=["woody_richness", "herb_richness", "bryophyte_richness"],
            var_name="group", value_name="richness",
        )
        long["group"] = long["group"].map(_GROUP_NAMES)

        # Point selection keyed on site_id and triggered on mouseover; it
        # drives the opacity and stroke conditions of the points below.
        panel_selection = alt.selection_point(fields=["site_id"], on="mouseover", empty=False)

        base_sm = alt.Chart(long).encode(
            x=alt.X("mean_yield:Q", title="Mean yield (kg/ha)"),
            y=alt.Y("richness:Q", title="Richness"),
        )
        _region_colour = alt.Color(
            "region:N",
            scale=alt.Scale(domain=["Ge", "Go"], range=["#1f77b4", "#ff7f0e"]),
        )
        pts = base_sm.mark_circle(size=70, opacity=0.7).encode(
            color=_region_colour,
            opacity=alt.condition(panel_selection, alt.value(1.0), alt.value(0.35)),
            stroke=alt.condition(panel_selection, alt.value("black"), alt.value(None)),
            tooltip=["site_id", "region", "mean_yield", "richness"],
        ).add_params(panel_selection)

        # Points only, unless the trend line is requested and the yields
        # actually span a range to fit it over.
        layer = pts
        if show_trend.value and len(long) >= 2:
            xmin = float(long.mean_yield.min())
            xmax = float(long.mean_yield.max())
            if xmax > xmin:
                trend = base_sm.transform_regression(
                    "mean_yield", "richness", groupby=["group"], extent=[xmin, xmax]
                ).mark_line(color="black", strokeDash=[4, 2])
                layer = pts + trend

        _panel_columns = alt.Column(
            "group:N", sort=["Woody", "Herbaceous", "Bryophyte"], title=None
        )
        small_multiples = (
            layer.properties(width=240, height=240)
            .facet(column=_panel_columns)
            .resolve_scale(y="independent")
        )
    else:
        small_multiples = mo.md(
            "*No sites match the current filters — adjust the dominance range above.*"
        )

    # Last expression of the cell: chart or hint is the cell's output.
    small_multiples
    return
# Markdown cell: heading and reading guide for Visual 3 (one stacked glyph
# per site, ordered by yield rank).
@app.cell
def _(mo):
    mo.md(r"""
## Visual 3 — Stacked glyph array sorted by yield (novel design)
One glyph per site, arranged left-to-right by yield rank (lowest → highest).
Glyph height encodes total species richness; colours encode the three species groups.
The expected *wedge* — tall on the left, shrinking to the right — is the visual
signature of selective biodiversity loss.
""")
    return
@app.cell
def _(alt, filtered, mo, pd):
    # --- Visual 3: stacked glyph per site, ordered by yield rank ---
    if len(filtered) > 0:
        # Long format: one row per (site, species group).
        glyph_long = pd.melt(
            filtered,
            id_vars=["site_id", "region", "mean_yield", "yield_rank"],
            value_vars=["woody_richness", "herb_richness", "bryophyte_richness"],
            var_name="group", value_name="count",
        )
        glyph_long["group"] = glyph_long["group"].map({
            "woody_richness": "Woody",
            "herb_richness": "Herbaceous",
            "bryophyte_richness": "Bryophyte",
        })

        _rank_axis = alt.X(
            "yield_rank:O", title="Site rank (low → high yield)",
            axis=alt.Axis(labels=False, ticks=False),
        )
        _group_colour = alt.Color(
            "group:N",
            scale=alt.Scale(
                domain=["Woody", "Herbaceous", "Bryophyte"],
                range=["#2ca02c", "#d62728", "#1f77b4"],
            ),
            legend=alt.Legend(title="Species group"),
        )
        _glyph_tooltip = [
            "site_id", "region", "yield_rank",
            alt.Tooltip("mean_yield:Q", format=".0f"),
            "group", "count",
        ]

        # Lower panel: one stacked bar per site; the segments are the three
        # species groups, so the bar height is the site's summed richness.
        glyphs = alt.Chart(glyph_long).mark_bar(size=10).encode(
            x=_rank_axis,
            y=alt.Y("count:Q", stack="zero", title="Species richness"),
            color=_group_colour,
            order=alt.Order("group:N", sort="ascending"),
            tooltip=_glyph_tooltip,
        ).properties(width=720, height=240)

        # Upper panel: the yield itself over the same rank positions.
        yield_line = alt.Chart(filtered).mark_line(color="gray", strokeWidth=1).encode(
            x=alt.X("yield_rank:O", axis=None),
            y=alt.Y("mean_yield:Q", title="Yield (kg/ha)"),
        ).properties(width=720, height=60)

        combined = alt.vconcat(yield_line, glyphs).resolve_scale(x="shared")
    else:
        combined = mo.md(
            "*No sites match the current filters — adjust the dominance range above.*"
        )

    # Last expression of the cell: chart or hint is the cell's output.
    combined
    return
# Markdown cell: closing notes for reviewers (shared controls, how Visual 1
# and Visual 3 relate, pointer to the accompanying report).
@app.cell
def _(mo):
    mo.md(r"""
---
### Notes for reviewers
- All three visuals share the region filter and dominance slider at the top.
- Visual 1 and Visual 3 are linked through site identity: selecting a point in
Visual 1 highlights its species breakdown; site ordering in Visual 3 preserves
the identity of each point so you can cross-reference.
- The full design rationale, including the NUF scoring of all ten diverge sketches,
is in the accompanying Part 3 report.
""")
    return
# Script entry point: run the notebook app when this file is executed directly.
if __name__ == "__main__":
    app.run()