Spaces:

moy1147
/

remote-group2

Sleeping

melihorhanyilmaz

Use if/else instead of early return in chart cells

8b81992 16 days ago

15.7 kB

	import marimo

	# Presumably the marimo version that wrote this notebook file -- confirm before editing by hand.
	__generated_with = "0.23.2"
	# Notebook application object; each cell below registers itself through @app.cell.
	app = marimo.App(
	width="full",
	app_title="Ethiopia Coffee Agroforests — Biodiversity vs Yield",
	)


	@app.cell
	def _():
	    # Shared imports cell: the modules are returned by name so that the
	    # other cells can request them as parameters.
	    from pathlib import Path

	    import altair as alt
	    import marimo as mo
	    import numpy as np
	    import pandas as pd

	    return Path, alt, mo, np, pd


	@app.cell
	def _(mo):
	# Notebook header rendered as Markdown: title, group members, persona and a
	# short summary of what the three visuals explore.
	mo.md(r"""
	# Ethiopia Coffee Agroforests — Biodiversity vs Yield

	Remote Group 2 · Alperen Aydos, Arif Erkovan, Melih Yilmaz
	Persona: Sofia Almeida — biodiversity conservation activist

	Three linked custom visualisations exploring the trade-off between coffee yield and
	plant biodiversity across 60 agroforest sites in the Ge and Go regions of Ethiopia.
	""")
	return


	@app.cell
	def _(Path, np, pd):
	    # --- Data loading ---
	    # Builds `df`: one row per site, with the derived `total_richness` and
	    # `yield_rank` columns, sorted by ascending `yield_rank`.
	    #
	    # Point DATA_PATH to your real CSV. If the file is missing, a synthetic dataset
	    # that matches the described structure is generated so the notebook still runs.
	    DATA_PATH = Path("data/coffee_sites.csv")

	    if DATA_PATH.exists():
	        df = pd.read_csv(DATA_PATH)
	    else:
	        # Fixed seed, so the synthetic fallback is the same on every run.
	        rng = np.random.default_rng(42)
	        n_ge, n_go = 30, 30
	        ge = pd.DataFrame({
	            "site_id": [f"Ge{i+1:02d}" for i in range(n_ge)],
	            "region": "Ge",
	            "mean_yield": rng.normal(886, 300, n_ge).clip(60, 2400),
	            "woody_richness": rng.normal(22, 5, n_ge).clip(5, 40).round().astype(int),
	            "herb_richness": rng.normal(48, 10, n_ge).clip(15, 90).round().astype(int),
	            "bryophyte_richness": rng.normal(13, 4, n_ge).clip(2, 30).round().astype(int),
	            "dominance": rng.uniform(0.7, 0.99, n_ge),
	        })
	        go = pd.DataFrame({
	            "site_id": [f"Go{i+1:02d}" for i in range(n_go)],
	            "region": "Go",
	            "mean_yield": rng.normal(1158, 350, n_go).clip(60, 2400),
	            "woody_richness": rng.normal(14, 4, n_go).clip(3, 30).round().astype(int),
	            "herb_richness": rng.normal(35, 9, n_go).clip(10, 75).round().astype(int),
	            "bryophyte_richness": rng.normal(8, 3, n_go).clip(2, 25).round().astype(int),
	            "dominance": rng.uniform(0.92, 0.996, n_go),
	        })
	        df = pd.concat([ge, go], ignore_index=True)

	    # Derived columns are filled in once, for both data sources. A CSV that
	    # already provides them is left untouched.
	    if "total_richness" not in df.columns:
	        df["total_richness"] = df.woody_richness + df.herb_richness + df.bryophyte_richness
	    if "yield_rank" not in df.columns:
	        # na_option="bottom" ranks a site with a missing yield last; the default
	        # would give it a NaN rank, which astype(int) cannot convert.
	        df["yield_rank"] = (
	            df.mean_yield.rank(method="first", na_option="bottom").astype(int)
	        )

	    df = df.sort_values("yield_rank").reset_index(drop=True)
	    return (df,)


	@app.cell
	def _(df, mo):
	    # --- Global controls (used by all three visuals) ---
	    region_filter = mo.ui.multiselect(
	        options=["Ge", "Go"], value=["Ge", "Go"], label="Region"
	    )

	    # Snap the slider bounds OUTWARD onto the 0.01 step grid (floor the minimum,
	    # ceil the maximum). Rounding to the nearest hundredth can place the lower
	    # bound above the true minimum, or the upper bound below the true maximum,
	    # which would keep the most extreme sites outside every selectable range.
	    _dominance_min = float(df.dominance.min())
	    _dominance_max = float(df.dominance.max())
	    _slider_start = (_dominance_min * 100 // 1) / 100
	    _slider_stop = -(-_dominance_max * 100 // 1) / 100
	    dominance_range = mo.ui.range_slider(
	        start=_slider_start,
	        stop=_slider_stop,
	        step=0.01,
	        value=(_slider_start, _slider_stop),
	        label="Coffee dominance",
	        show_value=True,
	    )
	    show_trend = mo.ui.checkbox(value=True, label="Show trend line (Visual 2)")

	    def _site_sort_key(site_id):
	        # Order by the two-character prefix, then numerically by the rest.
	        # IDs whose remainder is not a plain number sort after the numeric
	        # ones, by text, instead of raising ValueError from int().
	        _text = str(site_id)
	        _prefix, _suffix = _text[:2], _text[2:]
	        if _suffix.isdecimal():
	            return (_prefix, 0, int(_suffix), "")
	        return (_prefix, 1, 0, _suffix)

	    site_options = sorted(df.site_id.unique(), key=_site_sort_key)
	    site_picker = mo.ui.dropdown(
	        options=["(pick a site)"] + site_options,
	        value="(pick a site)",
	        label="Selected site",
	    )

	    mo.hstack(
	        [region_filter, dominance_range, show_trend, site_picker],
	        justify="start", gap=2,
	    )
	    return dominance_range, region_filter, show_trend, site_picker


	@app.cell
	def _(df, dominance_range, pd, region_filter):
	    # Apply the global region and dominance filters, then label every
	    # remaining site with its dominance bucket.
	    DOMINANCE_BINS = [0, 0.80, 0.90, 0.95, 1.01]
	    DOMINANCE_LABELS = ["<0.80", "0.80-0.90", "0.90-0.95", ">0.95"]

	    _bounds = dominance_range.value
	    _in_region = df.region.isin(region_filter.value)
	    _in_range = df.dominance.between(_bounds[0], _bounds[1])
	    filtered = df[_in_region & _in_range].copy()

	    # Stored as plain strings rather than a categorical column.
	    _buckets = pd.cut(
	        filtered["dominance"], bins=DOMINANCE_BINS, labels=DOMINANCE_LABELS
	    )
	    filtered["dominance_bucket"] = _buckets.astype(str)
	    return DOMINANCE_LABELS, filtered


	@app.cell
	def _(mo):
	# Markdown heading and usage notes for Visual 1 (scatter + species breakdown).
	mo.md(r"""
	## Visual 1 — Win-win scatter with linked species breakdown

	Yield × total species richness, coloured by region, sized by coffee dominance
	bucket. Median lines split the plot into four quadrants. Use the **"Selected
	site" dropdown** at the top to drill into a site's species composition (shown
	below the scatter); click a bucket in the dominance legend to filter the
	scatter to that management-intensity band.
	""")
	return


	@app.cell
	def _(DOMINANCE_LABELS, alt, df, filtered, mo, pd_DataFrame):
	    # --- Visual 1: yield × total richness scatter ---
	    # Axis domains are taken from the unfiltered `df`, so they do not depend
	    # on the active filters.
	    yield_max = float(df.mean_yield.max()) * 1.05
	    richness_max = float(df.total_richness.max()) * 1.1

	    base = alt.Chart(filtered).properties(width=620, height=380)

	    points = base.mark_circle(opacity=0.75, stroke="white", strokeWidth=0.5).encode(
	        x=alt.X("mean_yield:Q", title="Mean yield (kg/ha)",
	                scale=alt.Scale(domain=[0, yield_max])),
	        y=alt.Y("total_richness:Q", title="Total species richness",
	                scale=alt.Scale(domain=[0, richness_max])),
	        color=alt.Color(
	            "region:N",
	            scale=alt.Scale(domain=["Ge", "Go"], range=["#1f77b4", "#ff7f0e"]),
	            legend=alt.Legend(title="Region"),
	        ),
	        size=alt.Size(
	            "dominance_bucket:N",
	            scale=alt.Scale(domain=DOMINANCE_LABELS, range=[50, 80, 120, 180]),
	            sort=DOMINANCE_LABELS,
	            legend=alt.Legend(
	                title="Coffee dominance",
	                symbolFillColor="#555",
	                symbolStrokeWidth=0,
	                symbolOpacity=1,
	            ),
	        ),
	        tooltip=[
	            "site_id", "region",
	            alt.Tooltip("mean_yield:Q", format=".0f", title="Yield (kg/ha)"),
	            "total_richness", "woody_richness", "herb_richness", "bryophyte_richness",
	            alt.Tooltip("dominance:Q", format=".3f"),
	            alt.Tooltip("dominance_bucket:N", title="Dominance bucket"),
	        ],
	    )

	    scatter = points
	    if len(filtered) > 0:
	        # Dashed median rules split the plot into four quadrants.
	        median_yield = float(filtered.mean_yield.median())
	        median_richness = float(filtered.total_richness.median())
	        vline = alt.Chart(filtered).mark_rule(
	            strokeDash=[4, 4], color="gray"
	        ).encode(x=alt.datum(median_yield))
	        hline = alt.Chart(filtered).mark_rule(
	            strokeDash=[4, 4], color="gray"
	        ).encode(y=alt.datum(median_richness))
	        scatter = scatter + vline + hline

	        # Quadrant captions are anchored at the extremes of the filtered data.
	        ymin = float(filtered.mean_yield.min())
	        ymax = float(filtered.mean_yield.max())
	        rmin = float(filtered.total_richness.min())
	        rmax = float(filtered.total_richness.max())
	        right_labels = alt.Chart(pd_DataFrame([
	            {"x": ymax, "y": rmax, "label": "Win-win"},
	            {"x": ymax, "y": rmin, "label": "Concern"},
	        ])).mark_text(
	            fontSize=11, fontWeight="bold", color="#555", align="right", dx=-4, dy=0
	        ).encode(x="x:Q", y="y:Q", text="label:N")
	        left_labels = alt.Chart(pd_DataFrame([
	            {"x": ymin, "y": rmax, "label": "Low yield / rich"},
	            {"x": ymin, "y": rmin, "label": "Low yield / poor"},
	        ])).mark_text(
	            fontSize=11, fontWeight="bold", color="#555", align="left", dx=4, dy=0
	        ).encode(x="x:Q", y="y:Q", text="label:N")
	        scatter = scatter + right_labels + left_labels
	        chart1 = mo.ui.altair_chart(
	            scatter, chart_selection="point",
	            legend_selection=["region", "dominance_bucket"],
	        )
	    else:
	        # When the slider + region filter excludes every site, the original
	        # author observed that building mo.ui.altair_chart with chart/legend
	        # selections on an empty dataframe makes the marimo kernel return 500.
	        # Render a placeholder chart with no selections so the dropdown still
	        # works downstream. Both flags must be switched off explicitly: left at
	        # their defaults, mo.ui.altair_chart enables selections automatically.
	        chart1 = mo.ui.altair_chart(
	            scatter, chart_selection=False, legend_selection=False
	        )
	    chart1
	    return (chart1,)


	@app.cell
	def _(pd):
	    # Exposes DataFrame construction under its own name so that the scatter
	    # cell can build small inline frames through `pd_DataFrame`.
	    def pd_DataFrame(x):
	        """Return a new pandas DataFrame built from ``x``."""
	        frame = pd.DataFrame(x)
	        return frame

	    return (pd_DataFrame,)


	@app.cell
	def _(alt, chart1, df, mo, pd, site_picker):
	    # --- Linked species breakdown for the selected site(s) ---
	    # Prefer the dropdown picker; fall back to the scatter click selection.
	    # The picker looks the site up in the unfiltered `df`.
	    picked = site_picker.value
	    if picked and picked != "(pick a site)":
	        sel = df[df.site_id == picked]
	    else:
	        sel = chart1.value  # pandas DataFrame of currently selected rows

	    if sel is None or len(sel) == 0:
	        # Emphasis markers are balanced (*...*) so the hint renders in italics
	        # like the multi-site message below, not with literal asterisks.
	        detail = mo.md(
	            "*Pick a site from the \"Selected site\" dropdown at the top to see its "
	            "species composition.*"
	        )
	    elif len(sel) > 1:
	        detail = mo.md(
	            f"*{len(sel)} sites currently match the active filters. "
	            "Pick one from the \"Selected site\" dropdown at the top to see its species "
	            "composition.*"
	        )
	    else:
	        # Exactly one site: draw one horizontal bar per species group.
	        site = sel.iloc[0]
	        long_df = pd.DataFrame({
	            "group": ["Woody", "Herbaceous", "Bryophyte"],
	            "count": [site.woody_richness, site.herb_richness, site.bryophyte_richness],
	        })
	        bar = alt.Chart(long_df).mark_bar().encode(
	            x=alt.X("count:Q", title="Species count"),
	            y=alt.Y("group:N", sort=["Woody", "Herbaceous", "Bryophyte"], title=None),
	            color=alt.Color(
	                "group:N",
	                scale=alt.Scale(
	                    domain=["Woody", "Herbaceous", "Bryophyte"],
	                    range=["#2ca02c", "#d62728", "#1f77b4"],
	                ),
	                legend=None,
	            ),
	            tooltip=["group", "count"],
	        ).properties(
	            width=450, height=140,
	            title=f"Site {site.site_id} · {site.region} · yield {site.mean_yield:.0f} kg/ha "
	                  f"· total richness {int(site.total_richness)}"
	        )
	        detail = mo.ui.altair_chart(bar)
	    detail
	    return


	@app.cell
	def _(mo):
	# Markdown heading and description for Visual 2 (small multiples).
	mo.md(r"""
	## Visual 2 — Small multiples: yield × richness per species group

	Three panels sharing the same x-axis (yield). The slopes differ dramatically between
	groups: woody responds strongly to yield pressure, bryophytes almost not at all.
	""")
	return


	@app.cell
	def _(alt, filtered, mo, pd, show_trend):
	    # --- Visual 2: one yield × richness panel per species group ---
	    if len(filtered) > 0:
	        # Reshape to one row per (site, species group).
	        long = pd.melt(
	            filtered,
	            id_vars=["site_id", "region", "mean_yield"],
	            value_vars=["woody_richness", "herb_richness", "bryophyte_richness"],
	            var_name="group", value_name="richness",
	        )
	        _panel_names = {
	            "woody_richness": "Woody",
	            "herb_richness": "Herbaceous",
	            "bryophyte_richness": "Bryophyte",
	        }
	        long["group"] = long["group"].map(_panel_names)

	        # Hover selection keyed on site_id; it drives point opacity and stroke.
	        panel_selection = alt.selection_point(fields=["site_id"], on="mouseover", empty=False)

	        base_sm = alt.Chart(long).encode(
	            x=alt.X("mean_yield:Q", title="Mean yield (kg/ha)"),
	            y=alt.Y("richness:Q", title="Richness"),
	        )

	        _region_colour = alt.Color(
	            "region:N",
	            scale=alt.Scale(domain=["Ge", "Go"], range=["#1f77b4", "#ff7f0e"]),
	        )
	        pts = base_sm.mark_circle(size=70, opacity=0.7).encode(
	            color=_region_colour,
	            opacity=alt.condition(panel_selection, alt.value(1.0), alt.value(0.35)),
	            stroke=alt.condition(panel_selection, alt.value("black"), alt.value(None)),
	            tooltip=["site_id", "region", "mean_yield", "richness"],
	        ).add_params(panel_selection)

	        # Points only by default; the per-group regression line is layered on
	        # when the checkbox is ticked, there are at least two rows, and the
	        # yield values span a non-zero range.
	        layer = pts
	        if show_trend.value and len(long) >= 2:
	            xmin = float(long.mean_yield.min())
	            xmax = float(long.mean_yield.max())
	            if xmax > xmin:
	                trend = base_sm.transform_regression(
	                    "mean_yield", "richness", groupby=["group"], extent=[xmin, xmax]
	                ).mark_line(color="black", strokeDash=[4, 2])
	                layer = pts + trend

	        small_multiples = layer.properties(width=240, height=240).facet(
	            column=alt.Column("group:N", sort=["Woody", "Herbaceous", "Bryophyte"], title=None)
	        ).resolve_scale(y="independent")
	    else:
	        small_multiples = mo.md(
	            "No sites match the current filters — adjust the dominance range above."
	        )
	    small_multiples
	    return


	@app.cell
	def _(mo):
	# Markdown heading and reading guide for Visual 3 (stacked glyph array).
	mo.md(r"""
	## Visual 3 — Stacked glyph array sorted by yield (novel design)

	One glyph per site, arranged left-to-right by yield rank (lowest → highest).
	Glyph height encodes total species richness; colours encode the three species groups.
	The expected wedge — tall on the left, shrinking to the right — is the visual
	signature of selective biodiversity loss.
	""")
	return


	@app.cell
	def _(alt, filtered, mo, pd):
	    # --- Visual 3: stacked richness glyphs ordered by yield rank ---
	    if len(filtered) > 0:
	        # One row per (site, species group) for the stacked bars.
	        glyph_long = pd.melt(
	            filtered,
	            id_vars=["site_id", "region", "mean_yield", "yield_rank"],
	            value_vars=["woody_richness", "herb_richness", "bryophyte_richness"],
	            var_name="group", value_name="count",
	        )
	        group_map = {
	            "woody_richness": "Woody",
	            "herb_richness": "Herbaceous",
	            "bryophyte_richness": "Bryophyte",
	        }
	        glyph_long["group"] = glyph_long["group"].map(group_map)

	        _rank_axis = alt.X(
	            "yield_rank:O", title="Site rank (low → high yield)",
	            axis=alt.Axis(labels=False, ticks=False),
	        )
	        _group_colour = alt.Color(
	            "group:N",
	            scale=alt.Scale(
	                domain=["Woody", "Herbaceous", "Bryophyte"],
	                range=["#2ca02c", "#d62728", "#1f77b4"],
	            ),
	            legend=alt.Legend(title="Species group"),
	        )
	        glyphs = alt.Chart(glyph_long).mark_bar(size=10).encode(
	            x=_rank_axis,
	            y=alt.Y("count:Q", stack="zero", title="Species richness"),
	            color=_group_colour,
	            order=alt.Order("group:N", sort="ascending"),
	            tooltip=[
	                "site_id", "region", "yield_rank",
	                alt.Tooltip("mean_yield:Q", format=".0f"),
	                "group", "count",
	            ],
	        ).properties(width=720, height=240)

	        # Thin yield profile placed above the glyphs, on the same rank axis.
	        yield_line = alt.Chart(filtered).mark_line(color="gray", strokeWidth=1).encode(
	            x=alt.X("yield_rank:O", axis=None),
	            y=alt.Y("mean_yield:Q", title="Yield (kg/ha)"),
	        ).properties(width=720, height=60)

	        combined = alt.vconcat(yield_line, glyphs).resolve_scale(x="shared")
	    else:
	        combined = mo.md(
	            "No sites match the current filters — adjust the dominance range above."
	        )
	    combined
	    return


	@app.cell
	def _(mo):
	# Closing Markdown section with notes addressed to reviewers.
	mo.md(r"""
	---
	### Notes for reviewers
	- All three visuals share the region filter and dominance slider at the top.
	- Visual 1 and Visual 3 are linked through site identity: selecting a point in
	Visual 1 highlights its species breakdown; site ordering in Visual 3 preserves
	the identity of each point so you can cross-reference.
	- The full design rationale, including the NUF scoring of all ten diverge sketches,
	is in the accompanying Part 3 report.
	""")
	return


	# Run the notebook app when this file is executed directly as a script.
	if __name__ == "__main__":
	app.run()