import marimo

__generated_with = "0.23.2"
# Notebook-level app object; every cell below registers itself on it through
# the @app.cell decorator.
app = marimo.App(
    width="full",
    app_title="Ethiopia Coffee Agroforests — Biodiversity vs Yield",
)


@app.cell
def _():
    # Shared imports. The names returned at the bottom are the ones the other
    # cells in this file declare as parameters (mo, pd, alt, np, Path).
    import marimo as mo
    import pandas as pd
    import altair as alt
    import numpy as np
    from pathlib import Path

    return Path, alt, mo, np, pd


# Title cell: notebook heading, authors, persona and a one-paragraph summary,
# rendered as Markdown. The authors line and the persona line are separated by
# a blank line so that Markdown renders them as two paragraphs instead of
# folding them into a single run-on line.
@app.cell
def _(mo):
    mo.md(r"""
    # Ethiopia Coffee Agroforests — Biodiversity vs Yield

    **Remote Group 2** · Alperen Aydos, Arif Erkovan, Melih Yilmaz

    Persona: *Sofia Almeida — biodiversity conservation activist*

    Three linked custom visualisations exploring the trade-off between coffee yield and
    plant biodiversity across 60 agroforest sites in the Ge and Go regions of Ethiopia.
    """)
    return


@app.cell
def _(Path, np, pd):
    # --- Data loading ---
    # Point DATA_PATH to your real CSV. If the file is missing, a synthetic dataset
    # that matches the described structure is generated so the notebook still runs.
    # NOTE: the path is relative, so it is resolved against the current working
    # directory of the process running the notebook.
    DATA_PATH = Path("data/coffee_sites.csv")

    # Columns the rest of the notebook reads directly from `df`.
    _REQUIRED_COLUMNS = [
        "site_id", "region", "mean_yield",
        "woody_richness", "herb_richness", "bryophyte_richness",
        "dominance",
    ]

    def _synthetic_region(rng, prefix, n_sites, yield_mean, yield_sd,
                          woody, herb, bryophyte, dominance):
        """Build one region's synthetic sites.

        `woody`, `herb` and `bryophyte` are (mean, sd, low, high) tuples for a
        clipped, rounded normal draw; `dominance` is a (low, high) uniform
        range. Draws happen in column order (yield, woody, herb, bryophyte,
        dominance) so a seeded generator always yields the same table.
        """
        def richness(mean, sd, low, high):
            return rng.normal(mean, sd, n_sites).clip(low, high).round().astype(int)

        return pd.DataFrame({
            "site_id": [f"{prefix}{i + 1:02d}" for i in range(n_sites)],
            "region": prefix,
            "mean_yield": rng.normal(yield_mean, yield_sd, n_sites).clip(60, 2400),
            "woody_richness": richness(*woody),
            "herb_richness": richness(*herb),
            "bryophyte_richness": richness(*bryophyte),
            "dominance": rng.uniform(dominance[0], dominance[1], n_sites),
        })

    if DATA_PATH.exists():
        df = pd.read_csv(DATA_PATH)
        # Fail early with a readable message instead of an AttributeError
        # somewhere further down the notebook.
        _missing = [col for col in _REQUIRED_COLUMNS if col not in df.columns]
        if _missing:
            raise ValueError(
                f"{DATA_PATH} is missing required column(s): {', '.join(_missing)}"
            )
    else:
        # Fixed seed: the fallback data is identical on every run.
        _rng = np.random.default_rng(42)
        df = pd.concat(
            [
                _synthetic_region(
                    _rng, "Ge", 30, 886, 300,
                    woody=(22, 5, 5, 40), herb=(48, 10, 15, 90),
                    bryophyte=(13, 4, 2, 30), dominance=(0.7, 0.99),
                ),
                _synthetic_region(
                    _rng, "Go", 30, 1158, 350,
                    woody=(14, 4, 3, 30), herb=(35, 9, 10, 75),
                    bryophyte=(8, 3, 2, 25), dominance=(0.92, 0.996),
                ),
            ],
            ignore_index=True,
        )

    # Derived columns are computed in one place for both data sources; values
    # already present in a real CSV are kept as they are.
    if "total_richness" not in df.columns:
        df["total_richness"] = df.woody_richness + df.herb_richness + df.bryophyte_richness
    if "yield_rank" not in df.columns:
        # method="first" breaks ties by row order, so every site gets a unique rank.
        df["yield_rank"] = df.mean_yield.rank(method="first").astype(int)

    df = df.sort_values("yield_rank").reset_index(drop=True)
    return (df,)


@app.cell
def _(df, mo):
    # --- Global controls (used by all three visuals) ---

    def _snap_to_step(value, direction):
        """Snap `value` outward onto the slider's 0.01 grid.

        `direction` is "down" for the lower bound and "up" for the upper bound.
        Plain rounding can move the minimum up or the maximum down, which would
        make the default slider range exclude the most extreme sites.
        """
        # Imported here so the cell does not define a new notebook-level name.
        import math

        # Rounding to 9 places removes float noise such as
        # 0.29 * 100 == 28.999999999999996 before taking floor/ceil.
        hundredths = round(float(value) * 100, 9)
        if direction == "up":
            return math.ceil(hundredths) / 100
        return math.floor(hundredths) / 100

    def _site_sort_key(site_id):
        """Order sites by two-character prefix, then by numeric suffix.

        IDs whose suffix is not a plain number sort after the numeric ones of
        the same prefix instead of raising ValueError.
        """
        text = str(site_id)
        prefix, suffix = text[:2], text[2:]
        if suffix.isdecimal():
            return (prefix, 0, int(suffix), "")
        return (prefix, 1, 0, suffix)

    region_filter = mo.ui.multiselect(
        options=["Ge", "Go"], value=["Ge", "Go"], label="Region"
    )

    _dominance_lo = _snap_to_step(df.dominance.min(), "down")
    _dominance_hi = _snap_to_step(df.dominance.max(), "up")
    dominance_range = mo.ui.range_slider(
        start=_dominance_lo,
        stop=_dominance_hi,
        step=0.01,
        # Start with the full range selected so no site is filtered out.
        value=(_dominance_lo, _dominance_hi),
        label="Coffee dominance",
        show_value=True,
    )
    show_trend = mo.ui.checkbox(value=True, label="Show trend line (Visual 2)")
    site_options = sorted(df.site_id.unique(), key=_site_sort_key)
    site_picker = mo.ui.dropdown(
        # The first entry is a placeholder meaning "no site chosen".
        options=["(pick a site)"] + site_options,
        value="(pick a site)",
        label="Selected site",
    )

    # Last expression of the cell: the row of controls is the cell's output.
    mo.hstack(
        [region_filter, dominance_range, show_trend, site_picker],
        justify="start", gap=2,
    )
    return dominance_range, region_filter, show_trend, site_picker


@app.cell
def _(df, dominance_range, pd, region_filter):
    # Apply the two global controls and tag every remaining site with a
    # coffee-dominance bucket (used for point size in Visual 1).
    DOMINANCE_LABELS = ["<0.80", "0.80-0.90", "0.90-0.95", ">0.95"]
    DOMINANCE_BINS = [0, 0.80, 0.90, 0.95, 1.01]

    _selected_range = dominance_range.value
    _region_ok = df.region.isin(region_filter.value)
    _dominance_ok = df.dominance.between(_selected_range[0], _selected_range[1])

    # Copy so the new column is not written into a view of `df`.
    filtered = df[_region_ok & _dominance_ok].copy()

    _buckets = pd.cut(
        filtered["dominance"], bins=DOMINANCE_BINS, labels=DOMINANCE_LABELS
    )
    # Store plain strings rather than a categorical column.
    filtered["dominance_bucket"] = _buckets.astype(str)
    return DOMINANCE_LABELS, filtered


# Markdown heading and reading guide for Visual 1 (scatter + species breakdown).
@app.cell
def _(mo):
    mo.md(r"""
    ## Visual 1 — Win-win scatter with linked species breakdown

    Yield × total species richness, coloured by region, sized by coffee dominance
    bucket. Median lines split the plot into four quadrants. Use the **"Selected
    site" dropdown** at the top to drill into a site's species composition (shown
    below the scatter); **click a bucket** in the dominance legend to filter the
    scatter to that management-intensity band.
    """)
    return


@app.cell
def _(DOMINANCE_LABELS, alt, df, filtered, mo, pd_DataFrame):
    # --- Visual 1: yield × total richness scatter with median quadrant lines ---
    # The same axis titles are used by every layer. Layers with different
    # titles get their titles joined on the shared axis (e.g. "..., x").
    _X_TITLE = "Mean yield (kg/ha)"
    _Y_TITLE = "Total species richness"

    # Axis domains come from the unfiltered data, so the axes do not rescale
    # when the filters change.
    yield_max = float(df.mean_yield.max()) * 1.05
    richness_max = float(df.total_richness.max()) * 1.1

    def _corner_labels(rows, align, dx):
        """Text layer that writes quadrant names at the given (x, y) positions."""
        return alt.Chart(pd_DataFrame(rows)).mark_text(
            fontSize=11, fontWeight="bold", color="#555", align=align, dx=dx, dy=0
        ).encode(
            x=alt.X("x:Q", title=_X_TITLE),
            y=alt.Y("y:Q", title=_Y_TITLE),
            text="label:N",
        )

    base = alt.Chart(filtered).properties(width=620, height=380)

    points = base.mark_circle(opacity=0.75, stroke="white", strokeWidth=0.5).encode(
        x=alt.X("mean_yield:Q", title=_X_TITLE,
                scale=alt.Scale(domain=[0, yield_max])),
        y=alt.Y("total_richness:Q", title=_Y_TITLE,
                scale=alt.Scale(domain=[0, richness_max])),
        color=alt.Color(
            "region:N",
            scale=alt.Scale(domain=["Ge", "Go"], range=["#1f77b4", "#ff7f0e"]),
            legend=alt.Legend(title="Region"),
        ),
        size=alt.Size(
            "dominance_bucket:N",
            scale=alt.Scale(domain=DOMINANCE_LABELS, range=[50, 80, 120, 180]),
            sort=DOMINANCE_LABELS,
            legend=alt.Legend(
                title="Coffee dominance",
                symbolFillColor="#555",
                symbolStrokeWidth=0,
                symbolOpacity=1,
            ),
        ),
        tooltip=[
            "site_id", "region",
            alt.Tooltip("mean_yield:Q", format=".0f", title="Yield (kg/ha)"),
            "total_richness", "woody_richness", "herb_richness", "bryophyte_richness",
            alt.Tooltip("dominance:Q", format=".3f"),
            alt.Tooltip("dominance_bucket:N", title="Dominance bucket"),
        ],
    )

    scatter = points
    if len(filtered) > 0:
        # Medians of the *filtered* sites define the quadrant boundaries.
        median_yield = float(filtered.mean_yield.median())
        median_richness = float(filtered.total_richness.median())
        vline = alt.Chart(filtered).mark_rule(
            strokeDash=[4, 4], color="gray"
        ).encode(x=alt.datum(median_yield))
        hline = alt.Chart(filtered).mark_rule(
            strokeDash=[4, 4], color="gray"
        ).encode(y=alt.datum(median_richness))
        scatter = scatter + vline + hline

        # Quadrant names are anchored to the extremes of the filtered data.
        ymin = float(filtered.mean_yield.min())
        ymax = float(filtered.mean_yield.max())
        rmin = float(filtered.total_richness.min())
        rmax = float(filtered.total_richness.max())
        right_labels = _corner_labels(
            [
                {"x": ymax, "y": rmax, "label": "Win-win"},
                {"x": ymax, "y": rmin, "label": "Concern"},
            ],
            align="right", dx=-4,
        )
        left_labels = _corner_labels(
            [
                {"x": ymin, "y": rmax, "label": "Low yield / rich"},
                {"x": ymin, "y": rmin, "label": "Low yield / poor"},
            ],
            align="left", dx=4,
        )
        scatter = scatter + right_labels + left_labels
        chart1 = mo.ui.altair_chart(
            scatter, chart_selection="point",
            legend_selection=["region", "dominance_bucket"],
        )
    else:
        # When the user's slider+region filter excludes every site, building a
        # mo.ui.altair_chart with chart_selection / legend_selection on an empty
        # dataframe was observed to make the marimo kernel return 500. Render a
        # placeholder chart instead. Both selection options are switched off
        # explicitly because they are enabled by default.
        chart1 = mo.ui.altair_chart(
            scatter, chart_selection=False, legend_selection=False
        )
    # Last expression of the cell: the chart is the cell's output.
    chart1
    return (chart1,)


@app.cell
def _(pd):
    # helper so the cell above can build a DataFrame inline
    # (that cell takes `pd_DataFrame` as a parameter rather than `pd`)
    def pd_DataFrame(x):
        """Return ``pd.DataFrame(x)``."""
        return pd.DataFrame(x)

    return (pd_DataFrame,)


@app.cell
def _(alt, chart1, df, mo, pd, site_picker):
    # --- Species breakdown for a single site, linked to Visual 1 ---
    # The dropdown wins when it holds a real site; otherwise the rows currently
    # selected in the scatter (chart1.value) are used.
    _PLACEHOLDER = "(pick a site)"
    _GROUP_ORDER = ["Woody", "Herbaceous", "Bryophyte"]

    _choice = site_picker.value
    _dropdown_used = bool(_choice) and _choice != _PLACEHOLDER
    _rows = df[df.site_id == _choice] if _dropdown_used else chart1.value
    _row_count = 0 if _rows is None else len(_rows)

    if _row_count == 0:
        # Nothing chosen anywhere: show a hint.
        detail = mo.md(
            '*Pick a site from the **"Selected site" dropdown** at the top to see its '
            'species composition.*'
        )
    elif _row_count > 1:
        # Several rows selected: ask the user to narrow down to one site.
        detail = mo.md(
            f"*{_row_count} sites currently match the active filters. "
            'Pick one from the **"Selected site" dropdown** at the top to see its species '
            'composition.*'
        )
    else:
        # Exactly one site: one horizontal bar per species group.
        _site = _rows.iloc[0]
        _counts = pd.DataFrame({
            "group": _GROUP_ORDER,
            "count": [
                _site.woody_richness,
                _site.herb_richness,
                _site.bryophyte_richness,
            ],
        })
        _title = (
            f"Site {_site.site_id} · {_site.region} · yield {_site.mean_yield:.0f} kg/ha "
            f"· total richness {int(_site.total_richness)}"
        )
        _group_colors = alt.Scale(
            domain=_GROUP_ORDER,
            range=["#2ca02c", "#d62728", "#1f77b4"],
        )
        _bars = (
            alt.Chart(_counts)
            .mark_bar()
            .encode(
                x=alt.X("count:Q", title="Species count"),
                y=alt.Y("group:N", sort=_GROUP_ORDER, title=None),
                color=alt.Color("group:N", scale=_group_colors, legend=None),
                tooltip=["group", "count"],
            )
            .properties(width=450, height=140, title=_title)
        )
        detail = mo.ui.altair_chart(_bars)
    # Last expression of the cell: hint text or bar chart.
    detail
    return


# Markdown heading and short description for Visual 2 (small multiples).
@app.cell
def _(mo):
    mo.md(r"""
    ## Visual 2 — Small multiples: yield × richness per species group

    Three panels sharing the same x-axis (yield). The slopes differ dramatically between
    groups: woody responds strongly to yield pressure, bryophytes almost not at all.
    """)
    return


@app.cell
def _(alt, filtered, mo, pd, show_trend):
    # --- Visual 2: one yield × richness panel per species group ---
    _GROUP_LABELS = {
        "woody_richness": "Woody",
        "herb_richness": "Herbaceous",
        "bryophyte_richness": "Bryophyte",
    }
    _PANEL_ORDER = list(_GROUP_LABELS.values())

    if len(filtered) > 0:
        # Long format: one row per (site, species group). The richness columns
        # are renamed first so the melted "group" column holds display names.
        _tidy = pd.melt(
            filtered.rename(columns=_GROUP_LABELS),
            id_vars=["site_id", "region", "mean_yield"],
            value_vars=_PANEL_ORDER,
            var_name="group", value_name="richness",
        )

        # Selection keyed on site_id and driven by mouseover; it feeds the
        # opacity and stroke conditions below.
        _hovered = alt.selection_point(fields=["site_id"], on="mouseover", empty=False)

        _xy = alt.Chart(_tidy).encode(
            x=alt.X("mean_yield:Q", title="Mean yield (kg/ha)"),
            y=alt.Y("richness:Q", title="Richness"),
        )

        _dots = _xy.mark_circle(size=70, opacity=0.7).encode(
            color=alt.Color(
                "region:N",
                scale=alt.Scale(domain=["Ge", "Go"], range=["#1f77b4", "#ff7f0e"]),
            ),
            opacity=alt.condition(_hovered, alt.value(1.0), alt.value(0.35)),
            stroke=alt.condition(_hovered, alt.value("black"), alt.value(None)),
            tooltip=["site_id", "region", "mean_yield", "richness"],
        ).add_params(_hovered)

        # The trend line is added only when requested and when the yields
        # actually span a range; otherwise the points are shown alone.
        _panel = _dots
        if show_trend.value and len(_tidy) >= 2:
            _x_lo = float(_tidy.mean_yield.min())
            _x_hi = float(_tidy.mean_yield.max())
            if _x_hi > _x_lo:
                _fit = _xy.transform_regression(
                    "mean_yield", "richness", groupby=["group"], extent=[_x_lo, _x_hi]
                ).mark_line(color="black", strokeDash=[4, 2])
                _panel = _dots + _fit

        _sized = _panel.properties(width=240, height=240)
        _faceted = _sized.facet(
            column=alt.Column("group:N", sort=_PANEL_ORDER, title=None)
        )
        # Each panel gets its own y scale.
        small_multiples = _faceted.resolve_scale(y="independent")
    else:
        small_multiples = mo.md(
            "*No sites match the current filters — adjust the dominance range above.*"
        )
    # Last expression of the cell: faceted chart or the "no sites" note.
    small_multiples
    return


# Markdown heading and reading guide for Visual 3 (stacked glyph array).
@app.cell
def _(mo):
    mo.md(r"""
    ## Visual 3 — Stacked glyph array sorted by yield (novel design)

    One glyph per site, arranged left-to-right by yield rank (lowest → highest).
    Glyph height encodes total species richness; colours encode the three species groups.
    The expected *wedge* — tall on the left, shrinking to the right — is the visual
    signature of selective biodiversity loss.
    """)
    return


@app.cell
def _(alt, filtered, mo, pd):
    # --- Visual 3: stacked bar per site ordered by yield rank, with a yield
    # line drawn above it ---
    _GROUP_LABELS = {
        "woody_richness": "Woody",
        "herb_richness": "Herbaceous",
        "bryophyte_richness": "Bryophyte",
    }
    _GROUP_ORDER = list(_GROUP_LABELS.values())
    _CHART_WIDTH = 720

    if len(filtered) > 0:
        # Long format: one row per (site, species group), with display names
        # in the "group" column.
        _stacked = pd.melt(
            filtered.rename(columns=_GROUP_LABELS),
            id_vars=["site_id", "region", "mean_yield", "yield_rank"],
            value_vars=_GROUP_ORDER,
            var_name="group", value_name="count",
        )

        _rank_axis = alt.X(
            "yield_rank:O",
            title="Site rank (low → high yield)",
            axis=alt.Axis(labels=False, ticks=False),
        )
        _group_color = alt.Color(
            "group:N",
            scale=alt.Scale(
                domain=_GROUP_ORDER,
                range=["#2ca02c", "#d62728", "#1f77b4"],
            ),
            legend=alt.Legend(title="Species group"),
        )
        _bars = (
            alt.Chart(_stacked)
            .mark_bar(size=10)
            .encode(
                x=_rank_axis,
                y=alt.Y("count:Q", stack="zero", title="Species richness"),
                color=_group_color,
                order=alt.Order("group:N", sort="ascending"),
                tooltip=[
                    "site_id", "region", "yield_rank",
                    alt.Tooltip("mean_yield:Q", format=".0f"),
                    "group", "count",
                ],
            )
            .properties(width=_CHART_WIDTH, height=240)
        )

        # Thin strip showing yield against the same rank positions.
        _yield_strip = (
            alt.Chart(filtered)
            .mark_line(color="gray", strokeWidth=1)
            .encode(
                x=alt.X("yield_rank:O", axis=None),
                y=alt.Y("mean_yield:Q", title="Yield (kg/ha)"),
            )
            .properties(width=_CHART_WIDTH, height=60)
        )

        # Shared x scale keeps each bar aligned with its point on the line.
        combined = alt.vconcat(_yield_strip, _bars).resolve_scale(x="shared")
    else:
        combined = mo.md(
            "*No sites match the current filters — adjust the dominance range above.*"
        )
    # Last expression of the cell: stacked chart or the "no sites" note.
    combined
    return


# Closing Markdown cell: notes for reviewers on how the visuals are linked.
@app.cell
def _(mo):
    mo.md(r"""
    ---
    ### Notes for reviewers
    - All three visuals share the region filter and dominance slider at the top.
    - Visual 1 and Visual 3 are linked through site identity: selecting a point in
      Visual 1 highlights its species breakdown; site ordering in Visual 3 preserves
      the identity of each point so you can cross-reference.
    - The full design rationale, including the NUF scoring of all ten diverge sketches,
      is in the accompanying Part 3 report.
    """)
    return


# Run the app when this file is executed directly as a script.
if __name__ == "__main__":
    app.run()