import marimo
# Version string recorded in the notebook file (presumably the marimo release
# that generated it — confirm).
__generated_with = "0.21.1"
# Application object; its `cell` decorator is applied to every cell function below.
app = marimo.App(width="medium")
@app.cell
async def _():
    # Import cell: loads the modules used by the notebook and returns the ones
    # handed to other cells (io, math, mo, pd).
    # NOTE(review): the cell is declared `async` although no `await` is visible
    # in its body.
    # NOTE(review): `sys` is imported but neither used nor returned here.
    import sys
    import io
    import math
    import marimo as mo
    import pandas as pd
    # Not referenced by name in this cell; presumably imported so the Excel
    # engine behind pd.read_excel is available — confirm.
    import openpyxl
    return io, math, mo, pd
@app.cell
def _(mo):
    # One single-file upload button per workbook the visualisation depends on.
    def _single_file_button(label):
        return mo.ui.file(label=label, kind="button", multiple=False)

    upload_yield = _single_file_button("Upload: Coffee_yield.xlsx")
    upload_species = _single_file_button("Upload: Plant_species_and_average...xlsx")
    upload_decomp = _single_file_button("Upload: Total_species_composition.xlsx")

    # Prompt on top, the three buttons beneath it, everything centred.
    _prompt = mo.md("### Please provide the correct data files to view the visual")
    upload_ui = mo.vstack(
        [_prompt, upload_yield, upload_species, upload_decomp],
        align="center",
    )

    # Keep this bare expression as the final statement so marimo renders the stack.
    upload_ui
    return upload_decomp, upload_species, upload_yield, upload_ui
@app.cell
def _(io, mo, pd, upload_decomp, upload_species, upload_yield):
    # Execution of this cell and everything below is paused until all three
    # files have been uploaded.
    mo.stop(
        not upload_yield.value or not upload_species.value or not upload_decomp.value,
        mo.md("*Waiting for all three files to be uploaded...*"),
    )

    ###### PREPARATION ######
    # Read the uploaded workbooks directly from their in-memory contents
    df_yield = pd.read_excel(io.BytesIO(upload_yield.value[0].contents))
    df_species = pd.read_excel(io.BytesIO(upload_species.value[0].contents))
    df_decomposition = pd.read_excel(io.BytesIO(upload_decomp.value[0].contents))

    # Hardcoded column names used in the data files
    COL_SPECIES_NAME = "Species name"
    COL_SPECIES_GROUP = "Species group"
    # The first column of the third workbook is used as the species key
    COL_DECOMP_SPECIES = df_decomposition.columns[0]

    # Build a site -> yield lookup from the yield workbook
    site_yield_map = dict(zip(df_yield["Site ID"].astype(str), df_yield["Mean_CC_Yield"]))

    # Empty group cells inherit the closest group value above them
    df_species[COL_SPECIES_GROUP] = df_species[COL_SPECIES_GROUP].ffill()
    GROUPS = df_species[COL_SPECIES_GROUP].dropna().unique().tolist()

    # Index the species-by-site table by species for easy row lookups; the
    # remaining columns are the sites.
    df_decomposition = df_decomposition.set_index(COL_DECOMP_SPECIES)
    ALL_SITES = [str(col) for col in df_decomposition.columns]

    # Build the list of per-species records used to draw the visual later
    species_data = []
    for _, row in df_species.iterrows():
        sp_id = str(row[COL_SPECIES_NAME])
        # The group column is guaranteed to exist here: the ffill above would
        # already have raised KeyError otherwise, so no fallback is needed.
        group = str(row[COL_SPECIES_GROUP])

        # First: find which sites this species occurs in (cells equal to 1).
        present_in = []
        if sp_id in df_decomposition.index:
            # A list key always yields a DataFrame, so a species listed on
            # several rows is handled too: it counts as present at a site when
            # any of its rows marks that site with 1.
            _matches = df_decomposition.loc[[sp_id]]
            _occupied = _matches.eq(1).any(axis=0)
            present_in = _occupied[_occupied].index.astype(str).tolist()

        # Then: average the yield over those sites, skipping sites that have
        # no yield entry or a blank (NaN) yield so one gap cannot turn the
        # whole average into NaN.
        site_yields = []
        for _site in present_in:
            _site_yield = site_yield_map.get(_site)
            if _site_yield is not None and pd.notna(_site_yield):
                site_yields.append(_site_yield)
        avg_yield = sum(site_yields) / len(site_yields) if site_yields else 0.0

        species_data.append({
            "id": sp_id,
            "group": group,
            "yield": avg_yield,
            "num_sites": len(present_in),
            "sites": present_in,
        })

    # Sort species from most common (center) to least common (edge)
    species_data.sort(key=lambda x: x["num_sites"], reverse=True)
    return ALL_SITES, GROUPS, species_data
@app.cell
def _(ALL_SITES, mo, species_data):
    # Create UI controls
    total_species = len(species_data)

    # The slider normally starts at 5 and defaults to half of all species.
    # With small data sets those fixed numbers fall outside the valid range
    # (fewer than 5 species: stop < start; fewer than 10: value < start), so
    # both are clamped to what the data allows.
    _slider_start = min(5, total_species)
    _slider_value = max(_slider_start, total_species // 2)
    slider_count = mo.ui.slider(
        start=_slider_start,
        stop=total_species,
        step=1,
        value=_slider_value,
        label="Species shown:",
    )

    # Default selections: first site, and second site for the comparison.
    # With no sites at all nothing is preselected (None) instead of passing a
    # value that is not among the options or indexing an empty list.
    _first_site = ALL_SITES[0] if ALL_SITES else None
    _second_site = ALL_SITES[1] if len(ALL_SITES) > 1 else _first_site

    drop_single = mo.ui.dropdown(
        options=ALL_SITES, value=_first_site, label="Highlight Site:"
    )
    drop_comp1 = mo.ui.dropdown(
        options=ALL_SITES, value=_first_site, label="Compare Site 1:"
    )
    drop_comp2 = mo.ui.dropdown(
        options=ALL_SITES, value=_second_site, label="Compare Site 2:"
    )
    return drop_comp1, drop_comp2, drop_single, slider_count
@app.cell
def _(mo):
    # HTML fragment returned alongside the tabs; as written its markup is a
    # single newline.
    dropdown_styles = mo.Html("""
""")

    # (tab label, hint shown inside that tab), in display order.
    _tab_hints = (
        ("General Overview", "*Viewing all species colored by their primary group.*"),
        ("Individual Site", "*Select a site using the dropdown below.*"),
        ("Compare Sites", "*Select two sites to compare using the dropdowns below.*"),
    )
    tabs = mo.ui.tabs({_label: mo.md(_hint) for _label, _hint in _tab_hints})
    return dropdown_styles, tabs
@app.cell
def _(
    drop_comp1,
    drop_comp2,
    drop_single,
    dropdown_styles,
    mo,
    slider_count,
    tabs,
):
    # Choose the site selector that belongs to the active tab; any other tab
    # gets an empty HTML element in its place.
    _current_tab = tabs.value
    if _current_tab == "Compare Sites":
        _selector_row = mo.hstack([drop_comp1, drop_comp2], gap=4)
    elif _current_tab == "Individual Site":
        _selector_row = mo.hstack([drop_single], justify="center")
    else:
        _selector_row = mo.Html("")

    # Stack styles, tabs, the selector row and the species slider, centred.
    _slider_row = mo.hstack([slider_count], justify="center")
    controls = mo.vstack(
        [dropdown_styles, tabs, _selector_row, _slider_row],
        align="center",
        gap=4,
    )
    return (controls,)
@app.cell
def _(math, tabs):
# Sunburst chart: dimensions + position
CX, CY = 500, 380
MAX_RADIUS = 350
MIN_RADIUS = 80
TW, TH = 245, 105
# Colors
preferred_hues = [30, 120, 210]
SITE_LEGEND_BKG = "#1e1e2e"
SITE_LEGEND_BORDER = "#45475a"
SITE_LEGEND_TEXT = "#cdd6f4"
SITE_COLOR_S1_ONLY = "#f5b0c6"
SITE_COLOR_S2_ONLY = "#d8b4fe"
SITE_COLOR_BOTH = "#f9e2af"
SITE_COLOR_NEITHER = "#e0e0e0"
# Helper functions
def polar_to_cartesian(cx, cy, r, angle_deg):
rad = math.radians(angle_deg)
return cx + r * math.cos(rad), cy + r * math.sin(rad)
def build_arc(cx, cy, r_inner, r_outer, start_angle, end_angle):
if end_angle - start_angle <= 0.05:
end_angle = start_angle + 0.05
p1 = polar_to_cartesian(cx, cy, r_outer, start_angle)
p2 = polar_to_cartesian(cx, cy, r_outer, end_angle)
p3 = polar_to_cartesian(cx, cy, r_inner, end_angle)
p4 = polar_to_cartesian(cx, cy, r_inner, start_angle)
large_arc = "0" if end_angle - start_angle <= 180 else "1"
return f"M {p1[0]} {p1[1]} A {r_outer} {r_outer} 0 {large_arc} 1 {p2[0]} {p2[1]} L {p3[0]} {p3[1]} A {r_inner} {r_inner} 0 {large_arc} 0 {p4[0]} {p4[1]} Z"
def make_tooltip(unique_id, tx, ty, group_name, species_name, avg_yield, num_sites):
# Bound the tooltip coordinates so it doesn't clip off the 1000x1000 SVG canvas
tx = max(10, min(tx, 1000 - TW - 10))
ty = max(10, min(ty, 1000 - TH - 10))
safe_group = group_name.strip().upper()[:40]
safe_name = species_name.strip()[:38]
yield_str = f"Average yield: {avg_yield:.1f} kg ha\u207b\u00b9"
sites_str = f"Occurs in: {num_sites} site(s)"
tip_id = unique_id.replace("seg", "tip")
return f"""