ArtenTracker / src /chart_builder.py
Johannes
Initial deployment (no data - downloaded from HF Dataset at startup)
0d4a0ba
import pandas as pd
import altair as alt
from src.utils import PERIODS, CURRENT_YEAR, UNIT_NAME_COL
def build_trend_chart(trend_df: pd.DataFrame, species_name: str,
                      year_from: int | None = None,
                      year_to: int | None = None) -> alt.Chart:
    """Build the interactive yearly occurrence trend chart for one species.

    Draws a green line with point markers over every row of ``trend_df`` and
    overlays a larger orange-red marker on the rows whose ``year`` equals
    ``CURRENT_YEAR``.

    Args:
        trend_df: Frame with a ``year`` and a ``count`` column.
        species_name: Name shown in the chart title.
        year_from: First year shown in the subtitle; falls back to
            ``PERIODS[0][0]`` when omitted.
        year_to: Last year shown in the subtitle; falls back to
            ``CURRENT_YEAR`` when omitted.

    Returns:
        A layered, interactive Altair chart with ``height=300``.
    """
    base = alt.Chart(trend_df).encode(
        x=alt.X("year:O", title="Jahr", axis=alt.Axis(labelAngle=-45)),
        y=alt.Y("count:Q", title="Anzahl Funde", scale=alt.Scale(zero=True)),
        tooltip=[
            alt.Tooltip("year:O", title="Jahr"),
            alt.Tooltip("count:Q", title="Funde"),
        ],
    )
    line = base.mark_line(color="#00cc6a", strokeWidth=2)
    points = base.mark_circle(color="#00cc6a", size=40)

    # Separate layer so the current year's marker is drawn on top of the
    # regular points in a contrasting colour.
    current_data = trend_df[trend_df["year"] == CURRENT_YEAR]
    current_point = alt.Chart(current_data).mark_circle(
        color="#ff6b35", size=80
    ).encode(
        x=alt.X("year:O"),
        y=alt.Y("count:Q"),
        tooltip=[
            alt.Tooltip("year:O", title="Jahr"),
            alt.Tooltip("count:Q", title="Funde"),
        ],
    )

    # Separate the two years with an en dash; without it the subtitle would
    # render them as one run-together number (e.g. "20002024").
    first_year = year_from or PERIODS[0][0]
    last_year = year_to or CURRENT_YEAR
    y_label = f"{first_year}–{last_year}"

    chart = (line + points + current_point).properties(
        title=alt.TitleParams(
            text=f"Fundtrend: {species_name}",
            subtitle=f"Jährliche GBIF-Funde {y_label} | Rot = aktuelles Jahr",
            fontSize=14,
            subtitleFontSize=11,
        ),
        height=300,
    ).interactive()
    return chart
def prepare_per_unit_data(
    df: pd.DataFrame,
    unit_col: str,
    max_units: int = 20,
) -> tuple[list[str], pd.DataFrame]:
    """Return (ordered list of top unit names, aggregated year/count DataFrame).

    Rows with a missing ``year`` or missing ``unit_col`` value are ignored.
    The remaining rows are counted per (unit, year) pair, and only the
    ``max_units`` units with the highest total count are kept.

    Args:
        df: Input frame; must contain ``unit_col`` and ``year`` to yield data.
        unit_col: Name of the column holding the geographic unit.
        max_units: Maximum number of units to keep.

    Returns:
        A tuple ``(top_units, data)``. ``top_units`` is ordered by total count
        descending. ``data`` always has the columns
        ``[unit_col, 'year', 'count']``; when there is nothing to aggregate
        the list is empty and ``data`` is an empty frame with those columns.
    """

    def _no_data() -> tuple[list[str], pd.DataFrame]:
        # Keep the documented schema even when empty so callers can still
        # select data["year"] / data[unit_col] without a KeyError.
        return [], pd.DataFrame(columns=[unit_col, "year", "count"])

    if unit_col not in df.columns or "year" not in df.columns:
        return _no_data()

    usable = df.dropna(subset=["year", unit_col])
    if usable.empty:
        return _no_data()

    data = (
        usable.assign(year=lambda d: d["year"].astype(int))
        .groupby([unit_col, "year"])
        .size()
        .reset_index(name="count")
    )
    if data.empty:
        return _no_data()

    top_units = (
        data.groupby(unit_col)["count"].sum()
        .nlargest(max_units).index.tolist()
    )
    data = data[data[unit_col].isin(top_units)].copy()
    return top_units, data
def build_unit_row_chart(unit_data: pd.DataFrame,
                         y_max: int | None = None) -> alt.Chart:
    """Build a compact yearly line chart for a single geographic unit.

    The chart is a green line with point markers over all rows of
    ``unit_data``, plus a larger orange-red marker on rows whose ``year``
    equals ``CURRENT_YEAR``.

    Args:
        unit_data: Frame with a ``year`` and a ``count`` column.
        y_max: Optional upper bound for the Y axis. When given, the axis
            domain is fixed to ``[0, y_max]`` so several unit charts can
            share one scale; otherwise the axis only has to include zero.

    Returns:
        A layered, interactive Altair chart with ``height=140``.
    """
    # Pin the Y domain only when a shared maximum was supplied.
    if y_max is None:
        count_scale = alt.Scale(zero=True)
    else:
        count_scale = alt.Scale(zero=True, domain=[0, y_max])

    def hover_fields():
        # Fresh tooltip definitions for each layer that needs them.
        return [
            alt.Tooltip("year:O", title="Jahr"),
            alt.Tooltip("count:Q", title="Funde"),
        ]

    shared = alt.Chart(unit_data).encode(
        x=alt.X("year:O", title="Jahr", axis=alt.Axis(labelAngle=-45)),
        y=alt.Y("count:Q", title="Funde", scale=count_scale),
        tooltip=hover_fields(),
    )
    trend_line = shared.mark_line(color="#00cc6a", strokeWidth=2)
    trend_dots = shared.mark_circle(color="#00cc6a", size=40)

    # Highlight layer: only the rows belonging to the current year.
    this_year_rows = unit_data[unit_data["year"] == CURRENT_YEAR]
    highlight = (
        alt.Chart(this_year_rows)
        .mark_circle(color="#ff6b35", size=80)
        .encode(
            x=alt.X("year:O"),
            y=alt.Y("count:Q", scale=count_scale),
            tooltip=hover_fields(),
        )
    )

    layered = trend_line + trend_dots + highlight
    return layered.properties(height=140).interactive()
def compute_trend_from_df(df: pd.DataFrame) -> pd.DataFrame:
    """Count rows per year for any DataFrame that has a 'year' column.

    Rows with a missing ``year`` are ignored and the remaining years are cast
    to ``int`` before counting.

    Returns:
        A frame with the columns ``['year', 'count']`` sorted by year. It is
        empty (with the same columns) when ``df`` is empty or has no
        ``year`` column.
    """
    if "year" not in df.columns or df.empty:
        return pd.DataFrame(columns=["year", "count"])

    with_year = df.dropna(subset=["year"])
    with_year = with_year.assign(year=with_year["year"].astype(int))
    rows_per_year = with_year.groupby("year").size()
    trend = rows_per_year.reset_index(name="count")
    return trend.sort_values("year")