Spaces:
Running
Running
| import pandas as pd | |
| import altair as alt | |
| from src.utils import PERIODS, CURRENT_YEAR, UNIT_NAME_COL | |
def build_trend_chart(trend_df: pd.DataFrame, species_name: str,
                      year_from: int | None = None,
                      year_to: int | None = None) -> alt.Chart:
    """Build the overall yearly trend chart for one species.

    The chart layers three marks: a green line, green point markers, and a
    larger orange marker for the rows whose ``year`` equals ``CURRENT_YEAR``.

    Args:
        trend_df: Data with a ``year`` and a ``count`` column; both are read
            by the encodings, and ``year`` selects the highlighted rows.
        species_name: Name inserted into the chart title.
        year_from: First year shown in the subtitle. Falls back to
            ``PERIODS[0][0]`` when ``None``. Only affects the subtitle text;
            the data is not filtered here.
        year_to: Last year shown in the subtitle. Falls back to
            ``CURRENT_YEAR`` when ``None``. Only affects the subtitle text.

    Returns:
        The interactive layered chart (line + points + current-year marker)
        with a height of 300.
    """
    base = alt.Chart(trend_df).encode(
        x=alt.X("year:O", title="Jahr", axis=alt.Axis(labelAngle=-45)),
        y=alt.Y("count:Q", title="Anzahl Funde", scale=alt.Scale(zero=True)),
        tooltip=[
            alt.Tooltip("year:O", title="Jahr"),
            alt.Tooltip("count:Q", title="Funde"),
        ],
    )
    line = base.mark_line(color="#00cc6a", strokeWidth=2)
    points = base.mark_circle(color="#00cc6a", size=40)

    # The current year gets its own layer so it can be drawn larger and in a
    # different colour on top of the regular markers.
    current_data = trend_df[trend_df["year"] == CURRENT_YEAR]
    current_point = alt.Chart(current_data).mark_circle(
        color="#ff6b35", size=80
    ).encode(
        x=alt.X("year:O"),
        y=alt.Y("count:Q"),
        tooltip=[
            alt.Tooltip("year:O", title="Jahr"),
            alt.Tooltip("count:Q", title="Funde"),
        ],
    )

    # Explicit None checks: only a missing bound triggers the default, not
    # any falsy value.
    first_year = year_from if year_from is not None else PERIODS[0][0]
    last_year = year_to if year_to is not None else CURRENT_YEAR
    year_range = f"{first_year}–{last_year}"

    chart = (line + points + current_point).properties(
        title=alt.TitleParams(
            text=f"Fundtrend: {species_name}",
            subtitle=f"Jährliche GBIF-Funde {year_range} | Rot = aktuelles Jahr",
            fontSize=14,
            subtitleFontSize=11,
        ),
        height=300,
    ).interactive()
    return chart
def prepare_per_unit_data(
    df: pd.DataFrame,
    unit_col: str,
    max_units: int = 20,
) -> tuple[list[str], pd.DataFrame]:
    """Aggregate yearly row counts for the units with the most rows.

    Rows with a missing ``year`` or a missing unit value are ignored, and
    ``year`` is cast to ``int`` before counting.

    Args:
        df: Input rows; must contain ``unit_col`` and ``year`` to produce a
            non-empty result.
        unit_col: Name of the column that identifies the unit.
        max_units: Maximum number of units to keep, chosen by total count.

    Returns:
        ``(top_units, data)``. ``top_units`` lists the kept unit names
        ordered by total count, descending. ``data`` has the columns
        ``[unit_col, 'year', 'count']`` with one row per (unit, year) pair
        of the kept units. When a required column is missing or no usable
        rows remain, the result is ``([], <empty frame>)``; the empty frame
        still carries those three columns so callers can index it safely.
    """
    result_columns = [unit_col, "year", "count"]

    if unit_col not in df.columns or "year" not in df.columns:
        return [], pd.DataFrame(columns=result_columns)

    usable = df.dropna(subset=["year", unit_col])
    if usable.empty:
        # Bail out before grouping so the aggregation never runs on an
        # empty frame and the caller still receives the documented schema.
        return [], pd.DataFrame(columns=result_columns)

    data = (
        usable.assign(year=lambda d: d["year"].astype(int))
        .groupby([unit_col, "year"])
        .size()
        .reset_index(name="count")
    )
    if data.empty:
        return [], pd.DataFrame(columns=result_columns)

    # nlargest returns the units already ordered by total, descending.
    top_units = (
        data.groupby(unit_col)["count"].sum()
        .nlargest(max_units).index.tolist()
    )
    data = data[data[unit_col].isin(top_units)].copy()
    return top_units, data
def build_unit_row_chart(unit_data: pd.DataFrame,
                         y_max: int | None = None) -> alt.Chart:
    """Build the yearly line chart for a single unit.

    Uses the same colours and marks as the overall trend chart: a green
    line, green point markers, and a larger orange marker for the rows
    whose ``year`` equals ``CURRENT_YEAR``.

    Args:
        unit_data: Data with a ``year`` and a ``count`` column.
        y_max: Upper bound of the Y axis. Passing the same value to several
            unit charts gives them a common scale for comparison. When
            ``None`` the axis only starts at zero and has no fixed upper
            bound.

    Returns:
        The interactive layered chart with a height of 140.
    """
    if y_max is None:
        count_scale = alt.Scale(zero=True)
    else:
        count_scale = alt.Scale(zero=True, domain=[0, y_max])

    def hover_fields():
        # Fresh tooltip definitions for each layer that needs them.
        return [
            alt.Tooltip("year:O", title="Jahr"),
            alt.Tooltip("count:Q", title="Funde"),
        ]

    series = alt.Chart(unit_data).encode(
        x=alt.X("year:O", title="Jahr", axis=alt.Axis(labelAngle=-45)),
        y=alt.Y("count:Q", title="Funde", scale=count_scale),
        tooltip=hover_fields(),
    )
    line_layer = series.mark_line(color="#00cc6a", strokeWidth=2)
    dot_layer = series.mark_circle(color="#00cc6a", size=40)

    # Separate layer for the current year so it stands out from the rest.
    this_year_rows = unit_data[unit_data["year"] == CURRENT_YEAR]
    highlight_layer = (
        alt.Chart(this_year_rows)
        .mark_circle(color="#ff6b35", size=80)
        .encode(
            x=alt.X("year:O"),
            y=alt.Y("count:Q", scale=count_scale),
            tooltip=hover_fields(),
        )
    )

    layered = line_layer + dot_layer + highlight_layer
    sized = layered.properties(height=140)
    return sized.interactive()
def compute_trend_from_df(df: pd.DataFrame) -> pd.DataFrame:
    """Return the number of rows per year as a ``year``/``count`` frame.

    Rows with a missing ``year`` are skipped and ``year`` is cast to
    ``int`` before counting. The result is sorted by ``year`` ascending.
    An empty input, or one without a ``year`` column, yields an empty
    frame that still has the ``year`` and ``count`` columns.
    """
    has_rows_and_year = (not df.empty) and ("year" in df.columns)
    if not has_rows_and_year:
        return pd.DataFrame(columns=["year", "count"])

    with_year = df.dropna(subset=["year"])
    with_year = with_year.assign(year=with_year["year"].astype(int))

    rows_per_year = with_year.groupby("year").size()
    trend = rows_per_year.reset_index(name="count")
    return trend.sort_values("year")