# PaleoData-Explorer / visualizations.py
# benjamintia's picture
# Upload folder using huggingface_hub
# 81f7f5d verified
# Raw
# History Blame Contribute Delete
# 6.95 kB
"""
PaleoData Explorer — Visualizations
====================================
Produces two core scientific visuals from a cleaned occurrence DataFrame:
1. **Paleogeographic Map** (Folium + streamlit-folium)
Plots fossil localities using paleocoordinates (``paleolat`` /
``paleolng``), i.e. where the organism lived accounting for
tectonic drift.
2. **Deep-Time Timeline** (Plotly)
A horizontal range chart showing the stratigraphic lifespan of
each taxon. The X‑axis (geological time in Ma) is **reversed**
so that older dates appear on the left and younger dates on the
right, as is standard in geology.
"""
from __future__ import annotations
import logging
from typing import Dict, List, Optional
import folium
from folium.plugins import MarkerCluster
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
# Module-level logger; the builders below use it for warnings and info messages.
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
# Default for the ``zoom`` parameter of ``build_paleo_map``.
DEFAULT_MAP_ZOOM: int = 3
# Row-count threshold above which ``build_paleo_map`` randomly samples the
# DataFrame down to this many rows before drawing markers.
MAX_MARKERS_MAP: int = 1500 # cap markers for performance
# ---------------------------------------------------------------------------
# 1. Paleogeographic Map (Folium)
# ---------------------------------------------------------------------------
def _paleo_map_finite_float(value):
    """Return *value* as a finite ``float``, or ``None`` if that is impossible."""
    try:
        number = float(value)
    except (TypeError, ValueError):
        # None, pd.NA, non-numeric strings, ...
        return None
    return number if np.isfinite(number) else None


def build_paleo_map(
    df: pd.DataFrame,
    *,
    height: int = 550,
    zoom: int = DEFAULT_MAP_ZOOM,
) -> folium.Map:
    """Create a Folium map of fossil paleocoordinates.

    Rows whose ``paleolat`` / ``paleolng`` cannot be read as finite numbers
    are dropped *before* the marker cap is applied, so the cap is spent only
    on rows that can actually be drawn.

    Parameters
    ----------
    df : pd.DataFrame
        Cleaned occurrence DataFrame. Must contain ``paleolat`` and
        ``paleolng``. ``matched_name``, ``early_interval``, ``max_ma`` and
        ``min_ma`` are used for the tooltip/popup when present.
    height : int
        Not read by this function; kept for interface compatibility.
        NOTE(review): presumably the caller forwards it to ``st_folium`` --
        confirm at the call site.
    zoom : int
        Initial zoom level.

    Returns
    -------
    folium.Map
        The ready-to-render Folium map object. When no row has usable
        coordinates the map is centred on (0, 0) and has no markers.

    Raises
    ------
    KeyError
        If ``paleolat`` or ``paleolng`` is missing from a non-empty ``df``.
    """
    # Local import keeps this block self-contained (stdlib only).
    from html import escape

    if df.empty:
        logger.warning("Empty DataFrame passed to build_paleo_map")
        return folium.Map(location=[0, 0], zoom_start=zoom, tiles="OpenStreetMap")

    # Coerce coordinates once; anything non-numeric or infinite becomes NaN.
    coords = df[["paleolat", "paleolng"]].apply(pd.to_numeric, errors="coerce")
    coords = coords.replace([np.inf, -np.inf], np.nan)
    # Positional boolean mask: safe even when df has a non-unique index.
    usable = coords.notna().all(axis=1).to_numpy()
    work_df = df.loc[usable]
    if work_df.empty:
        logger.warning("No rows with finite paleocoordinates; map will have no markers")

    # Centre on the median paleocoordinate. Folium rejects NaN locations, so
    # fall back to 0.0 when there is nothing to take a median of.
    centre = coords.loc[usable].median()
    center_lat = _paleo_map_finite_float(centre["paleolat"]) or 0.0
    center_lng = _paleo_map_finite_float(centre["paleolng"]) or 0.0

    m = folium.Map(
        location=[center_lat, center_lng],
        zoom_start=zoom,
        tiles="CartoDB positron",
        control_scale=True,
    )

    # Sample if too many points (keeps the map snappy).
    if len(work_df) > MAX_MARKERS_MAP:
        logger.info(
            "Sampling %d points down to %d for map rendering",
            len(work_df),
            MAX_MARKERS_MAP,
        )
        work_df = work_df.sample(n=MAX_MARKERS_MAP, random_state=42)

    cluster = MarkerCluster(name="Fossil Occurrences").add_to(m)

    for _, row in work_df.iterrows():
        lat = _paleo_map_finite_float(row["paleolat"])
        lng = _paleo_map_finite_float(row["paleolng"])
        if lat is None or lng is None:
            continue

        raw_name = row.get("matched_name", "Unknown")
        name = "Unknown" if pd.isna(raw_name) else str(raw_name)
        # Tooltip and popup are injected as raw HTML, so escape data values.
        safe_name = escape(name)

        tooltip_lines = [f"<b>{safe_name}</b>"]

        early = row.get("early_interval", "")
        # isinstance first: truth-testing pd.NA raises TypeError.
        if isinstance(early, str) and early:
            tooltip_lines.append(f"Interval: {escape(early)}")

        max_ma_val = _paleo_map_finite_float(row.get("max_ma", np.nan))
        min_ma_val = _paleo_map_finite_float(row.get("min_ma", np.nan))
        if max_ma_val is not None and min_ma_val is not None:
            # En dash between the bounds; they were previously run together.
            tooltip_lines.append(f"Age: {max_ma_val:.1f}–{min_ma_val:.1f} Ma")

        folium.CircleMarker(
            location=[lat, lng],
            radius=5,
            color="#c0392b",
            fill=True,
            fill_color="#e74c3c",
            fill_opacity=0.7,
            tooltip=folium.Tooltip("<br>".join(tooltip_lines)),
            popup=folium.Popup(safe_name, parse_html=False, max_width=200),
        ).add_to(cluster)

    folium.LayerControl().add_to(m)
    return m
# ---------------------------------------------------------------------------
# 2. Deep-Time Timeline (Plotly)
# ---------------------------------------------------------------------------
def build_timeline(
    df: pd.DataFrame,
    *,
    max_taxa: int = 50,
    height: int = 700,
) -> go.Figure:
    """Build a Plotly horizontal range chart of taxon stratigraphic ranges.

    Geological convention: the X-axis (Time, Ma) is **reversed** so
    that older dates sit on the left.

    Parameters
    ----------
    df : pd.DataFrame
        Cleaned occurrence DataFrame (requires ``matched_name``,
        ``max_ma``, ``min_ma``). The per-taxon ``middle_age`` used for
        ordering is computed here from the aggregated range, so the input
        does not need that column.
    max_taxa : int
        Maximum number of unique taxa to display (top-N by oldest age).
    height : int
        Minimum figure height in pixels; the figure grows beyond this when
        ``100 + 25 * <number of taxa shown>`` is larger.

    Returns
    -------
    go.Figure
        Plotly figure with one line trace per taxon. When ``df`` is empty,
        or no taxon has both range bounds, a 300 px placeholder figure with
        an explanatory title is returned instead.
    """
    if df.empty:
        logger.warning("Empty DataFrame passed to build_timeline")
        fig = go.Figure()
        fig.update_layout(
            title="No data to display",
            xaxis_title="Time (Ma)",
            yaxis_title="Taxon",
            height=300,
        )
        fig.update_xaxes(autorange="reversed")
        return fig

    # Aggregate to unique taxa: take the overall max_ma / min_ma per name,
    # then drop taxa that are missing either bound.
    agg: pd.DataFrame = (
        df.groupby("matched_name", as_index=False)
        .agg(max_ma=("max_ma", "max"), min_ma=("min_ma", "min"))
        .dropna(subset=["max_ma", "min_ma"])
    )
    if agg.empty:
        logger.warning("No taxa with valid temporal data remain after aggregation")
        fig = go.Figure()
        fig.update_layout(title="No taxa with valid age data", height=300)
        fig.update_xaxes(autorange="reversed")
        return fig

    agg["middle_age"] = (agg["max_ma"] + agg["min_ma"]) / 2.0

    # Keep top-N by oldest age (largest max_ma), then order rows by midpoint.
    agg = agg.nlargest(max_taxa, "max_ma")
    agg = agg.sort_values("middle_age", ascending=False)

    fig = go.Figure()

    # Draw one horizontal line segment per taxon.
    for _, row in agg.iterrows():
        taxon = row["matched_name"]
        oldest = row["max_ma"]
        youngest = row["min_ma"]
        fig.add_trace(
            go.Scatter(
                x=[oldest, youngest],
                y=[taxon, taxon],
                mode="lines+markers",
                line={"color": "#2c3e50", "width": 6},
                marker={"size": 6, "color": "#e74c3c"},
                name=taxon,
                hovertemplate=(
                    f"<b>{taxon}</b><br>"
                    # En dash between the bounds; they were previously run
                    # together (e.g. "72.1066.00").
                    f"Range: {oldest:.2f}–{youngest:.2f} Ma<br>"
                    "<extra></extra>"
                ),
                showlegend=False,
            )
        )

    fig.update_layout(
        title="Fossil Taxon Stratigraphic Ranges",
        xaxis_title="Time (Ma)",
        yaxis_title="",
        # Allow roughly 25 px per taxon so labels do not overlap.
        height=max(height, 100 + 25 * len(agg)),
        margin={"l": 10, "r": 20, "t": 40, "b": 40},
        hovermode="closest",
    )
    # Reverse X-axis (older → younger = left → right)
    fig.update_xaxes(autorange="reversed")
    return fig