""" PaleoData Explorer — Visualizations ==================================== Produces two core scientific visuals from a cleaned occurrence DataFrame: 1. **Paleogeographic Map** (Folium + streamlit-folium) Plots fossil localities using paleocoordinates (``paleolat`` / ``paleolng``), i.e. where the organism lived accounting for tectonic drift. 2. **Deep-Time Timeline** (Plotly) A horizontal range chart showing the stratigraphic lifespan of each taxon. The X‑axis (geological time in Ma) is **reversed** so that older dates appear on the left and younger dates on the right, as is standard in geology. """ from __future__ import annotations import logging from typing import Dict, List, Optional import folium from folium.plugins import MarkerCluster import numpy as np import pandas as pd import plotly.express as px import plotly.graph_objects as go logger = logging.getLogger(__name__) # --------------------------------------------------------------------------- # Constants # --------------------------------------------------------------------------- DEFAULT_MAP_ZOOM = 3 MAX_MARKERS_MAP = 1500 # cap markers for performance # --------------------------------------------------------------------------- # 1. Paleogeographic Map (Folium) # --------------------------------------------------------------------------- def build_paleo_map( df: pd.DataFrame, *, height: int = 550, zoom: int = DEFAULT_MAP_ZOOM, ) -> folium.Map: """Create a Folium map of fossil paleocoordinates. Parameters ---------- df : pd.DataFrame Cleaned occurrence DataFrame. Must contain at least ``paleolat``, ``paleolng``, ``matched_name``. height : int Height in pixels passed to ``st_folium``. zoom : int Initial zoom level. Returns ------- folium.Map The ready-to-render Folium map object. """ if df.empty: logger.warning("Empty DataFrame passed to build_paleo_map") return folium.Map(location=[0, 0], zoom_start=zoom, tiles="OpenStreetMap") # Centre on the median paleocoordinate center_lat = float(df["paleolat"].median()) center_lng = float(df["paleolng"].median()) m = folium.Map( location=[center_lat, center_lng], zoom_start=zoom, tiles="CartoDB positron", control_scale=True, ) # Sample if too many points (keeps the map snappy) work_df = df if len(work_df) > MAX_MARKERS_MAP: logger.info("Sampling %d points down to %d for map rendering", len(work_df), MAX_MARKERS_MAP) work_df = work_df.sample(n=MAX_MARKERS_MAP, random_state=42) cluster = MarkerCluster(name="Fossil Occurrences").add_to(m) for _, row in work_df.iterrows(): lat = float(row["paleolat"]) lng = float(row["paleolng"]) if not (np.isfinite(lat) and np.isfinite(lng)): continue name = row.get("matched_name", "Unknown") early = row.get("early_interval", "") max_ma_val = row.get("max_ma", np.nan) min_ma_val = row.get("min_ma", np.nan) tooltip_lines = [f"{name}"] if early and isinstance(early, str): tooltip_lines.append(f"Interval: {early}") if np.isfinite(max_ma_val) and np.isfinite(min_ma_val): tooltip_lines.append(f"Age: {max_ma_val:.1f}–{min_ma_val:.1f} Ma") folium.CircleMarker( location=[lat, lng], radius=5, color="#c0392b", fill=True, fill_color="#e74c3c", fill_opacity=0.7, tooltip=folium.Tooltip("
".join(tooltip_lines)), popup=folium.Popup(name, parse_html=False, max_width=200), ).add_to(cluster) folium.LayerControl().add_to(m) return m # --------------------------------------------------------------------------- # 2. Deep-Time Timeline (Plotly) # --------------------------------------------------------------------------- def build_timeline( df: pd.DataFrame, *, max_taxa: int = 50, height: int = 700, ) -> go.Figure: """Build a Plotly horizontal range chart of taxon stratigraphic ranges. Geological convention: the X‑axis (Time, Ma) is **reversed** so that older dates sit on the left. Parameters ---------- df : pd.DataFrame Cleaned occurrence DataFrame (requires ``matched_name``, ``max_ma``, ``min_ma``, ``middle_age``). max_taxa : int Maximum number of unique taxa to display (top‑N by oldest age). height : int Figure height in pixels. Returns ------- go.Figure Plotly figure ready for ``st.plotly_chart``. """ if df.empty: logger.warning("Empty DataFrame passed to build_timeline") fig = go.Figure() fig.update_layout( title="No data to display", xaxis_title="Time (Ma)", yaxis_title="Taxon", height=300, ) fig.update_xaxes(autorange="reversed") return fig # Aggregate to unique taxa: take the overall max_ma / min_ma per name agg: pd.DataFrame = ( df.groupby("matched_name", as_index=False) .agg(max_ma=("max_ma", "max"), min_ma=("min_ma", "min")) .dropna(subset=["max_ma", "min_ma"]) ) if agg.empty: logger.warning("No taxa with valid temporal data remain after aggregation") fig = go.Figure() fig.update_layout(title="No taxa with valid age data", height=300) fig.update_xaxes(autorange="reversed") return fig agg["middle_age"] = (agg["max_ma"] + agg["min_ma"]) / 2.0 # Keep top-N by oldest age (largest max_ma) agg = agg.nlargest(max_taxa, "max_ma") agg = agg.sort_values("middle_age", ascending=False) fig = go.Figure() # Draw horizontal line segments for each taxon for _, row in agg.iterrows(): fig.add_trace( go.Scatter( x=[row["max_ma"], row["min_ma"]], y=[row["matched_name"], row["matched_name"]], mode="lines+markers", line={"color": "#2c3e50", "width": 6}, marker={"size": 6, "color": "#e74c3c"}, name=row["matched_name"], hovertemplate=( f"{row['matched_name']}
" f"Range: {row['max_ma']:.2f}–{row['min_ma']:.2f} Ma
" f"" ), showlegend=False, ) ) fig.update_layout( title="Fossil Taxon Stratigraphic Ranges", xaxis_title="Time (Ma)", yaxis_title="", height=max(height, 100 + 25 * len(agg)), margin={"l": 10, "r": 20, "t": 40, "b": 40}, hovermode="closest", ) # Reverse X-axis (older → younger = left → right) fig.update_xaxes(autorange="reversed") return fig