"""EEGDash Dataset Catalog — Hugging Face Space. Design system (kept in sync with ``style.css``): * Typography: Inter for UI (14px base, 600 for headings), JetBrains Mono for code snippets. Hierarchy: hero title > section titles > labels > meta. * Palette: Okabe-Ito (colorblind-safe). Brand is #0072B2 (EEG-blue). One warm accent #E69F00 reserved for the ``on 🤗`` flag — never decorative. Neutral ramp is slate (#f8fafc → #0f172a). * Encoding: categorical modality gets one Okabe-Ito hue per value. Continuous (dataset size) is never encoded by color. * Annotation: the hero, modality strip and detail panel each carry one sentence of prose so the page reads as an argument, not a data dump. """ from __future__ import annotations import ast import html as _html import json import logging import os from functools import lru_cache from pathlib import Path import gradio as gr import pandas as pd from huggingface_hub import HfApi from huggingface_hub.utils import HfHubHTTPError HF_ORG = "EEGDash" ROOT = Path(__file__).parent CSV_PATH = ROOT / "dataset_summary.csv" CSS_PATH = ROOT / "style.css" ASSETS_DIR = ROOT / "assets" EEGDASH_URL = "https://eegdash.org" GITHUB_URL = "https://github.com/eegdash/EEGDash" PYPI_URL = "https://pypi.org/project/eegdash/" DISCORD_URL = "https://discord.gg/eegdash" def _read_svg(name: str) -> str: """Read an SVG asset and strip the XML prolog so it inlines cleanly. Inlining lets us color icons via ``currentColor`` and avoids the file endpoint for tiny assets that would otherwise cost an extra round-trip. """ path = ASSETS_DIR / name if not path.exists(): return "" raw = path.read_text(encoding="utf-8") # Remove XML declaration + comments (Inkscape adds both). for marker in ("?>", "-->"): idx = raw.rfind(marker) if idx != -1 and idx < 300: raw = raw[idx + len(marker):].lstrip() return raw ICON_GITHUB = _read_svg("github.svg") ICON_PYPI = _read_svg("pypi.svg") ICON_DISCORD = _read_svg("discord.svg") SVG_MARK = _read_svg("mark.svg") SVG_BIDS = _read_svg("bids.svg") def _plot_iframe(name: str, *, height: int, title: str) -> str: """Embed a plotly plot in a sandboxed iframe. Gradio's ``gr.HTML`` strips `` """ with gr.Blocks( title="EEGDash — EEG/MEG dataset catalog", css=CSS, theme=THEME, analytics_enabled=False, head=HEAD, ) as demo: hero = gr.HTML(_hero_html(CATALOG, TOTAL_ALL), elem_classes=["eeg-hero-wrap"]) modality_strip = gr.HTML( _modality_strip_html(CATALOG), elem_classes=["eeg-modality-wrap"] ) with gr.Accordion( "Catalog views", open=True, elem_classes=["eeg-overview"], ): with gr.Tabs(elem_classes=["eeg-tabs"]): with gr.Tab("Flow"): gr.HTML( '

The catalog as a navigation ' 'map. Every dataset flows from its experimental ' 'modality (left) to its clinical population ' '(middle) to its repository (right). Ribbon ' 'thickness is the dataset count along that path — follow ' 'any color to see where a paradigm of interest lives.

' + _plot_iframe("dataset_sankey", height=640, title="Catalog flow (sankey)"), elem_classes=["eeg-plot"], ) with gr.Tab("Bubbles"): gr.HTML( '

Every dataset as an ' 'individual mark. Bubble size is recording count, color is ' 'experimental modality, axes span subjects × duration. ' 'Hover to find a specific dataset; use the filter below to ' 'narrow the field.

' + _plot_iframe("dataset_bubble", height=780, title="Dataset bubble chart"), elem_classes=["eeg-plot"], ) with gr.Tab("Treemap"): gr.HTML( '

Nested rectangles grouped by ' 'modality. Area is proportional to recording count — the ' 'biggest tiles are the heaviest contributors.

' + _plot_iframe("dataset_treemap", height=820, title="Dataset treemap"), elem_classes=["eeg-plot"], ) with gr.Tab("Growth"): gr.HTML( '

New datasets added to the ' 'catalog over time, colored by source. The slope tells you ' 'how fast the archive has expanded.

' + _plot_iframe("dataset_growth", height=520, title="Catalog growth"), elem_classes=["eeg-plot"], ) with gr.Tab("Clinical"): gr.HTML( '

Clinical populations ' 'represented in the catalog — from healthy controls to ' 'neurodegenerative and psychiatric conditions.

' + _plot_iframe("dataset_clinical", height=520, title="Clinical breakdown"), elem_classes=["eeg-plot"], ) with gr.Row(elem_classes=["eeg-toolbar"]): query = gr.Textbox( label="Search", placeholder="Type a dataset id, author, or keyword…", show_label=False, elem_classes=["eeg-search"], scale=4, ) only_on_hf = gr.Checkbox( label="Only 🤗-mirrored", value=False, elem_classes=["eeg-toggle"], scale=1, ) with gr.Accordion("Filters", open=False, elem_classes=["eeg-filters"]): with gr.Row(): modalities = gr.CheckboxGroup( label="Modality", choices=MODALITY_CHOICES, value=[] ) subject_types = gr.CheckboxGroup( label="Pathology / population", choices=SUBJECT_CHOICES, value=[] ) with gr.Row(): sources = gr.CheckboxGroup( label="Source", choices=SOURCE_CHOICES, value=[] ) licenses = gr.Dropdown( label="License", choices=LICENSE_CHOICES, multiselect=True, value=[], ) min_subjects = gr.Slider( label="Minimum subjects", minimum=0, maximum=500, step=10, value=0, ) with gr.Row(elem_classes=["eeg-main"]): with gr.Column(scale=3, elem_classes=["eeg-main__table"]): table = gr.Dataframe( value=_render_table(CATALOG), interactive=False, wrap=False, column_widths=[ "140px", "140px", "90px", "90px", "120px", "110px", "140px", "85px", "85px", "60px", "80px", "70px", "85px", "110px", "50px", ], label=None, show_label=False, elem_classes=["eeg-table"], max_height=640, ) with gr.Column(scale=2, elem_classes=["eeg-main__detail"]): detail = gr.HTML(_empty_detail(), elem_classes=["eeg-detail"]) gr.HTML( f""" """, elem_classes=["eeg-foot-wrap"], ) filter_inputs = [ query, modalities, subject_types, sources, licenses, min_subjects, only_on_hf, ] for w in filter_inputs: w.change(_on_filter, filter_inputs, [table, hero, modality_strip]) table.select(_on_select, [table], [detail]) if __name__ == "__main__": demo.queue().launch( ssr_mode=False, allowed_paths=[str(ASSETS_DIR)], )