Spaces:

implicit-personalization
/

persona-ui

Running

Jac-Zac committed 2 days ago

Commit

ae347c6

1 Parent(s): b279884

add session-scoped NDIF execution and improve cold-load UX

- bind remote NDIF backends to per-session API keys instead of
process-global state
- route chat, compare-chat, probe tracing, contrast scoring, and
extraction through explicit remote backends
- reuse upstream extraction via backend_factory with persona-vectors
- add Hugging Face cold-load notices for datasets and vector stores
- improve NDIF model discovery resilience and refresh sidebar key
handling
- update docs and env guidance for per-session NDIF keys and current
dependency setup
- improve general UI elements and add dataset loading
information
- add NDIF information when performing generation

Files changed (19) hide show

.env.example +3 -3
README.md +11 -9
app.py +5 -7
pyproject.toml +2 -1
tabs/chat.py +2 -1
tabs/chat_shared.py +2 -0
tabs/compare_chat.py +5 -2
tabs/extract.py +14 -2
tabs/probe_ui.py +2 -1
tests/test_analysis_sources.py +57 -0
tests/test_datasets.py +17 -4
tests/test_runtime_session_ndif.py +75 -0
utils/analysis_sources.py +44 -13
utils/chat.py +7 -32
utils/contrast.py +12 -3
utils/datasets.py +3 -1
utils/probe_trace.py +8 -1
utils/runtime.py +53 -5
uv.lock +4 -4

.env.example CHANGED Viewed

@@ -1,8 +1,8 @@
 # Copy this file to .env and fill in the values.
-# NDIF API key for remote nnsight execution
-# Required only when REMOTE=True in notebook.py
-# Get yours at https://login.ndif.us
 NDIF_API_KEY=your-ndif-api-key-here
 # HuggingFace model cache directory

 # Copy this file to .env and fill in the values.
+# Optional app-level NDIF API key for remote nnsight execution.
+# If omitted, users can enter their own per-session key in the sidebar.
+# Get one at https://login.ndif.us
 NDIF_API_KEY=your-ndif-api-key-here
 # HuggingFace model cache directory

README.md CHANGED Viewed

@@ -63,11 +63,9 @@ cp .env.example .env
 ## Local Development
-This checkout is configured to use the sibling `../persona-vectors` package as
-an editable dependency. For deployment, switch `persona-vectors` back to the
-published package or another installable source.
-`persona-data` can also be checked out next to this repo for local package work.
 Example:
@@ -97,13 +95,15 @@ This app can be deployed to Hugging Face Spaces using Docker.
 ### Prerequisites
-No secrets needed! The dependencies are published on PyPI.
 ### Build Locally (Optional)
 ```bash
 docker build -t persona-ui .
-# Specify your local .env to have things working as expectd
 docker run --env-file .env --rm -p 8501:8501 persona-ui
 ```
@@ -112,7 +112,7 @@ docker run --env-file .env --rm -p 8501:8501 persona-ui
 Copy `.env.example` to `.env` and fill in:
 ```bash
-NDIF_API_KEY=...       # Required for remote (NDIF) model execution
 HF_HOME=...            # Optional: HuggingFace cache directory
 ARTIFACTS_DIR=...      # Optional: where persona vectors are read from (default: ./artifacts)
 PERSONA_VECTORS_HUB_REPO=...  # Optional: default Analysis/Probing Hub dataset repo
@@ -122,7 +122,9 @@ PERSONA_UI_FIGURE_STATE_ENTRIES=2     # Optional: recent rendered Analysis figur
 PERSONA_UI_PREPARED_STATE_ENTRIES=4   # Optional: recent projection-ready markers kept in-session
 ```
-The app picks up this file automatically via `load_dotenv()` on startup.
 ## Persona Vectors

 ## Local Development
+The checked-in dependency config uses published packages. For local package
+work, uncomment the `tool.uv.sources` block in `pyproject.toml` and keep sibling
+checkouts next to this repo.
 Example:
 ### Prerequisites
+Dependencies are published on PyPI, so deployment does not require sibling
+checkouts. Remote NDIF execution still needs an API key, either configured as an
+environment variable or entered by each user in the sidebar.
 ### Build Locally (Optional)
 ```bash
 docker build -t persona-ui .
+# Pass your local .env if you want the container to use the same configuration
 docker run --env-file .env --rm -p 8501:8501 persona-ui
 ```
 Copy `.env.example` to `.env` and fill in:
 ```bash
+NDIF_API_KEY=...       # Optional shared NDIF key; users can also enter one per session
 HF_HOME=...            # Optional: HuggingFace cache directory
 ARTIFACTS_DIR=...      # Optional: where persona vectors are read from (default: ./artifacts)
 PERSONA_VECTORS_HUB_REPO=...  # Optional: default Analysis/Probing Hub dataset repo
 PERSONA_UI_PREPARED_STATE_ENTRIES=4   # Optional: recent projection-ready markers kept in-session
 ```
+The app picks up this file automatically via `load_dotenv()` on startup. If
+`NDIF_API_KEY` is unset, Chat and Extract users are prompted for a per-session
+key when they need remote execution.
 ## Persona Vectors

app.py CHANGED Viewed

@@ -7,7 +7,7 @@ from dotenv import load_dotenv
 from utils.analysis_sources import DEFAULT_COMPARE_MODEL, DEFAULT_HUB_REPO, SOURCE_HUB
 from utils.helpers import DATASET_SOURCES, session_key, widget_key
 from utils.preload import preload_once
-from utils.runtime import list_remote_models
 from utils.theme import active_base, install_catppuccin_theme
 load_dotenv()
@@ -181,10 +181,10 @@ def _remote_model_input(remote_models: list[str]) -> str:
 def _ndif_api_key_input() -> None:
-    """Prompt for an NDIF API key when none is configured via the environment."""
-    import nnsight
-    if os.environ.get("NDIF_API_KEY") or nnsight.CONFIG.API.APIKEY:
         return
     api_key = st.text_input(
@@ -193,9 +193,7 @@ def _ndif_api_key_input() -> None:
         key=_SIDEBAR_NDIF_API_KEY,
         help=f"Required for remote (NDIF) execution. Register at {NDIF_REGISTRATION_URL}",
     )
-    if api_key:
-        nnsight.CONFIG.API.APIKEY = api_key
-    else:
         st.caption(f"No NDIF API key found. [Get one]({NDIF_REGISTRATION_URL}).")

 from utils.analysis_sources import DEFAULT_COMPARE_MODEL, DEFAULT_HUB_REPO, SOURCE_HUB
 from utils.helpers import DATASET_SOURCES, session_key, widget_key
 from utils.preload import preload_once
+from utils.runtime import configured_ndif_api_key, list_remote_models
 from utils.theme import active_base, install_catppuccin_theme
 load_dotenv()
 def _ndif_api_key_input() -> None:
+    """Prompt for a per-session NDIF API key."""
+    if configured_ndif_api_key():
+        st.caption("Using NDIF API key from environment.")
         return
     api_key = st.text_input(
         key=_SIDEBAR_NDIF_API_KEY,
         help=f"Required for remote (NDIF) execution. Register at {NDIF_REGISTRATION_URL}",
     )
+    if not api_key:
         st.caption(f"No NDIF API key found. [Get one]({NDIF_REGISTRATION_URL}).")

pyproject.toml CHANGED Viewed

@@ -5,7 +5,7 @@ description = "Streamlit UI for persona-vectors"
 readme = "README.md"
 requires-python = ">=3.12"
 dependencies = [
-    "persona-vectors>=0.8.3",
     "datasets>=4.8.5",
     "huggingface-hub>=1.14.0",
     "streamlit>=1.44.0",
@@ -22,6 +22,7 @@ dev = [
 [tool.pytest.ini_options]
 testpaths = ["tests"]
 # Local development:
 # [tool.uv.sources]

 readme = "README.md"
 requires-python = ">=3.12"
 dependencies = [
+    "persona-vectors>=0.8.4",
     "datasets>=4.8.5",
     "huggingface-hub>=1.14.0",
     "streamlit>=1.44.0",
 [tool.pytest.ini_options]
 testpaths = ["tests"]
+pythonpath = ["."]
 # Local development:
 # [tool.uv.sources]

tabs/chat.py CHANGED Viewed

@@ -28,7 +28,7 @@ from tabs.chat_ui import (
 from utils.chat import build_chat_messages, resolve_system_prompt
 from utils.chat_export import save_chat_export
 from utils.helpers import format_ndif_status, session_key, widget_key
-from utils.runtime import cached_model
 if TYPE_CHECKING:
     from persona_data.synth_persona import PersonaData
@@ -129,6 +129,7 @@ def _handle_single_chat_generation(
             generation=generation,
             on_status=_show_ndif_status if remote else None,
             on_error=_show_error,
         )
         if error is not None:
             status_box.empty()

 from utils.chat import build_chat_messages, resolve_system_prompt
 from utils.chat_export import save_chat_export
 from utils.helpers import format_ndif_status, session_key, widget_key
+from utils.runtime import cached_model, session_ndif_api_key
 if TYPE_CHECKING:
     from persona_data.synth_persona import PersonaData
             generation=generation,
             on_status=_show_ndif_status if remote else None,
             on_error=_show_error,
+            ndif_api_key=session_ndif_api_key(),
         )
         if error is not None:
             status_box.empty()

tabs/chat_shared.py CHANGED Viewed

@@ -109,6 +109,7 @@ def generate_chat_reply_result(
     generation: GenerationConfig,
     on_status: Callable[[str, str, str], None] | None = None,
     on_error: Callable[[Exception], None] | None = None,
 ) -> tuple[ChatReply | None, Exception | None]:
     try:
         return (
@@ -117,6 +118,7 @@ def generate_chat_reply_result(
                 messages=messages,
                 remote=remote,
                 on_status=on_status,
                 **generation.to_generate_kwargs(),
             ),
             None,

     generation: GenerationConfig,
     on_status: Callable[[str, str, str], None] | None = None,
     on_error: Callable[[Exception], None] | None = None,
+    ndif_api_key: str | None = None,
 ) -> tuple[ChatReply | None, Exception | None]:
     try:
         return (
                 messages=messages,
                 remote=remote,
                 on_status=on_status,
+                ndif_api_key=ndif_api_key,
                 **generation.to_generate_kwargs(),
             ),
             None,

tabs/compare_chat.py CHANGED Viewed

@@ -15,7 +15,7 @@ from utils.chat import ChatReply, build_chat_messages, resolve_system_prompt
 from utils.chat_export import save_chat_export
 from utils.contrast import compute_contrast, compute_contrast_pair
 from utils.helpers import format_ndif_status, persona_label, session_key, widget_key
-from utils.runtime import cached_model
 from .chat_ui import (
     GenerationConfig,
@@ -173,6 +173,7 @@ def _generate_panels(
                 remote=remote,
                 generation=generation,
                 on_status=_show_ndif_status if remote else None,
             )
             results.append(reply if error is None else error)
     status_box.empty()
@@ -254,6 +255,7 @@ def _recompute_pending_contrast(
                     label_a=label_a,
                     label_b=label_b,
                     remote=remote,
                 )
                 if contrast is not None:
                     msg["_contrast"] = contrast
@@ -295,7 +297,7 @@ def _render_compare_footer(
     footer = st.container()
     with footer:
-        exp_col, rst_col, _spacer = st.columns([1, 1.25, 17.5], gap="xsmall")
         with exp_col:
             if st.button(
                 "",
@@ -379,6 +381,7 @@ def _compute_new_reply_contrast(
                 label_a=persona_label(left.persona),
                 label_b=persona_label(right.persona),
                 remote=remote,
             )
             if left_contrast is not None:
                 left.state["messages"][-1]["_contrast"] = left_contrast

 from utils.chat_export import save_chat_export
 from utils.contrast import compute_contrast, compute_contrast_pair
 from utils.helpers import format_ndif_status, persona_label, session_key, widget_key
+from utils.runtime import cached_model, session_ndif_api_key
 from .chat_ui import (
     GenerationConfig,
                 remote=remote,
                 generation=generation,
                 on_status=_show_ndif_status if remote else None,
+                ndif_api_key=session_ndif_api_key(),
             )
             results.append(reply if error is None else error)
     status_box.empty()
                     label_a=label_a,
                     label_b=label_b,
                     remote=remote,
+                    ndif_api_key=session_ndif_api_key(),
                 )
                 if contrast is not None:
                     msg["_contrast"] = contrast
     footer = st.container()
     with footer:
+        exp_col, rst_col, _spacer = st.columns([1, 1.25, 20], gap="xsmall")
         with exp_col:
             if st.button(
                 "",
                 label_a=persona_label(left.persona),
                 label_b=persona_label(right.persona),
                 remote=remote,
+                ndif_api_key=session_ndif_api_key(),
             )
             if left_contrast is not None:
                 left.state["messages"][-1]["_contrast"] = left_contrast

tabs/extract.py CHANGED Viewed

@@ -26,7 +26,7 @@ from utils.helpers import (
     session_key,
     widget_key,
 )
-from utils.runtime import cached_model
 from utils.theme import active_base
 _LAST_VARIANTS_KEY = "extract:last_variants"
@@ -366,16 +366,28 @@ def _run_extraction_plan(
                 step / total_steps if total_steps else 1.0,
                 text=f"{_row_label(persona, variant)} ({step + 1}/{total_steps})",
             )
             results.extend(
                 run_extraction(
                     model=model,
                     model_name=model_name,
-                    qa_pairs=qa_pairs[: settings.max_questions],
                     variants=(variant,),
                     persona=persona,
                     mask_strategy=settings.mask_strategy,
                     remote=remote,
                     on_status=_on_ndif_status if remote else None,
                 )
             )

     session_key,
     widget_key,
 )
+from utils.runtime import cached_model, remote_backend, session_ndif_api_key
 from utils.theme import active_base
 _LAST_VARIANTS_KEY = "extract:last_variants"
                 step / total_steps if total_steps else 1.0,
                 text=f"{_row_label(persona, variant)} ({step + 1}/{total_steps})",
             )
+            selected_qa = qa_pairs[: settings.max_questions]
             results.extend(
                 run_extraction(
                     model=model,
                     model_name=model_name,
+                    qa_pairs=selected_qa,
                     variants=(variant,),
                     persona=persona,
                     mask_strategy=settings.mask_strategy,
                     remote=remote,
                     on_status=_on_ndif_status if remote else None,
+                    backend_factory=(
+                        (
+                            lambda: remote_backend(
+                                model,
+                                session_ndif_api_key(),
+                                on_status=_on_ndif_status,
+                            )
+                        )
+                        if remote
+                        else None
+                    ),
                 )
             )

tabs/probe_ui.py CHANGED Viewed

@@ -28,7 +28,7 @@ from utils.probes import (
     load_probe,
     load_probe_from_bytes,
 )
-from utils.runtime import cached_model
 from utils.selection_controls import remembered_segmented_control
 _LAST_SOURCE_KEY = session_key("probe", "last_source")
@@ -428,6 +428,7 @@ def render_probe_inspector(
                     layer=layer,
                     location=location,
                     remote=remote,
                 )
             except Exception as exc:
                 _reset()

     load_probe,
     load_probe_from_bytes,
 )
+from utils.runtime import cached_model, session_ndif_api_key
 from utils.selection_controls import remembered_segmented_control
 _LAST_SOURCE_KEY = session_key("probe", "last_source")
                     layer=layer,
                     location=location,
                     remote=remote,
+                    ndif_api_key=session_ndif_api_key(),
                 )
             except Exception as exc:
                 _reset()

tests/test_analysis_sources.py ADDED Viewed

	@@ -0,0 +1,57 @@

+from __future__ import annotations
+from utils import analysis_sources
+class _Notice:
+    def __init__(self) -> None:
+        self.messages: list[str] = []
+        self.empty_calls = 0
+    def warning(self, message: str) -> None:
+        self.messages.append(message)
+    def empty(self) -> None:
+        self.empty_calls += 1
+def test_hub_vector_notice_is_transient_for_unopened_variants(monkeypatch):
+    notice = _Notice()
+    class DummyHubStore:
+        _datasets = {"templated": object()}
+    monkeypatch.setattr(
+        analysis_sources,
+        "HFPersonaVectorStore",
+        DummyHubStore,
+    )
+    monkeypatch.setattr(analysis_sources.st, "empty", lambda: notice)
+    with analysis_sources._hub_vector_notice(
+        DummyHubStore(), ("templated", "biography")
+    ):
+        pass
+    assert notice.messages
+    assert "persona vectors from Hugging Face" in notice.messages[0]
+    assert notice.empty_calls == 1
+def test_hub_vector_notice_stays_quiet_when_variants_are_open(monkeypatch):
+    class DummyHubStore:
+        _datasets = {"templated": object()}
+    monkeypatch.setattr(
+        analysis_sources,
+        "HFPersonaVectorStore",
+        DummyHubStore,
+    )
+    called = []
+    monkeypatch.setattr(analysis_sources.st, "empty", lambda: called.append(True))
+    with analysis_sources._hub_vector_notice(DummyHubStore(), ("templated",)):
+        pass
+    assert called == []

tests/test_datasets.py CHANGED Viewed

@@ -11,8 +11,20 @@ class _Progress:
         self.updates.append((value, text))
 def test_download_missing_startup_files_only_fetches_uncached_files(monkeypatch):
-    warnings: list[str] = []
     progress = _Progress()
     downloads: list[tuple[str, str, str]] = []
@@ -21,7 +33,7 @@ def test_download_missing_startup_files_only_fetches_uncached_files(monkeypatch)
         "_is_cached",
         lambda _repo, filename: filename == "already.jsonl",
     )
-    monkeypatch.setattr(datasets.st, "warning", warnings.append)
     monkeypatch.setattr(
         datasets.st,
         "progress",
@@ -41,7 +53,8 @@ def test_download_missing_startup_files_only_fetches_uncached_files(monkeypatch)
         "Example",
     )
-    assert warnings and "First-time setup for Example" in warnings[0]
     assert downloads == [("org/repo", "missing.jsonl", "dataset")]
     assert progress.updates[-1] == (1.0, "Downloaded missing.jsonl (1/1)")
@@ -52,7 +65,7 @@ def test_download_missing_startup_files_stays_quiet_when_cached(monkeypatch):
     def unexpected(*_args, **_kwargs):
         raise AssertionError("cold-download UI should not render for warm cache")
-    monkeypatch.setattr(datasets.st, "warning", unexpected)
     monkeypatch.setattr(datasets.st, "progress", unexpected)
     monkeypatch.setattr(datasets, "hf_hub_download", unexpected)

         self.updates.append((value, text))
+class _Notice:
+    def __init__(self) -> None:
+        self.messages: list[str] = []
+        self.empty_calls = 0
+    def warning(self, message: str) -> None:
+        self.messages.append(message)
+    def empty(self) -> None:
+        self.empty_calls += 1
 def test_download_missing_startup_files_only_fetches_uncached_files(monkeypatch):
+    notice = _Notice()
     progress = _Progress()
     downloads: list[tuple[str, str, str]] = []
         "_is_cached",
         lambda _repo, filename: filename == "already.jsonl",
     )
+    monkeypatch.setattr(datasets.st, "empty", lambda: notice)
     monkeypatch.setattr(
         datasets.st,
         "progress",
         "Example",
     )
+    assert notice.messages and "First-time setup for Example" in notice.messages[0]
+    assert notice.empty_calls == 1
     assert downloads == [("org/repo", "missing.jsonl", "dataset")]
     assert progress.updates[-1] == (1.0, "Downloaded missing.jsonl (1/1)")
     def unexpected(*_args, **_kwargs):
         raise AssertionError("cold-download UI should not render for warm cache")
+    monkeypatch.setattr(datasets.st, "empty", unexpected)
     monkeypatch.setattr(datasets.st, "progress", unexpected)
     monkeypatch.setattr(datasets, "hf_hub_download", unexpected)

tests/test_runtime_session_ndif.py ADDED Viewed

	@@ -0,0 +1,75 @@

+from __future__ import annotations
+from utils import runtime
+def test_session_ndif_api_key_is_read_from_current_session(monkeypatch) -> None:
+    monkeypatch.setattr(
+        runtime.st,
+        "session_state",
+        {"sidebar:ndif_api_key": "user-a-key"},
+    )
+    assert runtime.session_ndif_api_key() == "user-a-key"
+    monkeypatch.setattr(
+        runtime.st,
+        "session_state",
+        {"sidebar:ndif_api_key": "user-b-key"},
+    )
+    assert runtime.session_ndif_api_key() == "user-b-key"
+def test_configured_ndif_api_key_reads_environment(monkeypatch) -> None:
+    monkeypatch.setenv("NDIF_API_KEY", "env-key")
+    assert runtime.configured_ndif_api_key() == "env-key"
+def test_remote_backend_binds_explicit_session_key(monkeypatch) -> None:
+    from nnsight.intervention.backends import remote
+    seen: list[str | None] = []
+    class FakeBackend:
+        def __init__(self, model_key: str, api_key: str | None = None) -> None:
+            self.model_key = model_key
+            self.api_key = api_key
+            self.verbose = False
+            seen.append(api_key)
+    class FakeModel:
+        def to_model_key(self) -> str:
+            return "model-key"
+    monkeypatch.setattr(remote, "RemoteBackend", FakeBackend)
+    monkeypatch.setattr(
+        runtime.st,
+        "session_state",
+        {"sidebar:ndif_api_key": "ambient-session-key"},
+    )
+    backend = runtime.remote_backend(FakeModel(), "explicit-user-key")
+    assert backend.api_key == "explicit-user-key"
+    assert seen == ["explicit-user-key"]
+def test_remote_backend_falls_back_to_environment_key(monkeypatch) -> None:
+    from nnsight.intervention.backends import remote
+    class FakeBackend:
+        def __init__(self, model_key: str, api_key: str | None = None) -> None:
+            self.model_key = model_key
+            self.api_key = api_key
+            self.verbose = False
+    class FakeModel:
+        def to_model_key(self) -> str:
+            return "model-key"
+    monkeypatch.setattr(remote, "RemoteBackend", FakeBackend)
+    monkeypatch.setattr(runtime.st, "session_state", {})
+    monkeypatch.setenv("NDIF_API_KEY", "env-key")
+    backend = runtime.remote_backend(FakeModel())
+    assert backend.api_key == "env-key"

utils/analysis_sources.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import os
 import streamlit as st
 from persona_vectors.analysis import (
@@ -39,6 +40,34 @@ _VECTOR_CACHE_ENTRIES = env_int("PERSONA_UI_VECTOR_CACHE_ENTRIES", 4)
 _PREPARED_CACHE_ENTRIES = env_int("PERSONA_UI_PREPARED_CACHE_ENTRIES", 8)
 @st.cache_resource(show_spinner=False, max_entries=_STORE_CACHE_ENTRIES)
 def activation_store_cached(
     source: str,
@@ -74,9 +103,9 @@ def personas_cached(
     *,
     include_baseline: bool = False,
 ) -> list[str]:
-    return activation_store_cached(
-        source, location, model_name, mask_strategy_value
-    ).list_personas(list(variants), include_baseline=include_baseline)
 @st.cache_data(show_spinner=False)
@@ -89,7 +118,8 @@ def persona_names_cached(
     persona_ids: tuple[str, ...],
 ) -> dict[str, str]:
     store = activation_store_cached(source, location, model_name, mask_strategy_value)
-    names = store.persona_names(list(persona_ids), variants=list(variants))
     # Preserve input order, fall back to the id when the row has no display name.
     return {pid: names.get(pid, pid) for pid in persona_ids}
@@ -103,9 +133,9 @@ def store_layers_cached(
     variants: tuple[str, ...],
     persona_ids: tuple[str, ...],
 ) -> list[int]:
-    return activation_store_cached(
-        source, location, model_name, mask_strategy_value
-    ).list_layers(list(variants), list(persona_ids))
 @st.cache_data(show_spinner=False)
@@ -156,12 +186,13 @@ def load_analysis_dataset_cached(
     persona_ids: tuple[str, ...],
 ) -> AnalysisDataset:
     store = activation_store_cached(source, location, model_name, mask_strategy_value)
-    return load_analysis_dataset(
-        store,
-        variants,
-        mask_strategy=MaskStrategy(mask_strategy_value),
-        persona_ids=persona_ids,
-    )
 def load_persona_vectors_cached(

 import os
+from contextlib import contextmanager
 import streamlit as st
 from persona_vectors.analysis import (
 _PREPARED_CACHE_ENTRIES = env_int("PERSONA_UI_PREPARED_CACHE_ENTRIES", 8)
+def _hub_variants_pending(store: Store, variants: tuple[str, ...]) -> tuple[str, ...]:
+    """Return Hub variants that have not yet been opened by this store instance."""
+    if not isinstance(store, HFPersonaVectorStore):
+        return ()
+    return tuple(variant for variant in variants if variant not in store._datasets)
+@contextmanager
+def _hub_vector_notice(store: Store, variants: tuple[str, ...]):
+    """Show a transient, honest cold-load note for Hub-backed vector data."""
+    pending = _hub_variants_pending(store, variants)
+    if not pending:
+        yield
+        return
+    notice = st.empty()
+    notice.warning(
+        "Loading persona vectors from Hugging Face. "
+        "On a cold cache, this may download Hub dataset files."
+    )
+    try:
+        yield
+    finally:
+        notice.empty()
 @st.cache_resource(show_spinner=False, max_entries=_STORE_CACHE_ENTRIES)
 def activation_store_cached(
     source: str,
     *,
     include_baseline: bool = False,
 ) -> list[str]:
+    store = activation_store_cached(source, location, model_name, mask_strategy_value)
+    with _hub_vector_notice(store, variants):
+        return store.list_personas(list(variants), include_baseline=include_baseline)
 @st.cache_data(show_spinner=False)
     persona_ids: tuple[str, ...],
 ) -> dict[str, str]:
     store = activation_store_cached(source, location, model_name, mask_strategy_value)
+    with _hub_vector_notice(store, variants):
+        names = store.persona_names(list(persona_ids), variants=list(variants))
     # Preserve input order, fall back to the id when the row has no display name.
     return {pid: names.get(pid, pid) for pid in persona_ids}
     variants: tuple[str, ...],
     persona_ids: tuple[str, ...],
 ) -> list[int]:
+    store = activation_store_cached(source, location, model_name, mask_strategy_value)
+    with _hub_vector_notice(store, variants):
+        return store.list_layers(list(variants), list(persona_ids))
 @st.cache_data(show_spinner=False)
     persona_ids: tuple[str, ...],
 ) -> AnalysisDataset:
     store = activation_store_cached(source, location, model_name, mask_strategy_value)
+    with _hub_vector_notice(store, variants):
+        return load_analysis_dataset(
+            store,
+            variants,
+            mask_strategy=MaskStrategy(mask_strategy_value),
+            persona_ids=persona_ids,
+        )
 def load_persona_vectors_cached(

utils/chat.py CHANGED Viewed

@@ -187,6 +187,7 @@ def generate_chat_reply(
     repetition_penalty: float = 1.0,
     seed: int | None = None,
     on_status: Callable[[str, str, str], None] | None = None,
 ) -> ChatReply:
     """Generate one assistant reply from a full chat history.
@@ -230,7 +231,12 @@ def generate_chat_reply(
         generation_kwargs["repetition_penalty"] = repetition_penalty
     # `remote` is captured by nnsight's RemoteableMixin.trace() and is NOT
     # forwarded to the underlying model's generate
-    backend = _build_remote_backend(model, on_status) if remote else None
     with (
         _seeded_rng(seed if do_sample and not remote else None),
@@ -256,34 +262,3 @@ def generate_chat_reply(
         text=text,
         generated_ids=generated_ids.detach().cpu(),
     )
-def _build_remote_backend(
-    model: StandardizedTransformer,
-    on_status: Callable[[str, str, str], None] | None,
-):
-    """Build an NDIF backend that can surface lifecycle updates to callers."""
-    if on_status is None:
-        return None
-    from nnsight.intervention.backends.remote import JobStatusDisplay, RemoteBackend
-    class _CallbackJobStatusDisplay(JobStatusDisplay):
-        def update(
-            self,
-            job_id: str = "",
-            status_name: str = "",
-            description: str = "",
-        ):
-            super().update(job_id, status_name, description)
-            if status_name:
-                on_status(job_id, status_name, description)
-    backend = RemoteBackend(model.to_model_key())
-    backend.CONNECT_TIMEOUT = 300.0
-    backend.status_display = _CallbackJobStatusDisplay(
-        enabled=True,
-        verbose=backend.verbose,
-    )
-    return backend

     repetition_penalty: float = 1.0,
     seed: int | None = None,
     on_status: Callable[[str, str, str], None] | None = None,
+    ndif_api_key: str | None = None,
 ) -> ChatReply:
     """Generate one assistant reply from a full chat history.
         generation_kwargs["repetition_penalty"] = repetition_penalty
     # `remote` is captured by nnsight's RemoteableMixin.trace() and is NOT
     # forwarded to the underlying model's generate
+    if remote:
+        from utils.runtime import remote_backend
+        backend = remote_backend(model, ndif_api_key, on_status=on_status)
+    else:
+        backend = None
     with (
         _seeded_rng(seed if do_sample and not remote else None),
         text=text,
         generated_ids=generated_ids.detach().cpu(),
     )

utils/contrast.py CHANGED Viewed

@@ -100,6 +100,7 @@ def _score_passes(
     model: StandardizedTransformer,
     specs: list[PassSpec],
     remote: bool,
 ) -> dict[str, torch.Tensor]:
     """
     Run one forward pass per spec and return reduced per-token logprobs.
@@ -115,7 +116,13 @@ def _score_passes(
         n_resp: int,
         target_ids: torch.Tensor,
     ) -> torch.Tensor:
-        with torch.no_grad(), model.trace(input_ids, remote=remote):
             # logit at position i predicts token i+1, so response token j
             # (at full-text position n_ctx+j) uses logit at n_ctx+j-1.
             resp_logits = model.logits[0, n_ctx - 1 : n_ctx - 1 + n_resp].float()
@@ -157,6 +164,7 @@ def compute_contrast(
     label_a: str,
     label_b: str,
     remote: bool = False,
 ) -> "TokenContrast | None":
     """Compute per-token contrast weights for a single response (2 forward passes)."""
     tokenizer = model.tokenizer
@@ -164,7 +172,7 @@ def compute_contrast(
         return None
     specs = _specs_for_response(tokenizer, response_ids, context_a, context_b, "r")
-    out = _score_passes(model, specs, remote)
     return _build_contrast(
         tokenizer, response_ids, out["r_under_a"], out["r_under_b"], label_a, label_b
     )
@@ -179,6 +187,7 @@ def compute_contrast_pair(
     label_a: str,
     label_b: str,
     remote: bool = False,
 ) -> tuple["TokenContrast | None", "TokenContrast | None"]:
     """
     Compute contrast weights for both panel responses (up to 4 remote passes).
@@ -197,7 +206,7 @@ def compute_contrast_pair(
             tokenizer, response_ids_b, context_a, context_b, "b"
         )
-    out = _score_passes(model, specs, remote)
     def _build(resp_ids: torch.Tensor, prefix: str) -> "TokenContrast | None":
         k_a, k_b = f"{prefix}_under_a", f"{prefix}_under_b"

     model: StandardizedTransformer,
     specs: list[PassSpec],
     remote: bool,
+    ndif_api_key: str | None = None,
 ) -> dict[str, torch.Tensor]:
     """
     Run one forward pass per spec and return reduced per-token logprobs.
         n_resp: int,
         target_ids: torch.Tensor,
     ) -> torch.Tensor:
+        if remote:
+            from utils.runtime import remote_backend
+            backend = remote_backend(model, ndif_api_key)
+        else:
+            backend = None
+        with torch.no_grad(), model.trace(input_ids, remote=remote, backend=backend):
             # logit at position i predicts token i+1, so response token j
             # (at full-text position n_ctx+j) uses logit at n_ctx+j-1.
             resp_logits = model.logits[0, n_ctx - 1 : n_ctx - 1 + n_resp].float()
     label_a: str,
     label_b: str,
     remote: bool = False,
+    ndif_api_key: str | None = None,
 ) -> "TokenContrast | None":
     """Compute per-token contrast weights for a single response (2 forward passes)."""
     tokenizer = model.tokenizer
         return None
     specs = _specs_for_response(tokenizer, response_ids, context_a, context_b, "r")
+    out = _score_passes(model, specs, remote, ndif_api_key)
     return _build_contrast(
         tokenizer, response_ids, out["r_under_a"], out["r_under_b"], label_a, label_b
     )
     label_a: str,
     label_b: str,
     remote: bool = False,
+    ndif_api_key: str | None = None,
 ) -> tuple["TokenContrast | None", "TokenContrast | None"]:
     """
     Compute contrast weights for both panel responses (up to 4 remote passes).
             tokenizer, response_ids_b, context_a, context_b, "b"
         )
+    out = _score_passes(model, specs, remote, ndif_api_key)
     def _build(resp_ids: torch.Tensor, prefix: str) -> "TokenContrast | None":
         k_a, k_b = f"{prefix}_under_a", f"{prefix}_under_b"

utils/datasets.py CHANGED Viewed

@@ -183,7 +183,8 @@ def _download_missing_startup_files_if_needed(
     if not missing:
         return
-    st.warning(
         f"First-time setup for {label}: downloading dataset files from Hugging Face. "
         "Later loads should use the local cache."
     )
@@ -199,6 +200,7 @@ def _download_missing_startup_files_if_needed(
             index / total,
             text=f"Downloaded {filename} ({index}/{total})",
         )
 def _prepare_nemotron_startup_download(dataset_source: str, label: str) -> None:

     if not missing:
         return
+    notice = st.empty()
+    notice.warning(
         f"First-time setup for {label}: downloading dataset files from Hugging Face. "
         "Later loads should use the local cache."
     )
             index / total,
             text=f"Downloaded {filename} ({index}/{total})",
         )
+    notice.empty()
 def _prepare_nemotron_startup_download(dataset_source: str, label: str) -> None:

utils/probe_trace.py CHANGED Viewed

@@ -51,6 +51,7 @@ def trace_conversation(
     layer: int,
     location: str,
     remote: bool,
 ) -> ConversationTrace:
     prompt_text, _ = format_generation_prompt(
         messages,
@@ -71,7 +72,13 @@ def trace_conversation(
         return cached
     accessor = _select_accessor(model, location)
-    with torch.no_grad(), model.trace(prompt_text, remote=remote):
         saved_ids = model.input_ids[0].detach().cpu().save()
         saved_acts = accessor[layer][0].detach().float().cpu().save()

     layer: int,
     location: str,
     remote: bool,
+    ndif_api_key: str | None = None,
 ) -> ConversationTrace:
     prompt_text, _ = format_generation_prompt(
         messages,
         return cached
     accessor = _select_accessor(model, location)
+    if remote:
+        from utils.runtime import remote_backend
+        backend = remote_backend(model, ndif_api_key)
+    else:
+        backend = None
+    with torch.no_grad(), model.trace(prompt_text, remote=remote, backend=backend):
         saved_ids = model.input_ids[0].detach().cpu().save()
         saved_acts = accessor[layer][0].detach().float().cpu().save()

utils/runtime.py CHANGED Viewed

@@ -1,15 +1,17 @@
 import json
 import logging
 from collections.abc import Iterable
 import streamlit as st
-from utils.helpers import env_int
 logger = logging.getLogger(__name__)
 _LANGUAGE_MODEL_CLASSES = {"LanguageModel", "StandardizedTransformer"}
 _EXPECTED_NDIF_STATES = {"RUNNING", "NOT DEPLOYED", "DEPLOYING", "DELETING"}
 _MODEL_CACHE_ENTRIES = env_int("PERSONA_UI_MODEL_CACHE_ENTRIES", 1)
 def _iter_deployments(raw: object) -> Iterable[dict]:
@@ -60,17 +62,17 @@ def _unexpected_state(deployment: dict) -> tuple[str, str] | None:
 def list_remote_models() -> list[str]:
     """Return the NDIF language models that are currently running.
-    Parses the raw NDIF response directly instead of going through
-    ``nnsight.ndif_status()`` because that call crashes whenever NDIF reports
     any deployment with an ``application_state`` that isn't in nnsight's
     ``ModelStatus`` enum (e.g. ``UNHEALTHY``) — one bad deployment poisons
     the whole response. See nnsight 0.6.3 ``ndif.py::status``.
     """
-    import nnsight
     try:
-        raw = nnsight.ndif_status(raw=True)
     except Exception:
         logger.warning("Failed to fetch NDIF status", exc_info=True)
         return []
@@ -94,6 +96,52 @@ def list_remote_models() -> list[str]:
     return sorted(set(model_names))
 @st.cache_resource(show_spinner=False, max_entries=_MODEL_CACHE_ENTRIES)
 def cached_model(model_name: str):
     """Load and cache a standardized nnterp model.

 import json
 import logging
+import os
 from collections.abc import Iterable
 import streamlit as st
+from utils.helpers import env_int, session_key
 logger = logging.getLogger(__name__)
 _LANGUAGE_MODEL_CLASSES = {"LanguageModel", "StandardizedTransformer"}
 _EXPECTED_NDIF_STATES = {"RUNNING", "NOT DEPLOYED", "DEPLOYING", "DELETING"}
 _MODEL_CACHE_ENTRIES = env_int("PERSONA_UI_MODEL_CACHE_ENTRIES", 1)
+_SESSION_NDIF_API_KEY = session_key("sidebar", "ndif_api_key")
 def _iter_deployments(raw: object) -> Iterable[dict]:
 def list_remote_models() -> list[str]:
     """Return the NDIF language models that are currently running.
+    Parses the raw NDIF response directly instead of going through the formatted
+    ``nnsight.ndif.status()`` response because formatting crashes whenever NDIF reports
     any deployment with an ``application_state`` that isn't in nnsight's
     ``ModelStatus`` enum (e.g. ``UNHEALTHY``) — one bad deployment poisons
     the whole response. See nnsight 0.6.3 ``ndif.py::status``.
     """
+    from nnsight.ndif import status
     try:
+        raw = status(raw=True)
     except Exception:
         logger.warning("Failed to fetch NDIF status", exc_info=True)
         return []
     return sorted(set(model_names))
+def session_ndif_api_key() -> str | None:
+    """Return this visitor's NDIF key without touching process globals."""
+    value = st.session_state.get(_SESSION_NDIF_API_KEY)  # read from Streamlit session state; .get() yields None when the key is absent
+    return value if isinstance(value, str) and value else None  # anything that is not a non-empty string is reported as "no key"
+def configured_ndif_api_key() -> str | None:
+    """Return an app-level NDIF key configured through the environment, if any."""
+    value = os.environ.get("NDIF_API_KEY")  # None when the variable is unset
+    return value if value else None  # an empty string is normalized to None as well
+def remote_backend(model: object, api_key: str | None = None, *, on_status=None):
+    """Build an NDIF backend with credentials bound to one browser session."""
+    from nnsight.intervention.backends.remote import JobStatusDisplay, RemoteBackend  # imported inside the function, at call time
+    active_key = api_key or session_ndif_api_key() or configured_ndif_api_key()  # precedence: explicit argument, then session key, then environment key
+    if not active_key:
+        raise RuntimeError("Enter your NDIF API key before using remote execution.")
+    backend = RemoteBackend(model.to_model_key(), api_key=active_key)
+    backend.CONNECT_TIMEOUT = 300.0  # assigned on this backend object only; presumably seconds - TODO confirm against nnsight
+    if on_status is None:
+        return backend  # no callback supplied: the backend's status display is left untouched
+    class _CallbackJobStatusDisplay(JobStatusDisplay):  # status display that additionally forwards updates to on_status
+        def update(
+            self,
+            job_id: str = "",
+            status_name: str = "",
+            description: str = "",
+        ):
+            super().update(job_id, status_name, description)  # run the base display's update first
+            if status_name:  # updates with an empty status name are not forwarded
+                on_status(job_id, status_name, description)
+    backend.status_display = _CallbackJobStatusDisplay(
+        enabled=True,
+        verbose=backend.verbose,  # reuse the backend's own verbosity setting
+    )
+    return backend
 @st.cache_resource(show_spinner=False, max_entries=_MODEL_CACHE_ENTRIES)
 def cached_model(model_name: str):
     """Load and cache a standardized nnterp model.

uv.lock CHANGED Viewed

@@ -1608,7 +1608,7 @@ requires-dist = [
     { name = "catppuccin", specifier = ">=2.5.0" },
     { name = "datasets", specifier = ">=4.8.5" },
     { name = "huggingface-hub", specifier = ">=1.14.0" },
-    { name = "persona-vectors", specifier = ">=0.8.3" },
     { name = "plotly", specifier = ">=6.6.0" },
     { name = "python-dotenv", specifier = ">=1.2.2" },
     { name = "safetensors", specifier = ">=0.7.0" },
@@ -1620,7 +1620,7 @@ dev = [{ name = "pytest", specifier = ">=9.0.3" }]
 [[package]]
 name = "persona-vectors"
-version = "0.8.3"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "datasets" },
@@ -1639,9 +1639,9 @@ dependencies = [
     { name = "transformers" },
     { name = "umap-learn" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/c0/1d/472284f43e2a276a035e9e3de08a92654945193699598def6d6a2aa74c96/persona_vectors-0.8.3.tar.gz", hash = "sha256:f0519846b3712865bd2562cd239df05ddd006ac3d1e73e5ec5a6c860aaed5b2e", size = 43146, upload-time = "2026-05-17T12:43:13.601Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/60/d1/a38dc354718310122cd5d3de63e3aa9060490c8db4c2eadb1d4985684796/persona_vectors-0.8.3-py3-none-any.whl", hash = "sha256:2feeaf45b071ed417d88add48a1012455c8027e4f839e99658a9808c26786b8a", size = 53129, upload-time = "2026-05-17T12:43:12.693Z" },
 ]
 [[package]]

     { name = "catppuccin", specifier = ">=2.5.0" },
     { name = "datasets", specifier = ">=4.8.5" },
     { name = "huggingface-hub", specifier = ">=1.14.0" },
+    { name = "persona-vectors", specifier = ">=0.8.4" },
     { name = "plotly", specifier = ">=6.6.0" },
     { name = "python-dotenv", specifier = ">=1.2.2" },
     { name = "safetensors", specifier = ">=0.7.0" },
 [[package]]
 name = "persona-vectors"
+version = "0.8.4"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "datasets" },
     { name = "transformers" },
     { name = "umap-learn" },
 ]
+sdist = { url = "https://files.pythonhosted.org/packages/65/e4/9f7d9e082d3719e7b0e808b853c74795a902c2c433a9bf5cab1bfe712385/persona_vectors-0.8.4.tar.gz", hash = "sha256:46a941c6f6c4029c0ac32c103c9f8c9574fdb3a288fb07b9477c13e08b6941e8", size = 43333, upload-time = "2026-05-18T17:28:07.812Z" }
 wheels = [
+    { url = "https://files.pythonhosted.org/packages/4e/6f/25f63c81c0ac7f5daafe8a18a23a11b351be982109f8e12d615f9bb97080/persona_vectors-0.8.4-py3-none-any.whl", hash = "sha256:4f3de83a4527c432e8974e509bfc0e92dfc53a199ee52421a217bfc2edfbe0d0", size = 53324, upload-time = "2026-05-18T17:28:06.862Z" },
 ]
 [[package]]