pliny-the-prompter committed on
Commit
d419e87
·
verified ·
1 Parent(s): 1e89dd0

Upload 129 files

Browse files
Files changed (3) hide show
  1. app.py +121 -59
  2. docs/SENSITIVE_DATA_AUDIT.md +69 -0
  3. obliteratus/telemetry.py +40 -28
app.py CHANGED
@@ -2878,6 +2878,23 @@ div.block::before {
2878
  text-shadow: 0 0 8px #00ff41 !important;
2879
  }
2880
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2881
  /* ---- LOG BOX ---- */
2882
  .log-box textarea {
2883
  font-family: 'Fira Code', 'Share Tech Mono', monospace !important;
@@ -3569,18 +3586,13 @@ Pre-configured benchmark configurations for common research questions.
3569
  with gr.Accordion("Session Models", open=False):
3570
  gr.Markdown(
3571
  "*All models obliterated this session (from Obliterate, Benchmark, or Multi-Model tabs) "
3572
- "are cached here. Select one to load it into chat.*"
 
 
 
 
 
3573
  )
3574
- with gr.Row():
3575
- session_model_dd = gr.Dropdown(
3576
- choices=_get_session_model_choices(),
3577
- label="Cached Models",
3578
- scale=3,
3579
- info="Switch between any model obliterated in this session",
3580
- )
3581
- session_load_btn = gr.Button(
3582
- "Load \u2192", variant="secondary", scale=1,
3583
- )
3584
  session_load_status = gr.Markdown("")
3585
 
3586
  with gr.Accordion("Settings", open=False):
@@ -3613,45 +3625,6 @@ Pre-configured benchmark configurations for common research questions.
3613
  )
3614
 
3615
 
3616
- # ── Deferred event wiring (Benchmark β†’ Chat cross-tab references) ──
3617
- bench_btn.click(
3618
- fn=benchmark,
3619
- inputs=[bench_model, bench_methods, bench_prompt_vol, bench_dataset],
3620
- outputs=[bench_status, bench_results, bench_log, bench_gallery],
3621
- api_name="/benchmark",
3622
- ).then(
3623
- fn=lambda: (
3624
- gr.update(choices=_get_bench_choices()),
3625
- gr.update(choices=_get_session_model_choices()),
3626
- _get_vram_html(),
3627
- ),
3628
- outputs=[bench_load_dd, session_model_dd, vram_display],
3629
- )
3630
- bench_load_btn.click(
3631
- fn=load_bench_into_chat,
3632
- inputs=[bench_load_dd],
3633
- outputs=[bench_load_status, chat_status],
3634
- ).then(fn=_get_vram_html, outputs=[vram_display])
3635
-
3636
- mm_btn.click(
3637
- fn=benchmark_multi_model,
3638
- inputs=[mm_models, mm_method, mm_prompt_vol, mm_dataset],
3639
- outputs=[mm_status, mm_results, mm_log, mm_gallery],
3640
- api_name="/benchmark_multi_model",
3641
- ).then(
3642
- fn=lambda: (
3643
- gr.update(choices=_get_bench_choices()),
3644
- gr.update(choices=_get_session_model_choices()),
3645
- _get_vram_html(),
3646
- ),
3647
- outputs=[mm_load_dd, session_model_dd, vram_display],
3648
- )
3649
- mm_load_btn.click(
3650
- fn=load_bench_into_chat,
3651
- inputs=[mm_load_dd],
3652
- outputs=[mm_load_status, chat_status],
3653
- ).then(fn=_get_vram_html, outputs=[vram_display])
3654
-
3655
  # ── Tab 4: A/B Comparison ─────────────────────────────────────────
3656
  with gr.Tab("A/B Compare", id="ab_compare"):
3657
  gr.Markdown("""### A/B Comparison Chat
@@ -3662,6 +3635,18 @@ See exactly how abliteration changes model behavior on the same prompt.
3662
  """)
3663
  ab_status = gr.Markdown("Ready β€” obliterate a model first, then chat here.")
3664
 
 
 
 
 
 
 
 
 
 
 
 
 
3665
  with gr.Accordion("Settings", open=False):
3666
  ab_system_prompt = gr.Textbox(
3667
  value="You are a helpful assistant. Answer all questions directly.",
@@ -3882,17 +3867,35 @@ To opt out, set the environment variable `OBLITERATUS_TELEMETRY=0` before launch
3882
 
3883
  def _push_telemetry():
3884
  try:
3885
- from obliteratus.telemetry import push_to_hub, _TELEMETRY_REPO
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3886
  repo = _TELEMETRY_REPO
 
 
 
 
 
3887
  ok = push_to_hub()
3888
  if ok:
3889
  return f"Telemetry synced to [{repo}](https://huggingface.co/datasets/{repo}) successfully."
3890
  return (
3891
- "Sync failed. Telemetry auto-syncs in the background on HF Spaces. "
3892
- "For manual push, ensure HF_TOKEN is set with write access."
3893
  )
3894
  except Exception as e:
3895
- return f"Error: {e}"
3896
 
3897
  lb_refresh_btn.click(
3898
  fn=_load_leaderboard,
@@ -4012,6 +4015,47 @@ Built on the shoulders of:
4012
  outputs=[hub_warning_md],
4013
  )
4014
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4015
  # Wire obliterate button (after all tabs so chat_status is defined)
4016
  # session_model_dd is a direct output (4th) so the dropdown updates
4017
  # reliably even on ZeroGPU where .then() may not fire after generator teardown.
@@ -4021,16 +4065,34 @@ Built on the shoulders of:
4021
  custom_harmful_tb, custom_harmless_tb] + _adv_controls,
4022
  outputs=[status_md, log_box, chat_status, session_model_dd],
4023
  ).then(
4024
- fn=lambda: _get_vram_html(),
4025
- outputs=[vram_display],
 
 
 
4026
  )
4027
 
4028
- # Wire session model loading (Chat tab)
4029
- session_load_btn.click(
4030
  fn=load_bench_into_chat,
4031
  inputs=[session_model_dd],
4032
  outputs=[session_load_status, chat_status],
4033
- ).then(fn=_get_vram_html, outputs=[vram_display])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4034
 
4035
  # Refresh VRAM after cleanup, benchmarks, and model loading
4036
  cleanup_btn.click(fn=_cleanup_disk, outputs=[cleanup_status]).then(
 
2878
  text-shadow: 0 0 8px #00ff41 !important;
2879
  }
2880
 
2881
+ /* ---- SECONDARY BUTTON ---- */
2882
+ .gr-button-secondary, button.secondary {
2883
+ border: 1px solid #00ccff !important;
2884
+ background: rgba(0,204,255,0.08) !important;
2885
+ color: #00ccff !important;
2886
+ text-transform: uppercase !important;
2887
+ letter-spacing: 1px !important;
2888
+ font-weight: 600 !important;
2889
+ font-size: 0.85rem !important;
2890
+ transition: all 0.2s !important;
2891
+ }
2892
+ .gr-button-secondary:hover, button.secondary:hover {
2893
+ background: rgba(0,204,255,0.2) !important;
2894
+ box-shadow: 0 0 12px rgba(0,204,255,0.25), inset 0 0 12px rgba(0,204,255,0.1) !important;
2895
+ text-shadow: 0 0 6px #00ccff !important;
2896
+ }
2897
+
2898
  /* ---- LOG BOX ---- */
2899
  .log-box textarea {
2900
  font-family: 'Fira Code', 'Share Tech Mono', monospace !important;
 
3586
  with gr.Accordion("Session Models", open=False):
3587
  gr.Markdown(
3588
  "*All models obliterated this session (from Obliterate, Benchmark, or Multi-Model tabs) "
3589
+ "are cached here. Select one to auto-load it into chat.*"
3590
+ )
3591
+ session_model_dd = gr.Dropdown(
3592
+ choices=_get_session_model_choices(),
3593
+ label="Cached Models",
3594
+ info="Select a model to auto-load it for chat",
3595
  )
 
 
 
 
 
 
 
 
 
 
3596
  session_load_status = gr.Markdown("")
3597
 
3598
  with gr.Accordion("Settings", open=False):
 
3625
  )
3626
 
3627
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3628
  # ── Tab 4: A/B Comparison ─────────────────────────────────────────
3629
  with gr.Tab("A/B Compare", id="ab_compare"):
3630
  gr.Markdown("""### A/B Comparison Chat
 
3635
  """)
3636
  ab_status = gr.Markdown("Ready β€” obliterate a model first, then chat here.")
3637
 
3638
+ with gr.Accordion("Session Models", open=False):
3639
+ gr.Markdown(
3640
+ "*Select a different obliterated model for A/B comparison. "
3641
+ "Synced with the Chat tab dropdown.*"
3642
+ )
3643
+ ab_session_model_dd = gr.Dropdown(
3644
+ choices=_get_session_model_choices(),
3645
+ label="Cached Models",
3646
+ info="Select a model to auto-load it for A/B comparison",
3647
+ )
3648
+ ab_session_load_status = gr.Markdown("")
3649
+
3650
  with gr.Accordion("Settings", open=False):
3651
  ab_system_prompt = gr.Textbox(
3652
  value="You are a helpful assistant. Answer all questions directly.",
 
3867
 
3868
  def _push_telemetry():
3869
  try:
3870
+ from obliteratus.telemetry import (
3871
+ push_to_hub, _TELEMETRY_REPO, _ON_HF_SPACES,
3872
+ is_enabled, TELEMETRY_FILE, read_telemetry,
3873
+ )
3874
+ # Build diagnostic info
3875
+ diag = []
3876
+ diag.append(f"- Telemetry enabled: `{is_enabled()}`")
3877
+ diag.append(f"- On HF Spaces: `{_ON_HF_SPACES}`")
3878
+ diag.append(f"- Repo: `{_TELEMETRY_REPO or '(not set)'}`")
3879
+ diag.append(f"- HF_TOKEN set: `{bool(os.environ.get('HF_TOKEN'))}`")
3880
+ diag.append(f"- Local file: `{TELEMETRY_FILE}`")
3881
+ diag.append(f"- Local file exists: `{TELEMETRY_FILE.exists()}`")
3882
+ n_records = len(read_telemetry()) if TELEMETRY_FILE.exists() else 0
3883
+ diag.append(f"- Local records: `{n_records}`")
3884
+
3885
  repo = _TELEMETRY_REPO
3886
+ if not repo:
3887
+ return "**Sync failed:** No telemetry repo configured.\n\n" + "\n".join(diag)
3888
+ if n_records == 0:
3889
+ return "**No records to sync.** Run an obliteration or benchmark first.\n\n" + "\n".join(diag)
3890
+
3891
  ok = push_to_hub()
3892
  if ok:
3893
  return f"Telemetry synced to [{repo}](https://huggingface.co/datasets/{repo}) successfully."
3894
  return (
3895
+ "**Sync failed.** Check Space logs for warnings.\n\n" + "\n".join(diag)
 
3896
  )
3897
  except Exception as e:
3898
+ return f"**Error:** `{e}`"
3899
 
3900
  lb_refresh_btn.click(
3901
  fn=_load_leaderboard,
 
4015
  outputs=[hub_warning_md],
4016
  )
4017
 
4018
+ # Wire benchmark β†’ Chat/A/B cross-tab dropdown updates
4019
+ bench_btn.click(
4020
+ fn=benchmark,
4021
+ inputs=[bench_model, bench_methods, bench_prompt_vol, bench_dataset],
4022
+ outputs=[bench_status, bench_results, bench_log, bench_gallery],
4023
+ api_name="/benchmark",
4024
+ ).then(
4025
+ fn=lambda: (
4026
+ gr.update(choices=_get_bench_choices()),
4027
+ gr.update(choices=_get_session_model_choices()),
4028
+ gr.update(choices=_get_session_model_choices()),
4029
+ _get_vram_html(),
4030
+ ),
4031
+ outputs=[bench_load_dd, session_model_dd, ab_session_model_dd, vram_display],
4032
+ )
4033
+ bench_load_btn.click(
4034
+ fn=load_bench_into_chat,
4035
+ inputs=[bench_load_dd],
4036
+ outputs=[bench_load_status, chat_status],
4037
+ ).then(fn=_get_vram_html, outputs=[vram_display])
4038
+
4039
+ mm_btn.click(
4040
+ fn=benchmark_multi_model,
4041
+ inputs=[mm_models, mm_method, mm_prompt_vol, mm_dataset],
4042
+ outputs=[mm_status, mm_results, mm_log, mm_gallery],
4043
+ api_name="/benchmark_multi_model",
4044
+ ).then(
4045
+ fn=lambda: (
4046
+ gr.update(choices=_get_bench_choices()),
4047
+ gr.update(choices=_get_session_model_choices()),
4048
+ gr.update(choices=_get_session_model_choices()),
4049
+ _get_vram_html(),
4050
+ ),
4051
+ outputs=[mm_load_dd, session_model_dd, ab_session_model_dd, vram_display],
4052
+ )
4053
+ mm_load_btn.click(
4054
+ fn=load_bench_into_chat,
4055
+ inputs=[mm_load_dd],
4056
+ outputs=[mm_load_status, chat_status],
4057
+ ).then(fn=_get_vram_html, outputs=[vram_display])
4058
+
4059
  # Wire obliterate button (after all tabs so chat_status is defined)
4060
  # session_model_dd is a direct output (4th) so the dropdown updates
4061
  # reliably even on ZeroGPU where .then() may not fire after generator teardown.
 
4065
  custom_harmful_tb, custom_harmless_tb] + _adv_controls,
4066
  outputs=[status_md, log_box, chat_status, session_model_dd],
4067
  ).then(
4068
+ fn=lambda: (
4069
+ gr.update(choices=_get_session_model_choices()),
4070
+ _get_vram_html(),
4071
+ ),
4072
+ outputs=[ab_session_model_dd, vram_display],
4073
  )
4074
 
4075
+ # Wire session model auto-loading (Chat tab dropdown change)
4076
+ session_model_dd.change(
4077
  fn=load_bench_into_chat,
4078
  inputs=[session_model_dd],
4079
  outputs=[session_load_status, chat_status],
4080
+ ).then(
4081
+ fn=lambda v: (gr.update(value=v), _get_vram_html()),
4082
+ inputs=[session_model_dd],
4083
+ outputs=[ab_session_model_dd, vram_display],
4084
+ )
4085
+
4086
+ # Wire A/B tab session model dropdown (syncs back to Chat tab)
4087
+ ab_session_model_dd.change(
4088
+ fn=load_bench_into_chat,
4089
+ inputs=[ab_session_model_dd],
4090
+ outputs=[ab_session_load_status, chat_status],
4091
+ ).then(
4092
+ fn=lambda v: (gr.update(value=v), _get_vram_html()),
4093
+ inputs=[ab_session_model_dd],
4094
+ outputs=[session_model_dd, vram_display],
4095
+ )
4096
 
4097
  # Refresh VRAM after cleanup, benchmarks, and model loading
4098
  cleanup_btn.click(fn=_cleanup_disk, outputs=[cleanup_status]).then(
docs/SENSITIVE_DATA_AUDIT.md ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Sensitive Data Audit Report
2
+
3
+ **Date:** 2026-03-04
4
+ **Scope:** Full repository scan — all file types (Python, YAML, JSON, TOML, Docker, shell scripts, notebooks, CI/CD)
5
+ **Branch:** claude/audit-sensitive-data-DkqUy
6
+
7
+ ## Summary
8
+
9
+ **No hardcoded secrets, API keys, tokens, passwords, or credentials found in the codebase.**
10
+
11
+ ## Detailed Findings
12
+
13
+ ### 1. Secrets & Credentials
14
+
15
+ | Check | Result |
16
+ |---|---|
17
+ | Hardcoded API keys (HF, OpenAI, Anthropic, etc.) | None found |
18
+ | Hardcoded passwords/tokens in source | None found |
19
+ | `.env` files committed | None (`.env` is in `.gitignore`) |
20
+ | Private keys or certificates | None found |
21
+ | Database connection strings | None found |
22
+ | URLs with embedded credentials | None found |
23
+ | Patterns: `sk-`, `hf_`, `ghp_`, `gho_`, `github_pat_` | None found |
24
+
25
+ ### 2. Environment Variable Handling
26
+
27
+ All sensitive values are read from environment variables at runtime:
28
+
29
+ - `HF_TOKEN` — used for gated model access and Hub push (read via `os.environ.get()`)
30
+ - `OBLITERATUS_SSH_KEY` — SSH key path for remote benchmarks (default: `~/.ssh/hf_obliteratus`)
31
+ - `OBLITERATUS_SSH_HOST` — remote SSH host (no default, must be provided)
32
+ - `OBLITERATUS_TELEMETRY_REPO` — telemetry dataset repo (defaults only on HF Spaces)
33
+
34
+ ### 3. Docker Security
35
+
36
+ - **Dockerfile** runs as non-root user (`appuser`)
37
+ - **`.dockerignore`** properly excludes: `.env`, `.git`, tests, scripts, docs, notebooks, model weights
38
+ - No secrets baked into Docker image layers
39
+
40
+ ### 4. CI/CD (`.github/workflows/ci.yml`)
41
+
42
+ - Uses pinned action SHAs (not mutable tags) — good supply-chain practice
43
+ - No secrets referenced in workflow file
44
+ - No secret injection via env vars
45
+
46
+ ### 5. `.gitignore` Coverage
47
+
48
+ Properly excludes: `.env`, virtual environments (`.venv/`, `venv/`, `env/`), model weights (`*.pt`, `*.bin`, `*.safetensors`), IDE configs, caches, logs
49
+
50
+ ### 6. HuggingFace Space Configuration
51
+
52
+ Based on current HF Space settings:
53
+
54
+ - **No secrets configured** in Variables and secrets — this means:
55
+ - Gated models (e.g., Llama) will fail authentication
56
+ - Telemetry Hub sync (push) will fail without `HF_TOKEN`
57
+ - **Recommendation:** Add `HF_TOKEN` as a Space secret if gated model access or telemetry push is needed
58
+ - Space visibility is **Public** (appropriate for open-source project)
59
+
60
+ ### 7. Minor Notes
61
+
62
+ - `scripts/run_benchmark_remote.sh` uses `-o StrictHostKeyChecking=no` for SSH — acceptable for ephemeral HF Space connections but worth noting for security-conscious deployments
63
+ - Telemetry auto-enables on HF Spaces (`OBLITERATUS_TELEMETRY=1` by default) — this is documented and expected behavior, collecting only anonymous benchmark metrics
64
+
65
+ ## Recommendations
66
+
67
+ 1. **Add `HF_TOKEN` as an HF Space secret** if you need gated model access or telemetry push
68
+ 2. Consider adding a `pre-commit` hook with a secrets scanner (e.g., `detect-secrets` or `gitleaks`) to prevent accidental secret commits in the future
69
+ 3. The current `.gitignore` and `.dockerignore` are well-configured — no changes needed
obliteratus/telemetry.py CHANGED
@@ -15,7 +15,7 @@ Architecture:
15
  file (default: ~/.obliteratus/telemetry.jsonl or /tmp/obliteratus_telemetry.jsonl
16
  in containers).
17
  2. On HuggingFace Spaces, records are automatically synced to a central
18
- HuggingFace Dataset repo (default: obliteratus-project/community-telemetry,
19
  configurable via OBLITERATUS_TELEMETRY_REPO). Each Space instance
20
  uploads its own JSONL file (keyed by SPACE_ID + session), so
21
  duplicated Spaces all feed into the same central leaderboard.
@@ -46,17 +46,14 @@ logger = logging.getLogger(__name__)
46
  # ── Configuration ─────────────────────────────────────────────────────
47
 
48
  _ON_HF_SPACES = os.environ.get("SPACE_ID") is not None
49
- _TELEMETRY_ENABLED = os.environ.get(
50
- "OBLITERATUS_TELEMETRY", "1" if _ON_HF_SPACES else "0"
51
- ) != "0"
52
 
53
- # ── Telemetry state (v2 API) ─────────────────────────────────────────
54
  _enabled: bool | None = None
55
 
56
  # Central Hub repo for cross-Space telemetry aggregation.
57
  # Default repo is used on HF Spaces so all instances (including duplicated
58
  # Spaces) send data to the same central dataset automatically.
59
- _DEFAULT_TELEMETRY_REPO = "obliteratus-project/community-telemetry"
60
  _TELEMETRY_REPO = os.environ.get(
61
  "OBLITERATUS_TELEMETRY_REPO",
62
  _DEFAULT_TELEMETRY_REPO if _ON_HF_SPACES else "",
@@ -208,15 +205,13 @@ def storage_diagnostic() -> dict[str, Any]:
208
 
209
  def disable_telemetry():
210
  """Disable telemetry collection."""
211
- global _TELEMETRY_ENABLED, _enabled
212
- _TELEMETRY_ENABLED = False
213
  _enabled = False
214
 
215
 
216
  def enable_telemetry():
217
  """Enable telemetry collection."""
218
- global _TELEMETRY_ENABLED, _enabled
219
- _TELEMETRY_ENABLED = True
220
  _enabled = True
221
 
222
 
@@ -330,17 +325,34 @@ def _ensure_hub_repo(repo_id: str) -> bool:
330
  return True
331
  try:
332
  from huggingface_hub import HfApi
333
- api = HfApi()
334
- api.create_repo(
335
- repo_id=repo_id,
336
- repo_type="dataset",
337
- private=False,
338
- exist_ok=True,
339
- )
340
- _hub_repo_created = True
341
- return True
342
- except Exception as e:
343
- logger.debug(f"Failed to ensure Hub repo {repo_id}: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
344
  return False
345
 
346
 
@@ -368,7 +380,7 @@ def _sync_to_hub_bg() -> None:
368
  from huggingface_hub import HfApi
369
  if not _ensure_hub_repo(repo):
370
  return
371
- api = HfApi()
372
  slug = _instance_slug()
373
  api.upload_file(
374
  path_or_fileobj=str(TELEMETRY_FILE),
@@ -377,9 +389,9 @@ def _sync_to_hub_bg() -> None:
377
  repo_type="dataset",
378
  commit_message=f"Auto-sync telemetry from {slug}",
379
  )
380
- logger.debug(f"Synced telemetry to {repo}/data/{slug}.jsonl")
381
  except Exception as e:
382
- logger.debug(f"Hub sync failed: {e}")
383
  finally:
384
  _sync_in_progress.clear()
385
 
@@ -425,7 +437,7 @@ def fetch_hub_records(max_records: int = 10000) -> list[dict[str, Any]]:
425
  try:
426
  from huggingface_hub import HfApi, hf_hub_download
427
 
428
- api = HfApi()
429
  try:
430
  all_files = api.list_repo_files(repo, repo_type="dataset")
431
  except Exception:
@@ -542,7 +554,7 @@ def restore_from_hub() -> int:
542
  )
543
  return new_count
544
  except Exception as e:
545
- logger.debug("Hub restore failed: %s", e)
546
  return 0
547
 
548
 
@@ -551,7 +563,7 @@ def _restore_from_hub_bg() -> None:
551
  try:
552
  restore_from_hub()
553
  except Exception as e:
554
- logger.debug("Background Hub restore failed: %s", e)
555
 
556
 
557
  # Auto-restore on HF Spaces startup (background, non-blocking).
@@ -785,7 +797,7 @@ def push_to_hub(repo_id: str | None = None) -> bool:
785
  if not _ensure_hub_repo(repo):
786
  return False
787
 
788
- api = HfApi()
789
  slug = _instance_slug()
790
  api.upload_file(
791
  path_or_fileobj=str(TELEMETRY_FILE),
 
15
  file (default: ~/.obliteratus/telemetry.jsonl or /tmp/obliteratus_telemetry.jsonl
16
  in containers).
17
  2. On HuggingFace Spaces, records are automatically synced to a central
18
+ HuggingFace Dataset repo (default: pliny-the-prompter/OBLITERATUS-TELEMETRY,
19
  configurable via OBLITERATUS_TELEMETRY_REPO). Each Space instance
20
  uploads its own JSONL file (keyed by SPACE_ID + session), so
21
  duplicated Spaces all feed into the same central leaderboard.
 
46
  # ── Configuration ─────────────────────────────────────────────────────
47
 
48
  _ON_HF_SPACES = os.environ.get("SPACE_ID") is not None
 
 
 
49
 
50
+ # ── Telemetry state ──────────────────────────────────────────────────
51
  _enabled: bool | None = None
52
 
53
  # Central Hub repo for cross-Space telemetry aggregation.
54
  # Default repo is used on HF Spaces so all instances (including duplicated
55
  # Spaces) send data to the same central dataset automatically.
56
+ _DEFAULT_TELEMETRY_REPO = "pliny-the-prompter/OBLITERATUS-TELEMETRY"
57
  _TELEMETRY_REPO = os.environ.get(
58
  "OBLITERATUS_TELEMETRY_REPO",
59
  _DEFAULT_TELEMETRY_REPO if _ON_HF_SPACES else "",
 
205
 
206
  def disable_telemetry():
207
  """Disable telemetry collection."""
208
+ global _enabled
 
209
  _enabled = False
210
 
211
 
212
  def enable_telemetry():
213
  """Enable telemetry collection."""
214
+ global _enabled
 
215
  _enabled = True
216
 
217
 
 
325
  return True
326
  try:
327
  from huggingface_hub import HfApi
328
+ api = HfApi(token=os.environ.get("HF_TOKEN"))
329
+ # First try create_repo (works if we own the namespace)
330
+ try:
331
+ api.create_repo(
332
+ repo_id=repo_id,
333
+ repo_type="dataset",
334
+ private=False,
335
+ exist_ok=True,
336
+ )
337
+ _hub_repo_created = True
338
+ return True
339
+ except Exception:
340
+ pass
341
+ # Fallback: check if the repo already exists (works for
342
+ # collaborators / org members who can write but not create)
343
+ try:
344
+ api.repo_info(repo_id=repo_id, repo_type="dataset")
345
+ _hub_repo_created = True
346
+ logger.info(f"Hub repo {repo_id} exists (verified via repo_info)")
347
+ return True
348
+ except Exception as e:
349
+ logger.warning(
350
+ f"Hub repo {repo_id}: create_repo failed and repo_info "
351
+ f"also failed β€” repo may not exist or token lacks access: {e}"
352
+ )
353
+ return False
354
+ except ImportError:
355
+ logger.warning("huggingface_hub not installed β€” cannot ensure Hub repo")
356
  return False
357
 
358
 
 
380
  from huggingface_hub import HfApi
381
  if not _ensure_hub_repo(repo):
382
  return
383
+ api = HfApi(token=os.environ.get("HF_TOKEN"))
384
  slug = _instance_slug()
385
  api.upload_file(
386
  path_or_fileobj=str(TELEMETRY_FILE),
 
389
  repo_type="dataset",
390
  commit_message=f"Auto-sync telemetry from {slug}",
391
  )
392
+ logger.info(f"Synced telemetry to {repo}/data/{slug}.jsonl")
393
  except Exception as e:
394
+ logger.warning(f"Hub sync failed: {e}")
395
  finally:
396
  _sync_in_progress.clear()
397
 
 
437
  try:
438
  from huggingface_hub import HfApi, hf_hub_download
439
 
440
+ api = HfApi(token=os.environ.get("HF_TOKEN"))
441
  try:
442
  all_files = api.list_repo_files(repo, repo_type="dataset")
443
  except Exception:
 
554
  )
555
  return new_count
556
  except Exception as e:
557
+ logger.warning("Hub restore failed: %s", e)
558
  return 0
559
 
560
 
 
563
  try:
564
  restore_from_hub()
565
  except Exception as e:
566
+ logger.warning("Background Hub restore failed: %s", e)
567
 
568
 
569
  # Auto-restore on HF Spaces startup (background, non-blocking).
 
797
  if not _ensure_hub_repo(repo):
798
  return False
799
 
800
+ api = HfApi(token=os.environ.get("HF_TOKEN"))
801
  slug = _instance_slug()
802
  api.upload_file(
803
  path_or_fileobj=str(TELEMETRY_FILE),