Spaces:
Running on Zero
Running on Zero
Upload 129 files
Browse files- app.py +121 -59
- docs/SENSITIVE_DATA_AUDIT.md +69 -0
- obliteratus/telemetry.py +40 -28
app.py
CHANGED
|
@@ -2878,6 +2878,23 @@ div.block::before {
|
|
| 2878 |
text-shadow: 0 0 8px #00ff41 !important;
|
| 2879 |
}
|
| 2880 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2881 |
/* ---- LOG BOX ---- */
|
| 2882 |
.log-box textarea {
|
| 2883 |
font-family: 'Fira Code', 'Share Tech Mono', monospace !important;
|
|
@@ -3569,18 +3586,13 @@ Pre-configured benchmark configurations for common research questions.
|
|
| 3569 |
with gr.Accordion("Session Models", open=False):
|
| 3570 |
gr.Markdown(
|
| 3571 |
"*All models obliterated this session (from Obliterate, Benchmark, or Multi-Model tabs) "
|
| 3572 |
-
"are cached here. Select one to load it into chat.*"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3573 |
)
|
| 3574 |
-
with gr.Row():
|
| 3575 |
-
session_model_dd = gr.Dropdown(
|
| 3576 |
-
choices=_get_session_model_choices(),
|
| 3577 |
-
label="Cached Models",
|
| 3578 |
-
scale=3,
|
| 3579 |
-
info="Switch between any model obliterated in this session",
|
| 3580 |
-
)
|
| 3581 |
-
session_load_btn = gr.Button(
|
| 3582 |
-
"Load \u2192", variant="secondary", scale=1,
|
| 3583 |
-
)
|
| 3584 |
session_load_status = gr.Markdown("")
|
| 3585 |
|
| 3586 |
with gr.Accordion("Settings", open=False):
|
|
@@ -3613,45 +3625,6 @@ Pre-configured benchmark configurations for common research questions.
|
|
| 3613 |
)
|
| 3614 |
|
| 3615 |
|
| 3616 |
-
# ββ Deferred event wiring (Benchmark β Chat cross-tab references) ββ
|
| 3617 |
-
bench_btn.click(
|
| 3618 |
-
fn=benchmark,
|
| 3619 |
-
inputs=[bench_model, bench_methods, bench_prompt_vol, bench_dataset],
|
| 3620 |
-
outputs=[bench_status, bench_results, bench_log, bench_gallery],
|
| 3621 |
-
api_name="/benchmark",
|
| 3622 |
-
).then(
|
| 3623 |
-
fn=lambda: (
|
| 3624 |
-
gr.update(choices=_get_bench_choices()),
|
| 3625 |
-
gr.update(choices=_get_session_model_choices()),
|
| 3626 |
-
_get_vram_html(),
|
| 3627 |
-
),
|
| 3628 |
-
outputs=[bench_load_dd, session_model_dd, vram_display],
|
| 3629 |
-
)
|
| 3630 |
-
bench_load_btn.click(
|
| 3631 |
-
fn=load_bench_into_chat,
|
| 3632 |
-
inputs=[bench_load_dd],
|
| 3633 |
-
outputs=[bench_load_status, chat_status],
|
| 3634 |
-
).then(fn=_get_vram_html, outputs=[vram_display])
|
| 3635 |
-
|
| 3636 |
-
mm_btn.click(
|
| 3637 |
-
fn=benchmark_multi_model,
|
| 3638 |
-
inputs=[mm_models, mm_method, mm_prompt_vol, mm_dataset],
|
| 3639 |
-
outputs=[mm_status, mm_results, mm_log, mm_gallery],
|
| 3640 |
-
api_name="/benchmark_multi_model",
|
| 3641 |
-
).then(
|
| 3642 |
-
fn=lambda: (
|
| 3643 |
-
gr.update(choices=_get_bench_choices()),
|
| 3644 |
-
gr.update(choices=_get_session_model_choices()),
|
| 3645 |
-
_get_vram_html(),
|
| 3646 |
-
),
|
| 3647 |
-
outputs=[mm_load_dd, session_model_dd, vram_display],
|
| 3648 |
-
)
|
| 3649 |
-
mm_load_btn.click(
|
| 3650 |
-
fn=load_bench_into_chat,
|
| 3651 |
-
inputs=[mm_load_dd],
|
| 3652 |
-
outputs=[mm_load_status, chat_status],
|
| 3653 |
-
).then(fn=_get_vram_html, outputs=[vram_display])
|
| 3654 |
-
|
| 3655 |
# ββ Tab 4: A/B Comparison βββββββββββββββββββββββββββββββββββββββββ
|
| 3656 |
with gr.Tab("A/B Compare", id="ab_compare"):
|
| 3657 |
gr.Markdown("""### A/B Comparison Chat
|
|
@@ -3662,6 +3635,18 @@ See exactly how abliteration changes model behavior on the same prompt.
|
|
| 3662 |
""")
|
| 3663 |
ab_status = gr.Markdown("Ready β obliterate a model first, then chat here.")
|
| 3664 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3665 |
with gr.Accordion("Settings", open=False):
|
| 3666 |
ab_system_prompt = gr.Textbox(
|
| 3667 |
value="You are a helpful assistant. Answer all questions directly.",
|
|
@@ -3882,17 +3867,35 @@ To opt out, set the environment variable `OBLITERATUS_TELEMETRY=0` before launch
|
|
| 3882 |
|
| 3883 |
def _push_telemetry():
|
| 3884 |
try:
|
| 3885 |
-
from obliteratus.telemetry import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3886 |
repo = _TELEMETRY_REPO
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3887 |
ok = push_to_hub()
|
| 3888 |
if ok:
|
| 3889 |
return f"Telemetry synced to [{repo}](https://huggingface.co/datasets/{repo}) successfully."
|
| 3890 |
return (
|
| 3891 |
-
"Sync failed.
|
| 3892 |
-
"For manual push, ensure HF_TOKEN is set with write access."
|
| 3893 |
)
|
| 3894 |
except Exception as e:
|
| 3895 |
-
return f"Error: {e}"
|
| 3896 |
|
| 3897 |
lb_refresh_btn.click(
|
| 3898 |
fn=_load_leaderboard,
|
|
@@ -4012,6 +4015,47 @@ Built on the shoulders of:
|
|
| 4012 |
outputs=[hub_warning_md],
|
| 4013 |
)
|
| 4014 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4015 |
# Wire obliterate button (after all tabs so chat_status is defined)
|
| 4016 |
# session_model_dd is a direct output (4th) so the dropdown updates
|
| 4017 |
# reliably even on ZeroGPU where .then() may not fire after generator teardown.
|
|
@@ -4021,16 +4065,34 @@ Built on the shoulders of:
|
|
| 4021 |
custom_harmful_tb, custom_harmless_tb] + _adv_controls,
|
| 4022 |
outputs=[status_md, log_box, chat_status, session_model_dd],
|
| 4023 |
).then(
|
| 4024 |
-
fn=lambda:
|
| 4025 |
-
|
|
|
|
|
|
|
|
|
|
| 4026 |
)
|
| 4027 |
|
| 4028 |
-
# Wire session model loading (Chat tab)
|
| 4029 |
-
|
| 4030 |
fn=load_bench_into_chat,
|
| 4031 |
inputs=[session_model_dd],
|
| 4032 |
outputs=[session_load_status, chat_status],
|
| 4033 |
-
).then(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4034 |
|
| 4035 |
# Refresh VRAM after cleanup, benchmarks, and model loading
|
| 4036 |
cleanup_btn.click(fn=_cleanup_disk, outputs=[cleanup_status]).then(
|
|
|
|
| 2878 |
text-shadow: 0 0 8px #00ff41 !important;
|
| 2879 |
}
|
| 2880 |
|
| 2881 |
+
/* ---- SECONDARY BUTTON ---- */
|
| 2882 |
+
.gr-button-secondary, button.secondary {
|
| 2883 |
+
border: 1px solid #00ccff !important;
|
| 2884 |
+
background: rgba(0,204,255,0.08) !important;
|
| 2885 |
+
color: #00ccff !important;
|
| 2886 |
+
text-transform: uppercase !important;
|
| 2887 |
+
letter-spacing: 1px !important;
|
| 2888 |
+
font-weight: 600 !important;
|
| 2889 |
+
font-size: 0.85rem !important;
|
| 2890 |
+
transition: all 0.2s !important;
|
| 2891 |
+
}
|
| 2892 |
+
.gr-button-secondary:hover, button.secondary:hover {
|
| 2893 |
+
background: rgba(0,204,255,0.2) !important;
|
| 2894 |
+
box-shadow: 0 0 12px rgba(0,204,255,0.25), inset 0 0 12px rgba(0,204,255,0.1) !important;
|
| 2895 |
+
text-shadow: 0 0 6px #00ccff !important;
|
| 2896 |
+
}
|
| 2897 |
+
|
| 2898 |
/* ---- LOG BOX ---- */
|
| 2899 |
.log-box textarea {
|
| 2900 |
font-family: 'Fira Code', 'Share Tech Mono', monospace !important;
|
|
|
|
| 3586 |
with gr.Accordion("Session Models", open=False):
|
| 3587 |
gr.Markdown(
|
| 3588 |
"*All models obliterated this session (from Obliterate, Benchmark, or Multi-Model tabs) "
|
| 3589 |
+
"are cached here. Select one to auto-load it into chat.*"
|
| 3590 |
+
)
|
| 3591 |
+
session_model_dd = gr.Dropdown(
|
| 3592 |
+
choices=_get_session_model_choices(),
|
| 3593 |
+
label="Cached Models",
|
| 3594 |
+
info="Select a model to auto-load it for chat",
|
| 3595 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3596 |
session_load_status = gr.Markdown("")
|
| 3597 |
|
| 3598 |
with gr.Accordion("Settings", open=False):
|
|
|
|
| 3625 |
)
|
| 3626 |
|
| 3627 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3628 |
# ββ Tab 4: A/B Comparison βββββββββββββββββββββββββββββββββββββββββ
|
| 3629 |
with gr.Tab("A/B Compare", id="ab_compare"):
|
| 3630 |
gr.Markdown("""### A/B Comparison Chat
|
|
|
|
| 3635 |
""")
|
| 3636 |
ab_status = gr.Markdown("Ready β obliterate a model first, then chat here.")
|
| 3637 |
|
| 3638 |
+
with gr.Accordion("Session Models", open=False):
|
| 3639 |
+
gr.Markdown(
|
| 3640 |
+
"*Select a different obliterated model for A/B comparison. "
|
| 3641 |
+
"Synced with the Chat tab dropdown.*"
|
| 3642 |
+
)
|
| 3643 |
+
ab_session_model_dd = gr.Dropdown(
|
| 3644 |
+
choices=_get_session_model_choices(),
|
| 3645 |
+
label="Cached Models",
|
| 3646 |
+
info="Select a model to auto-load it for A/B comparison",
|
| 3647 |
+
)
|
| 3648 |
+
ab_session_load_status = gr.Markdown("")
|
| 3649 |
+
|
| 3650 |
with gr.Accordion("Settings", open=False):
|
| 3651 |
ab_system_prompt = gr.Textbox(
|
| 3652 |
value="You are a helpful assistant. Answer all questions directly.",
|
|
|
|
| 3867 |
|
| 3868 |
def _push_telemetry():
|
| 3869 |
try:
|
| 3870 |
+
from obliteratus.telemetry import (
|
| 3871 |
+
push_to_hub, _TELEMETRY_REPO, _ON_HF_SPACES,
|
| 3872 |
+
is_enabled, TELEMETRY_FILE, read_telemetry,
|
| 3873 |
+
)
|
| 3874 |
+
# Build diagnostic info
|
| 3875 |
+
diag = []
|
| 3876 |
+
diag.append(f"- Telemetry enabled: `{is_enabled()}`")
|
| 3877 |
+
diag.append(f"- On HF Spaces: `{_ON_HF_SPACES}`")
|
| 3878 |
+
diag.append(f"- Repo: `{_TELEMETRY_REPO or '(not set)'}`")
|
| 3879 |
+
diag.append(f"- HF_TOKEN set: `{bool(os.environ.get('HF_TOKEN'))}`")
|
| 3880 |
+
diag.append(f"- Local file: `{TELEMETRY_FILE}`")
|
| 3881 |
+
diag.append(f"- Local file exists: `{TELEMETRY_FILE.exists()}`")
|
| 3882 |
+
n_records = len(read_telemetry()) if TELEMETRY_FILE.exists() else 0
|
| 3883 |
+
diag.append(f"- Local records: `{n_records}`")
|
| 3884 |
+
|
| 3885 |
repo = _TELEMETRY_REPO
|
| 3886 |
+
if not repo:
|
| 3887 |
+
return "**Sync failed:** No telemetry repo configured.\n\n" + "\n".join(diag)
|
| 3888 |
+
if n_records == 0:
|
| 3889 |
+
return "**No records to sync.** Run an obliteration or benchmark first.\n\n" + "\n".join(diag)
|
| 3890 |
+
|
| 3891 |
ok = push_to_hub()
|
| 3892 |
if ok:
|
| 3893 |
return f"Telemetry synced to [{repo}](https://huggingface.co/datasets/{repo}) successfully."
|
| 3894 |
return (
|
| 3895 |
+
"**Sync failed.** Check Space logs for warnings.\n\n" + "\n".join(diag)
|
|
|
|
| 3896 |
)
|
| 3897 |
except Exception as e:
|
| 3898 |
+
return f"**Error:** `{e}`"
|
| 3899 |
|
| 3900 |
lb_refresh_btn.click(
|
| 3901 |
fn=_load_leaderboard,
|
|
|
|
| 4015 |
outputs=[hub_warning_md],
|
| 4016 |
)
|
| 4017 |
|
| 4018 |
+
# Wire benchmark β Chat/A/B cross-tab dropdown updates
|
| 4019 |
+
bench_btn.click(
|
| 4020 |
+
fn=benchmark,
|
| 4021 |
+
inputs=[bench_model, bench_methods, bench_prompt_vol, bench_dataset],
|
| 4022 |
+
outputs=[bench_status, bench_results, bench_log, bench_gallery],
|
| 4023 |
+
api_name="/benchmark",
|
| 4024 |
+
).then(
|
| 4025 |
+
fn=lambda: (
|
| 4026 |
+
gr.update(choices=_get_bench_choices()),
|
| 4027 |
+
gr.update(choices=_get_session_model_choices()),
|
| 4028 |
+
gr.update(choices=_get_session_model_choices()),
|
| 4029 |
+
_get_vram_html(),
|
| 4030 |
+
),
|
| 4031 |
+
outputs=[bench_load_dd, session_model_dd, ab_session_model_dd, vram_display],
|
| 4032 |
+
)
|
| 4033 |
+
bench_load_btn.click(
|
| 4034 |
+
fn=load_bench_into_chat,
|
| 4035 |
+
inputs=[bench_load_dd],
|
| 4036 |
+
outputs=[bench_load_status, chat_status],
|
| 4037 |
+
).then(fn=_get_vram_html, outputs=[vram_display])
|
| 4038 |
+
|
| 4039 |
+
mm_btn.click(
|
| 4040 |
+
fn=benchmark_multi_model,
|
| 4041 |
+
inputs=[mm_models, mm_method, mm_prompt_vol, mm_dataset],
|
| 4042 |
+
outputs=[mm_status, mm_results, mm_log, mm_gallery],
|
| 4043 |
+
api_name="/benchmark_multi_model",
|
| 4044 |
+
).then(
|
| 4045 |
+
fn=lambda: (
|
| 4046 |
+
gr.update(choices=_get_bench_choices()),
|
| 4047 |
+
gr.update(choices=_get_session_model_choices()),
|
| 4048 |
+
gr.update(choices=_get_session_model_choices()),
|
| 4049 |
+
_get_vram_html(),
|
| 4050 |
+
),
|
| 4051 |
+
outputs=[mm_load_dd, session_model_dd, ab_session_model_dd, vram_display],
|
| 4052 |
+
)
|
| 4053 |
+
mm_load_btn.click(
|
| 4054 |
+
fn=load_bench_into_chat,
|
| 4055 |
+
inputs=[mm_load_dd],
|
| 4056 |
+
outputs=[mm_load_status, chat_status],
|
| 4057 |
+
).then(fn=_get_vram_html, outputs=[vram_display])
|
| 4058 |
+
|
| 4059 |
# Wire obliterate button (after all tabs so chat_status is defined)
|
| 4060 |
# session_model_dd is a direct output (4th) so the dropdown updates
|
| 4061 |
# reliably even on ZeroGPU where .then() may not fire after generator teardown.
|
|
|
|
| 4065 |
custom_harmful_tb, custom_harmless_tb] + _adv_controls,
|
| 4066 |
outputs=[status_md, log_box, chat_status, session_model_dd],
|
| 4067 |
).then(
|
| 4068 |
+
fn=lambda: (
|
| 4069 |
+
gr.update(choices=_get_session_model_choices()),
|
| 4070 |
+
_get_vram_html(),
|
| 4071 |
+
),
|
| 4072 |
+
outputs=[ab_session_model_dd, vram_display],
|
| 4073 |
)
|
| 4074 |
|
| 4075 |
+
# Wire session model auto-loading (Chat tab dropdown change)
|
| 4076 |
+
session_model_dd.change(
|
| 4077 |
fn=load_bench_into_chat,
|
| 4078 |
inputs=[session_model_dd],
|
| 4079 |
outputs=[session_load_status, chat_status],
|
| 4080 |
+
).then(
|
| 4081 |
+
fn=lambda v: (gr.update(value=v), _get_vram_html()),
|
| 4082 |
+
inputs=[session_model_dd],
|
| 4083 |
+
outputs=[ab_session_model_dd, vram_display],
|
| 4084 |
+
)
|
| 4085 |
+
|
| 4086 |
+
# Wire A/B tab session model dropdown (syncs back to Chat tab)
|
| 4087 |
+
ab_session_model_dd.change(
|
| 4088 |
+
fn=load_bench_into_chat,
|
| 4089 |
+
inputs=[ab_session_model_dd],
|
| 4090 |
+
outputs=[ab_session_load_status, chat_status],
|
| 4091 |
+
).then(
|
| 4092 |
+
fn=lambda v: (gr.update(value=v), _get_vram_html()),
|
| 4093 |
+
inputs=[ab_session_model_dd],
|
| 4094 |
+
outputs=[session_model_dd, vram_display],
|
| 4095 |
+
)
|
| 4096 |
|
| 4097 |
# Refresh VRAM after cleanup, benchmarks, and model loading
|
| 4098 |
cleanup_btn.click(fn=_cleanup_disk, outputs=[cleanup_status]).then(
|
docs/SENSITIVE_DATA_AUDIT.md
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Sensitive Data Audit Report
|
| 2 |
+
|
| 3 |
+
**Date:** 2026-03-04
|
| 4 |
+
**Scope:** Full repository scan — all file types (Python, YAML, JSON, TOML, Docker, shell scripts, notebooks, CI/CD)
|
| 5 |
+
**Branch:** claude/audit-sensitive-data-DkqUy
|
| 6 |
+
|
| 7 |
+
## Summary
|
| 8 |
+
|
| 9 |
+
**No hardcoded secrets, API keys, tokens, passwords, or credentials found in the codebase.**
|
| 10 |
+
|
| 11 |
+
## Detailed Findings
|
| 12 |
+
|
| 13 |
+
### 1. Secrets & Credentials
|
| 14 |
+
|
| 15 |
+
| Check | Result |
|
| 16 |
+
|---|---|
|
| 17 |
+
| Hardcoded API keys (HF, OpenAI, Anthropic, etc.) | None found |
|
| 18 |
+
| Hardcoded passwords/tokens in source | None found |
|
| 19 |
+
| `.env` files committed | None (`.env` is in `.gitignore`) |
|
| 20 |
+
| Private keys or certificates | None found |
|
| 21 |
+
| Database connection strings | None found |
|
| 22 |
+
| URLs with embedded credentials | None found |
|
| 23 |
+
| Patterns: `sk-`, `hf_`, `ghp_`, `gho_`, `github_pat_` | None found |
|
| 24 |
+
|
| 25 |
+
### 2. Environment Variable Handling
|
| 26 |
+
|
| 27 |
+
All sensitive values are read from environment variables at runtime:
|
| 28 |
+
|
| 29 |
+
- `HF_TOKEN` — used for gated model access and Hub push (read via `os.environ.get()`)
|
| 30 |
+
- `OBLITERATUS_SSH_KEY` — SSH key path for remote benchmarks (default: `~/.ssh/hf_obliteratus`)
|
| 31 |
+
- `OBLITERATUS_SSH_HOST` — remote SSH host (no default, must be provided)
|
| 32 |
+
- `OBLITERATUS_TELEMETRY_REPO` — telemetry dataset repo (defaults only on HF Spaces)
|
| 33 |
+
|
| 34 |
+
### 3. Docker Security
|
| 35 |
+
|
| 36 |
+
- **Dockerfile** runs as non-root user (`appuser`)
|
| 37 |
+
- **`.dockerignore`** properly excludes: `.env`, `.git`, tests, scripts, docs, notebooks, model weights
|
| 38 |
+
- No secrets baked into Docker image layers
|
| 39 |
+
|
| 40 |
+
### 4. CI/CD (`.github/workflows/ci.yml`)
|
| 41 |
+
|
| 42 |
+
- Uses pinned action SHAs (not mutable tags) — good supply-chain practice
|
| 43 |
+
- No secrets referenced in workflow file
|
| 44 |
+
- No secret injection via env vars
|
| 45 |
+
|
| 46 |
+
### 5. `.gitignore` Coverage
|
| 47 |
+
|
| 48 |
+
Properly excludes: `.env`, virtual environments (`.venv/`, `venv/`, `env/`), model weights (`*.pt`, `*.bin`, `*.safetensors`), IDE configs, caches, logs
|
| 49 |
+
|
| 50 |
+
### 6. HuggingFace Space Configuration
|
| 51 |
+
|
| 52 |
+
Based on current HF Space settings:
|
| 53 |
+
|
| 54 |
+
- **No secrets configured** in Variables and secrets — this means:
|
| 55 |
+
- Gated models (e.g., Llama) will fail authentication
|
| 56 |
+
- Telemetry Hub sync (push) will fail without `HF_TOKEN`
|
| 57 |
+
- **Recommendation:** Add `HF_TOKEN` as a Space secret if gated model access or telemetry push is needed
|
| 58 |
+
- Space visibility is **Public** (appropriate for open-source project)
|
| 59 |
+
|
| 60 |
+
### 7. Minor Notes
|
| 61 |
+
|
| 62 |
+
- `scripts/run_benchmark_remote.sh` uses `-o StrictHostKeyChecking=no` for SSH — acceptable for ephemeral HF Space connections but worth noting for security-conscious deployments
|
| 63 |
+
- Telemetry auto-enables on HF Spaces (`OBLITERATUS_TELEMETRY=1` by default) — this is documented and expected behavior, collecting only anonymous benchmark metrics
|
| 64 |
+
|
| 65 |
+
## Recommendations
|
| 66 |
+
|
| 67 |
+
1. **Add `HF_TOKEN` as an HF Space secret** if you need gated model access or telemetry push
|
| 68 |
+
2. Consider adding a `pre-commit` hook with a secrets scanner (e.g., `detect-secrets` or `gitleaks`) to prevent accidental secret commits in the future
|
| 69 |
+
3. The current `.gitignore` and `.dockerignore` are well-configured — no changes needed
|
obliteratus/telemetry.py
CHANGED
|
@@ -15,7 +15,7 @@ Architecture:
|
|
| 15 |
file (default: ~/.obliteratus/telemetry.jsonl or /tmp/obliteratus_telemetry.jsonl
|
| 16 |
in containers).
|
| 17 |
2. On HuggingFace Spaces, records are automatically synced to a central
|
| 18 |
-
HuggingFace Dataset repo (default:
|
| 19 |
configurable via OBLITERATUS_TELEMETRY_REPO). Each Space instance
|
| 20 |
uploads its own JSONL file (keyed by SPACE_ID + session), so
|
| 21 |
duplicated Spaces all feed into the same central leaderboard.
|
|
@@ -46,17 +46,14 @@ logger = logging.getLogger(__name__)
|
|
| 46 |
# ββ Configuration βββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 47 |
|
| 48 |
_ON_HF_SPACES = os.environ.get("SPACE_ID") is not None
|
| 49 |
-
_TELEMETRY_ENABLED = os.environ.get(
|
| 50 |
-
"OBLITERATUS_TELEMETRY", "1" if _ON_HF_SPACES else "0"
|
| 51 |
-
) != "0"
|
| 52 |
|
| 53 |
-
# ββ Telemetry state
|
| 54 |
_enabled: bool | None = None
|
| 55 |
|
| 56 |
# Central Hub repo for cross-Space telemetry aggregation.
|
| 57 |
# Default repo is used on HF Spaces so all instances (including duplicated
|
| 58 |
# Spaces) send data to the same central dataset automatically.
|
| 59 |
-
_DEFAULT_TELEMETRY_REPO = "
|
| 60 |
_TELEMETRY_REPO = os.environ.get(
|
| 61 |
"OBLITERATUS_TELEMETRY_REPO",
|
| 62 |
_DEFAULT_TELEMETRY_REPO if _ON_HF_SPACES else "",
|
|
@@ -208,15 +205,13 @@ def storage_diagnostic() -> dict[str, Any]:
|
|
| 208 |
|
| 209 |
def disable_telemetry():
|
| 210 |
"""Disable telemetry collection."""
|
| 211 |
-
global
|
| 212 |
-
_TELEMETRY_ENABLED = False
|
| 213 |
_enabled = False
|
| 214 |
|
| 215 |
|
| 216 |
def enable_telemetry():
|
| 217 |
"""Enable telemetry collection."""
|
| 218 |
-
global
|
| 219 |
-
_TELEMETRY_ENABLED = True
|
| 220 |
_enabled = True
|
| 221 |
|
| 222 |
|
|
@@ -330,17 +325,34 @@ def _ensure_hub_repo(repo_id: str) -> bool:
|
|
| 330 |
return True
|
| 331 |
try:
|
| 332 |
from huggingface_hub import HfApi
|
| 333 |
-
api = HfApi()
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 344 |
return False
|
| 345 |
|
| 346 |
|
|
@@ -368,7 +380,7 @@ def _sync_to_hub_bg() -> None:
|
|
| 368 |
from huggingface_hub import HfApi
|
| 369 |
if not _ensure_hub_repo(repo):
|
| 370 |
return
|
| 371 |
-
api = HfApi()
|
| 372 |
slug = _instance_slug()
|
| 373 |
api.upload_file(
|
| 374 |
path_or_fileobj=str(TELEMETRY_FILE),
|
|
@@ -377,9 +389,9 @@ def _sync_to_hub_bg() -> None:
|
|
| 377 |
repo_type="dataset",
|
| 378 |
commit_message=f"Auto-sync telemetry from {slug}",
|
| 379 |
)
|
| 380 |
-
logger.
|
| 381 |
except Exception as e:
|
| 382 |
-
logger.
|
| 383 |
finally:
|
| 384 |
_sync_in_progress.clear()
|
| 385 |
|
|
@@ -425,7 +437,7 @@ def fetch_hub_records(max_records: int = 10000) -> list[dict[str, Any]]:
|
|
| 425 |
try:
|
| 426 |
from huggingface_hub import HfApi, hf_hub_download
|
| 427 |
|
| 428 |
-
api = HfApi()
|
| 429 |
try:
|
| 430 |
all_files = api.list_repo_files(repo, repo_type="dataset")
|
| 431 |
except Exception:
|
|
@@ -542,7 +554,7 @@ def restore_from_hub() -> int:
|
|
| 542 |
)
|
| 543 |
return new_count
|
| 544 |
except Exception as e:
|
| 545 |
-
logger.
|
| 546 |
return 0
|
| 547 |
|
| 548 |
|
|
@@ -551,7 +563,7 @@ def _restore_from_hub_bg() -> None:
|
|
| 551 |
try:
|
| 552 |
restore_from_hub()
|
| 553 |
except Exception as e:
|
| 554 |
-
logger.
|
| 555 |
|
| 556 |
|
| 557 |
# Auto-restore on HF Spaces startup (background, non-blocking).
|
|
@@ -785,7 +797,7 @@ def push_to_hub(repo_id: str | None = None) -> bool:
|
|
| 785 |
if not _ensure_hub_repo(repo):
|
| 786 |
return False
|
| 787 |
|
| 788 |
-
api = HfApi()
|
| 789 |
slug = _instance_slug()
|
| 790 |
api.upload_file(
|
| 791 |
path_or_fileobj=str(TELEMETRY_FILE),
|
|
|
|
| 15 |
file (default: ~/.obliteratus/telemetry.jsonl or /tmp/obliteratus_telemetry.jsonl
|
| 16 |
in containers).
|
| 17 |
2. On HuggingFace Spaces, records are automatically synced to a central
|
| 18 |
+
HuggingFace Dataset repo (default: pliny-the-prompter/OBLITERATUS-TELEMETRY,
|
| 19 |
configurable via OBLITERATUS_TELEMETRY_REPO). Each Space instance
|
| 20 |
uploads its own JSONL file (keyed by SPACE_ID + session), so
|
| 21 |
duplicated Spaces all feed into the same central leaderboard.
|
|
|
|
| 46 |
# ββ Configuration βββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 47 |
|
| 48 |
_ON_HF_SPACES = os.environ.get("SPACE_ID") is not None
|
|
|
|
|
|
|
|
|
|
| 49 |
|
| 50 |
+
# ββ Telemetry state ββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 51 |
_enabled: bool | None = None
|
| 52 |
|
| 53 |
# Central Hub repo for cross-Space telemetry aggregation.
|
| 54 |
# Default repo is used on HF Spaces so all instances (including duplicated
|
| 55 |
# Spaces) send data to the same central dataset automatically.
|
| 56 |
+
_DEFAULT_TELEMETRY_REPO = "pliny-the-prompter/OBLITERATUS-TELEMETRY"
|
| 57 |
_TELEMETRY_REPO = os.environ.get(
|
| 58 |
"OBLITERATUS_TELEMETRY_REPO",
|
| 59 |
_DEFAULT_TELEMETRY_REPO if _ON_HF_SPACES else "",
|
|
|
|
| 205 |
|
| 206 |
def disable_telemetry():
|
| 207 |
"""Disable telemetry collection."""
|
| 208 |
+
global _enabled
|
|
|
|
| 209 |
_enabled = False
|
| 210 |
|
| 211 |
|
| 212 |
def enable_telemetry():
|
| 213 |
"""Enable telemetry collection."""
|
| 214 |
+
global _enabled
|
|
|
|
| 215 |
_enabled = True
|
| 216 |
|
| 217 |
|
|
|
|
| 325 |
return True
|
| 326 |
try:
|
| 327 |
from huggingface_hub import HfApi
|
| 328 |
+
api = HfApi(token=os.environ.get("HF_TOKEN"))
|
| 329 |
+
# First try create_repo (works if we own the namespace)
|
| 330 |
+
try:
|
| 331 |
+
api.create_repo(
|
| 332 |
+
repo_id=repo_id,
|
| 333 |
+
repo_type="dataset",
|
| 334 |
+
private=False,
|
| 335 |
+
exist_ok=True,
|
| 336 |
+
)
|
| 337 |
+
_hub_repo_created = True
|
| 338 |
+
return True
|
| 339 |
+
except Exception:
|
| 340 |
+
pass
|
| 341 |
+
# Fallback: check if the repo already exists (works for
|
| 342 |
+
# collaborators / org members who can write but not create)
|
| 343 |
+
try:
|
| 344 |
+
api.repo_info(repo_id=repo_id, repo_type="dataset")
|
| 345 |
+
_hub_repo_created = True
|
| 346 |
+
logger.info(f"Hub repo {repo_id} exists (verified via repo_info)")
|
| 347 |
+
return True
|
| 348 |
+
except Exception as e:
|
| 349 |
+
logger.warning(
|
| 350 |
+
f"Hub repo {repo_id}: create_repo failed and repo_info "
|
| 351 |
+
f"also failed β repo may not exist or token lacks access: {e}"
|
| 352 |
+
)
|
| 353 |
+
return False
|
| 354 |
+
except ImportError:
|
| 355 |
+
logger.warning("huggingface_hub not installed β cannot ensure Hub repo")
|
| 356 |
return False
|
| 357 |
|
| 358 |
|
|
|
|
| 380 |
from huggingface_hub import HfApi
|
| 381 |
if not _ensure_hub_repo(repo):
|
| 382 |
return
|
| 383 |
+
api = HfApi(token=os.environ.get("HF_TOKEN"))
|
| 384 |
slug = _instance_slug()
|
| 385 |
api.upload_file(
|
| 386 |
path_or_fileobj=str(TELEMETRY_FILE),
|
|
|
|
| 389 |
repo_type="dataset",
|
| 390 |
commit_message=f"Auto-sync telemetry from {slug}",
|
| 391 |
)
|
| 392 |
+
logger.info(f"Synced telemetry to {repo}/data/{slug}.jsonl")
|
| 393 |
except Exception as e:
|
| 394 |
+
logger.warning(f"Hub sync failed: {e}")
|
| 395 |
finally:
|
| 396 |
_sync_in_progress.clear()
|
| 397 |
|
|
|
|
| 437 |
try:
|
| 438 |
from huggingface_hub import HfApi, hf_hub_download
|
| 439 |
|
| 440 |
+
api = HfApi(token=os.environ.get("HF_TOKEN"))
|
| 441 |
try:
|
| 442 |
all_files = api.list_repo_files(repo, repo_type="dataset")
|
| 443 |
except Exception:
|
|
|
|
| 554 |
)
|
| 555 |
return new_count
|
| 556 |
except Exception as e:
|
| 557 |
+
logger.warning("Hub restore failed: %s", e)
|
| 558 |
return 0
|
| 559 |
|
| 560 |
|
|
|
|
| 563 |
try:
|
| 564 |
restore_from_hub()
|
| 565 |
except Exception as e:
|
| 566 |
+
logger.warning("Background Hub restore failed: %s", e)
|
| 567 |
|
| 568 |
|
| 569 |
# Auto-restore on HF Spaces startup (background, non-blocking).
|
|
|
|
| 797 |
if not _ensure_hub_repo(repo):
|
| 798 |
return False
|
| 799 |
|
| 800 |
+
api = HfApi(token=os.environ.get("HF_TOKEN"))
|
| 801 |
slug = _instance_slug()
|
| 802 |
api.upload_file(
|
| 803 |
path_or_fileobj=str(TELEMETRY_FILE),
|