pliny-the-prompter committed on
Commit
a9b2ff1
·
verified ·
1 Parent(s): f6b07b4

Upload 128 files

Browse files
app.py CHANGED
@@ -68,48 +68,72 @@ _lock = threading.Lock()
68
  _bench_configs: dict[str, dict] = {}
69
 
70
  # ---------------------------------------------------------------------------
71
- # Model presets (subset that fits on a T4 16GB)
72
  # ---------------------------------------------------------------------------
73
 
74
- MODELS = {
75
- # ── Tiny (< 2B) ──────────────────────────────────────────────────────
76
- # All models below are non-gated (no HF approval required)
77
- "Qwen2.5 0.5B Instruct": "Qwen/Qwen2.5-0.5B-Instruct",
78
- "Qwen3 0.6B": "Qwen/Qwen3-0.6B",
79
- "OLMo 2 1B Instruct": "allenai/OLMo-2-0425-1B-Instruct",
80
- "TinyLlama 1.1B Chat": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
81
- "DeepSeek R1 Distill Qwen 1.5B": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
82
- "Qwen2.5 1.5B Instruct": "Qwen/Qwen2.5-1.5B-Instruct",
83
- "Qwen3 1.7B": "Qwen/Qwen3-1.7B",
84
- "SmolLM2 1.7B Instruct": "HuggingFaceTB/SmolLM2-1.7B-Instruct",
85
- # ── Small (2-5B) ─────────────────────────────────────────────────────
86
- "Phi-2 (2.7B)": "microsoft/phi-2",
87
- "Qwen2.5 3B Instruct": "Qwen/Qwen2.5-3B-Instruct",
88
- "SmolLM3 3B": "HuggingFaceTB/SmolLM3-3B",
89
- "Falcon3 3B Instruct": "tiiuae/Falcon3-3B-Instruct",
90
- "Phi-4 Mini Instruct (3.8B)": "microsoft/Phi-4-mini-instruct",
91
- "MiniCPM3 4B": "openbmb/MiniCPM3-4B",
92
- "Qwen3 4B": "Qwen/Qwen3-4B",
93
- # ── Medium (5-9B) ────────────────────────────────────────────────────
94
- "Qwen2.5 7B Instruct": "Qwen/Qwen2.5-7B-Instruct",
95
- "Qwen2.5 Coder 7B Instruct": "Qwen/Qwen2.5-Coder-7B-Instruct",
96
- "OLMo 3 7B Instruct": "allenai/Olmo-3-7B-Instruct",
97
- "Falcon3 7B Instruct": "tiiuae/Falcon3-7B-Instruct",
98
- "Qwen3 8B": "Qwen/Qwen3-8B",
99
- "DeepSeek R1 0528 Qwen3 8B": "deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
100
- "InternLM3 8B Instruct": "internlm/internlm3-8b-instruct",
101
- "GLM-4 9B Chat": "THUDM/glm-4-9b-chat-hf",
102
- # ── Frontier (MoE β€” tight fit on T4 with quantization) ─────────────
103
- "GPT-OSS 20B (MoE, 3.6B active)": "openai/gpt-oss-20b",
104
- "Qwen3 30B-A3B (MoE, 3B active)": "Qwen/Qwen3-30B-A3B",
105
- "GLM-4.7 Flash (MoE, 3B active)": "zai-org/GLM-4.7-Flash",
106
- # ── Frontier (multi-GPU / cloud only) ──────────────────────────────
107
- "Qwen3.5 397B-A17B (MoE)": "Qwen/Qwen3.5-397B-A17B",
108
- "GLM-5 744B (MoE, 40B active)": "zai-org/GLM-5",
109
- "MiniMax M2.5 (MoE, 10B active)": "MiniMaxAI/MiniMax-M2.5",
110
- "DeepSeek-V3 685B (MoE)": "deepseek-ai/DeepSeek-V3",
111
  }
112
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
  METHODS = {
114
  "advanced (recommended)": "advanced",
115
  "basic (fast, single direction)": "basic",
@@ -2678,7 +2702,7 @@ with gr.Blocks(theme=THEME, css=CSS, js=_JS, title="OBLITERATUS", fill_height=Tr
2678
  with gr.Row():
2679
  model_dd = gr.Dropdown(
2680
  choices=list(MODELS.keys()),
2681
- value="Qwen3 4B",
2682
  label="Target Model",
2683
  info="Models sized for a free T4 GPU (16 GB VRAM)",
2684
  allow_custom_value=True,
@@ -2822,6 +2846,12 @@ with gr.Blocks(theme=THEME, css=CSS, js=_JS, title="OBLITERATUS", fill_height=Tr
2822
  cleanup_btn = gr.Button("Purge Cache", variant="secondary", size="sm")
2823
  cleanup_status = gr.Markdown("")
2824
 
 
 
 
 
 
 
2825
  # ── Tab 2: Chat ───────────────────────────────────────────────────
2826
  with gr.Tab("Chat", id="chat"):
2827
  chat_status = gr.Markdown(get_chat_header)
@@ -2920,7 +2950,7 @@ tradeoff point where refusal is minimized with minimal capability damage.
2920
  with gr.Row():
2921
  sweep_model_dd = gr.Dropdown(
2922
  choices=list(MODELS.keys()),
2923
- value="Qwen2.5 0.5B Instruct",
2924
  label="Model",
2925
  allow_custom_value=True,
2926
  )
@@ -3007,7 +3037,7 @@ Great for finding the optimal strategy for a specific architecture.
3007
  from gradio_client import Client
3008
  client = Client("pliny-the-prompter/obliteratus")
3009
  result = client.predict(
3010
- model_choice="Qwen2.5 0.5B Instruct",
3011
  methods_to_test=["basic", "advanced", "surgical", "optimized"],
3012
  prompt_volume_choice="33 (fast)",
3013
  api_name="/benchmark",
@@ -3017,7 +3047,7 @@ result = client.predict(
3017
  with gr.Row():
3018
  bench_model = gr.Dropdown(
3019
  choices=list(MODELS.keys()),
3020
- value="Qwen2.5 0.5B Instruct",
3021
  label="Target Model",
3022
  allow_custom_value=True,
3023
  )
@@ -3122,7 +3152,7 @@ how well a technique generalizes β€” especially for MoE-aware methods like
3122
  from gradio_client import Client
3123
  client = Client("pliny-the-prompter/obliteratus")
3124
  result = client.predict(
3125
- model_choices=["Qwen2.5 0.5B Instruct", "GPT-OSS 20B (MoE, 3.6B active)"],
3126
  method_choice="surgical",
3127
  prompt_volume_choice="33 (fast)",
3128
  api_name="/benchmark_multi_model",
@@ -3133,8 +3163,8 @@ result = client.predict(
3133
  mm_models = gr.CheckboxGroup(
3134
  choices=list(MODELS.keys()),
3135
  value=[
3136
- "Qwen2.5 0.5B Instruct",
3137
- "Qwen2.5 3B Instruct",
3138
  ],
3139
  label="Models to Test",
3140
  )
@@ -3279,7 +3309,7 @@ Pre-configured benchmark configurations for common research questions.
3279
 
3280
  def _preset_gptoss(vol, ds):
3281
  yield from benchmark(
3282
- "GPT-OSS 20B (MoE, 3.6B active)",
3283
  ["basic", "advanced", "aggressive", "surgical",
3284
  "optimized", "inverted", "nuclear"],
3285
  vol, ds,
@@ -3288,10 +3318,10 @@ Pre-configured benchmark configurations for common research questions.
3288
  def _preset_moe_cross(vol, ds):
3289
  yield from benchmark_multi_model(
3290
  [
3291
- "Qwen2.5 0.5B Instruct",
3292
- "Qwen2.5 3B Instruct",
3293
- "Qwen2.5 7B Instruct",
3294
- "GPT-OSS 20B (MoE, 3.6B active)",
3295
  ],
3296
  "surgical", vol, ds,
3297
  )
@@ -3303,9 +3333,9 @@ Pre-configured benchmark configurations for common research questions.
3303
  # Part 1: basic method across models
3304
  for status, results_md, log, gallery in benchmark_multi_model(
3305
  [
3306
- "Qwen2.5 0.5B Instruct",
3307
- "Qwen2.5 3B Instruct",
3308
- "Qwen2.5 7B Instruct",
3309
  ],
3310
  "basic", vol, ds,
3311
  ):
@@ -3314,9 +3344,9 @@ Pre-configured benchmark configurations for common research questions.
3314
  # Part 2: optimized method across models
3315
  for status, results_md, log, gallery in benchmark_multi_model(
3316
  [
3317
- "Qwen2.5 0.5B Instruct",
3318
- "Qwen2.5 3B Instruct",
3319
- "Qwen2.5 7B Instruct",
3320
  ],
3321
  "optimized", vol, ds,
3322
  ):
@@ -3341,10 +3371,12 @@ Pre-configured benchmark configurations for common research questions.
3341
  # ── Tab 7: Leaderboard ────────────────────────────────────────────
3342
  with gr.Tab("Leaderboard", id="leaderboard"):
3343
  gr.Markdown("""### Community Leaderboard
3344
- All benchmark results from this Space are anonymously logged.
3345
- See which model + method combinations perform best across the community.
3346
 
3347
- *Telemetry is anonymous (no user identity, no prompts). Opt in: set `OBLITERATUS_TELEMETRY=1`.*
 
 
3348
  """)
3349
 
3350
  def _load_leaderboard():
@@ -3352,7 +3384,7 @@ See which model + method combinations perform best across the community.
3352
  try:
3353
  from obliteratus.telemetry import get_leaderboard_data, is_telemetry_enabled
3354
  if not is_telemetry_enabled():
3355
- return "Telemetry is disabled. Set `OBLITERATUS_TELEMETRY=1` to enable.", ""
3356
 
3357
  data = get_leaderboard_data()
3358
  if not data:
 
68
  _bench_configs: dict[str, dict] = {}
69
 
70
  # ---------------------------------------------------------------------------
71
+ # Model presets β€” 100+ models organized by provider
72
  # ---------------------------------------------------------------------------
73
 
74
+ # Map HF org prefixes to display provider names
75
+ _PROVIDER_NAMES = {
76
+ "01-ai": "01.AI",
77
+ "Qwen": "Alibaba (Qwen)",
78
+ "allenai": "Allen AI",
79
+ "apple": "Apple",
80
+ "CohereForAI": "Cohere",
81
+ "databricks": "Databricks",
82
+ "deepseek-ai": "DeepSeek",
83
+ "EleutherAI": "EleutherAI",
84
+ "google": "Google",
85
+ "distilbert": "HuggingFace",
86
+ "HuggingFaceTB": "HuggingFace",
87
+ "ibm-granite": "IBM",
88
+ "TinyLlama": "Meta (LLaMA)",
89
+ "meta-llama": "Meta (LLaMA)",
90
+ "microsoft": "Microsoft",
91
+ "MiniMaxAI": "MiniMax",
92
+ "mistralai": "Mistral",
93
+ "moonshotai": "Moonshot",
94
+ "nvidia": "NVIDIA",
95
+ "openai": "OpenAI",
96
+ "openai-community": "OpenAI",
97
+ "openbmb": "OpenBMB",
98
+ "internlm": "Shanghai AI Lab",
99
+ "stabilityai": "Stability AI",
100
+ "stepfun-ai": "StepFun",
101
+ "tiiuae": "TII (Falcon)",
102
+ "THUDM": "Zhipu AI (GLM)",
103
+ "zai-org": "Zhipu AI (GLM)",
104
+ # Community fine-tunes
105
+ "huihui-ai": "Community",
106
+ "cognitivecomputations": "Community",
107
+ "NousResearch": "Community",
108
+ "mlabonne": "Community",
109
+ "Orenguteng": "Community",
110
+ "WhiteRabbitNeo": "Community",
111
  }
112
 
113
+
114
+ def _build_model_choices() -> dict[str, str]:
115
+ """Build display_name β†’ hf_id mapping from presets, grouped by provider."""
116
+ from obliteratus.presets import list_all_presets
117
+ presets = list_all_presets()
118
+
119
+ # Group by provider
120
+ groups: dict[str, list[tuple[str, str]]] = {}
121
+ for p in presets:
122
+ org = p.hf_id.split("/")[0] if "/" in p.hf_id else ""
123
+ provider = _PROVIDER_NAMES.get(org, org)
124
+ groups.setdefault(provider, []).append((p.name, p.hf_id))
125
+
126
+ # Build ordered dict: providers alphabetically, models by name within each
127
+ models: dict[str, str] = {}
128
+ for provider in sorted(groups.keys()):
129
+ for name, hf_id in groups[provider]:
130
+ display = f"{provider} / {name}"
131
+ models[display] = hf_id
132
+ return models
133
+
134
+
135
+ MODELS = _build_model_choices()
136
+
137
  METHODS = {
138
  "advanced (recommended)": "advanced",
139
  "basic (fast, single direction)": "basic",
 
2702
  with gr.Row():
2703
  model_dd = gr.Dropdown(
2704
  choices=list(MODELS.keys()),
2705
+ value="Alibaba (Qwen) / Qwen3-4B",
2706
  label="Target Model",
2707
  info="Models sized for a free T4 GPU (16 GB VRAM)",
2708
  allow_custom_value=True,
 
2846
  cleanup_btn = gr.Button("Purge Cache", variant="secondary", size="sm")
2847
  cleanup_status = gr.Markdown("")
2848
 
2849
+ gr.Markdown(
2850
+ "*Anonymous telemetry is on by default (no user identity or prompts collected). "
2851
+ "Opt out: set `OBLITERATUS_TELEMETRY=0`.*",
2852
+ elem_classes=["telemetry-notice"],
2853
+ )
2854
+
2855
  # ── Tab 2: Chat ───────────────────────────────────────────────────
2856
  with gr.Tab("Chat", id="chat"):
2857
  chat_status = gr.Markdown(get_chat_header)
 
2950
  with gr.Row():
2951
  sweep_model_dd = gr.Dropdown(
2952
  choices=list(MODELS.keys()),
2953
+ value="Alibaba (Qwen) / Qwen2.5-0.5B Instruct",
2954
  label="Model",
2955
  allow_custom_value=True,
2956
  )
 
3037
  from gradio_client import Client
3038
  client = Client("pliny-the-prompter/obliteratus")
3039
  result = client.predict(
3040
+ model_choice="Alibaba (Qwen) / Qwen2.5-0.5B Instruct",
3041
  methods_to_test=["basic", "advanced", "surgical", "optimized"],
3042
  prompt_volume_choice="33 (fast)",
3043
  api_name="/benchmark",
 
3047
  with gr.Row():
3048
  bench_model = gr.Dropdown(
3049
  choices=list(MODELS.keys()),
3050
+ value="Alibaba (Qwen) / Qwen2.5-0.5B Instruct",
3051
  label="Target Model",
3052
  allow_custom_value=True,
3053
  )
 
3152
  from gradio_client import Client
3153
  client = Client("pliny-the-prompter/obliteratus")
3154
  result = client.predict(
3155
+ model_choices=["Alibaba (Qwen) / Qwen2.5-0.5B Instruct", "OpenAI / GPT-OSS 20B"],
3156
  method_choice="surgical",
3157
  prompt_volume_choice="33 (fast)",
3158
  api_name="/benchmark_multi_model",
 
3163
  mm_models = gr.CheckboxGroup(
3164
  choices=list(MODELS.keys()),
3165
  value=[
3166
+ "Alibaba (Qwen) / Qwen2.5-0.5B Instruct",
3167
+ "Alibaba (Qwen) / Qwen2.5-3B Instruct",
3168
  ],
3169
  label="Models to Test",
3170
  )
 
3309
 
3310
  def _preset_gptoss(vol, ds):
3311
  yield from benchmark(
3312
+ "OpenAI / GPT-OSS 20B",
3313
  ["basic", "advanced", "aggressive", "surgical",
3314
  "optimized", "inverted", "nuclear"],
3315
  vol, ds,
 
3318
  def _preset_moe_cross(vol, ds):
3319
  yield from benchmark_multi_model(
3320
  [
3321
+ "Alibaba (Qwen) / Qwen2.5-0.5B Instruct",
3322
+ "Alibaba (Qwen) / Qwen2.5-3B Instruct",
3323
+ "Alibaba (Qwen) / Qwen2.5-7B Instruct",
3324
+ "OpenAI / GPT-OSS 20B",
3325
  ],
3326
  "surgical", vol, ds,
3327
  )
 
3333
  # Part 1: basic method across models
3334
  for status, results_md, log, gallery in benchmark_multi_model(
3335
  [
3336
+ "Alibaba (Qwen) / Qwen2.5-0.5B Instruct",
3337
+ "Alibaba (Qwen) / Qwen2.5-3B Instruct",
3338
+ "Alibaba (Qwen) / Qwen2.5-7B Instruct",
3339
  ],
3340
  "basic", vol, ds,
3341
  ):
 
3344
  # Part 2: optimized method across models
3345
  for status, results_md, log, gallery in benchmark_multi_model(
3346
  [
3347
+ "Alibaba (Qwen) / Qwen2.5-0.5B Instruct",
3348
+ "Alibaba (Qwen) / Qwen2.5-3B Instruct",
3349
+ "Alibaba (Qwen) / Qwen2.5-7B Instruct",
3350
  ],
3351
  "optimized", vol, ds,
3352
  ):
 
3371
  # ── Tab 7: Leaderboard ────────────────────────────────────────────
3372
  with gr.Tab("Leaderboard", id="leaderboard"):
3373
  gr.Markdown("""### Community Leaderboard
3374
+ All benchmark results from this Space are anonymously logged to help the community
3375
+ find the best model + method combinations.
3376
 
3377
+ *Telemetry is **on by default** and is fully anonymous β€” no user identity, IP addresses, or prompt content
3378
+ is ever collected. Only aggregate benchmark metrics (model name, method, scores, hardware) are stored locally.
3379
+ To opt out, set the environment variable `OBLITERATUS_TELEMETRY=0` before launching.*
3380
  """)
3381
 
3382
  def _load_leaderboard():
 
3384
  try:
3385
  from obliteratus.telemetry import get_leaderboard_data, is_telemetry_enabled
3386
  if not is_telemetry_enabled():
3387
+ return "Telemetry is disabled. Remove `OBLITERATUS_TELEMETRY=0` or set it to `1` to re-enable.", ""
3388
 
3389
  data = get_leaderboard_data()
3390
  if not data:
obliteratus/.DS_Store CHANGED
Binary files a/obliteratus/.DS_Store and b/obliteratus/.DS_Store differ
 
obliteratus/abliterate.py CHANGED
@@ -941,9 +941,10 @@ class AbliterationPipeline:
941
  self.log(" Chat template not configured for this model; using raw prompts")
942
  return prompts
943
 
944
- self.log(" Wrapping prompts with chat template")
 
945
  wrapped = []
946
- for prompt in prompts:
947
  messages = [{"role": "user", "content": prompt}]
948
  try:
949
  text = tokenizer.apply_chat_template(
@@ -952,6 +953,8 @@ class AbliterationPipeline:
952
  wrapped.append(text)
953
  except Exception:
954
  wrapped.append(prompt) # fallback to raw if individual prompt fails
 
 
955
  return wrapped
956
 
957
  @staticmethod
 
941
  self.log(" Chat template not configured for this model; using raw prompts")
942
  return prompts
943
 
944
+ n = len(prompts)
945
+ self.log(f" Wrapping {n} prompts with chat template")
946
  wrapped = []
947
+ for i, prompt in enumerate(prompts):
948
  messages = [{"role": "user", "content": prompt}]
949
  try:
950
  text = tokenizer.apply_chat_template(
 
953
  wrapped.append(text)
954
  except Exception:
955
  wrapped.append(prompt) # fallback to raw if individual prompt fails
956
+ if (i + 1) % 100 == 0 or (i + 1) == n:
957
+ self.log(f" chat template {i + 1}/{n}")
958
  return wrapped
959
 
960
  @staticmethod
obliteratus/presets.py CHANGED
@@ -70,6 +70,22 @@ _PRESETS_LIST = [
70
  params="0.5B",
71
  recommended_dtype="float16",
72
  ),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  ModelPreset(
74
  name="Qwen2.5-1.5B",
75
  hf_id="Qwen/Qwen2.5-1.5B",
@@ -78,6 +94,38 @@ _PRESETS_LIST = [
78
  params="1.5B",
79
  recommended_dtype="float16",
80
  ),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  ModelPreset(
82
  name="Qwen2.5-7B",
83
  hf_id="Qwen/Qwen2.5-7B",
@@ -87,6 +135,33 @@ _PRESETS_LIST = [
87
  recommended_dtype="float16",
88
  recommended_quantization="4bit",
89
  ),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  ModelPreset(
91
  name="Qwen2.5-14B",
92
  hf_id="Qwen/Qwen2.5-14B",
@@ -146,7 +221,24 @@ _PRESETS_LIST = [
146
  # β•‘ Allen Institute for AI (AI2) β•‘
147
  # β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•
148
  ModelPreset(
149
- name="OLMo 2 7B",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
  hf_id="allenai/OLMo-2-0325-32B-Instruct",
151
  description="AI2's fully open 32B model (data+code+weights). Apache 2.0.",
152
  tier="large",
@@ -301,6 +393,15 @@ _PRESETS_LIST = [
301
  recommended_dtype="bfloat16",
302
  recommended_quantization="4bit",
303
  ),
 
 
 
 
 
 
 
 
 
304
  ModelPreset(
305
  name="DeepSeek-V3",
306
  hf_id="deepseek-ai/DeepSeek-V3",
@@ -448,6 +549,14 @@ _PRESETS_LIST = [
448
  params="1.7B",
449
  recommended_dtype="float16",
450
  ),
 
 
 
 
 
 
 
 
451
 
452
  # ╔══════════════════════════════════════════════════════════════════╗
453
  # β•‘ IBM (Granite) β•‘
@@ -537,6 +646,14 @@ _PRESETS_LIST = [
537
  params="3.8B",
538
  recommended_dtype="float16",
539
  ),
 
 
 
 
 
 
 
 
540
  ModelPreset(
541
  name="Phi-4",
542
  hf_id="microsoft/phi-4",
@@ -618,6 +735,18 @@ _PRESETS_LIST = [
618
  recommended_quantization="4bit",
619
  ),
620
 
 
 
 
 
 
 
 
 
 
 
 
 
621
  # ╔══════════════════════════════════════════════════════════════════╗
622
  # β•‘ Moonshot AI (Kimi) β•‘
623
  # β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•
@@ -670,6 +799,19 @@ _PRESETS_LIST = [
670
  recommended_quantization="4bit",
671
  ),
672
 
 
 
 
 
 
 
 
 
 
 
 
 
 
673
  # ╔══════════════════════════════════════════════════════════════════╗
674
  # β•‘ OpenAI Community (GPT-2) β•‘
675
  # β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•
@@ -709,6 +851,15 @@ _PRESETS_LIST = [
709
  # ╔══════════════════════════════════════════════════════════════════╗
710
  # β•‘ Shanghai AI Lab (InternLM) β•‘
711
  # β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•
 
 
 
 
 
 
 
 
 
712
  ModelPreset(
713
  name="InternLM2.5 7B Chat",
714
  hf_id="internlm/internlm2_5-7b-chat",
@@ -764,6 +915,14 @@ _PRESETS_LIST = [
764
  # ╔══════════════════════════════════════════════════════════════════╗
765
  # β•‘ Technology Innovation Institute (Falcon) β•‘
766
  # β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•οΏ½οΏ½οΏ½β•β•β•β•β•β•
 
 
 
 
 
 
 
 
767
  ModelPreset(
768
  name="Falcon 7B",
769
  hf_id="tiiuae/falcon-7b",
@@ -773,6 +932,15 @@ _PRESETS_LIST = [
773
  recommended_dtype="float16",
774
  recommended_quantization="4bit",
775
  ),
 
 
 
 
 
 
 
 
 
776
  ModelPreset(
777
  name="Falcon 11B",
778
  hf_id="tiiuae/falcon-11B",
@@ -813,6 +981,15 @@ _PRESETS_LIST = [
813
  recommended_dtype="float16",
814
  recommended_quantization="4bit",
815
  ),
 
 
 
 
 
 
 
 
 
816
  ModelPreset(
817
  name="GLM-4 32B Chat",
818
  hf_id="zai-org/GLM-4-32B-0414",
 
70
  params="0.5B",
71
  recommended_dtype="float16",
72
  ),
73
+ ModelPreset(
74
+ name="Qwen2.5-0.5B Instruct",
75
+ hf_id="Qwen/Qwen2.5-0.5B-Instruct",
76
+ description="Tiny Qwen instruct model, fast ablation studies with chat template.",
77
+ tier="tiny",
78
+ params="0.5B",
79
+ recommended_dtype="float16",
80
+ ),
81
+ ModelPreset(
82
+ name="Qwen3-0.6B",
83
+ hf_id="Qwen/Qwen3-0.6B",
84
+ description="Qwen3 0.6B β€” smallest Qwen3 with think/non-think modes.",
85
+ tier="tiny",
86
+ params="0.6B",
87
+ recommended_dtype="float16",
88
+ ),
89
  ModelPreset(
90
  name="Qwen2.5-1.5B",
91
  hf_id="Qwen/Qwen2.5-1.5B",
 
94
  params="1.5B",
95
  recommended_dtype="float16",
96
  ),
97
+ ModelPreset(
98
+ name="Qwen2.5-1.5B Instruct",
99
+ hf_id="Qwen/Qwen2.5-1.5B-Instruct",
100
+ description="Qwen 1.5B instruct β€” strong multilingual chat model.",
101
+ tier="small",
102
+ params="1.5B",
103
+ recommended_dtype="float16",
104
+ ),
105
+ ModelPreset(
106
+ name="Qwen3-1.7B",
107
+ hf_id="Qwen/Qwen3-1.7B",
108
+ description="Qwen3 1.7B β€” compact Qwen3 with think/non-think modes.",
109
+ tier="small",
110
+ params="1.7B",
111
+ recommended_dtype="float16",
112
+ ),
113
+ ModelPreset(
114
+ name="Qwen2.5-3B Instruct",
115
+ hf_id="Qwen/Qwen2.5-3B-Instruct",
116
+ description="Qwen 3B instruct β€” excellent small chat model.",
117
+ tier="small",
118
+ params="3B",
119
+ recommended_dtype="float16",
120
+ ),
121
+ ModelPreset(
122
+ name="Qwen3-4B",
123
+ hf_id="Qwen/Qwen3-4B",
124
+ description="Qwen3 4B β€” strong reasoning with think/non-think modes. Apache 2.0.",
125
+ tier="small",
126
+ params="4B",
127
+ recommended_dtype="float16",
128
+ ),
129
  ModelPreset(
130
  name="Qwen2.5-7B",
131
  hf_id="Qwen/Qwen2.5-7B",
 
135
  recommended_dtype="float16",
136
  recommended_quantization="4bit",
137
  ),
138
+ ModelPreset(
139
+ name="Qwen2.5-7B Instruct",
140
+ hf_id="Qwen/Qwen2.5-7B-Instruct",
141
+ description="Qwen 7B instruct variant with chat template.",
142
+ tier="medium",
143
+ params="7B",
144
+ recommended_dtype="float16",
145
+ recommended_quantization="4bit",
146
+ ),
147
+ ModelPreset(
148
+ name="Qwen2.5 Coder 7B Instruct",
149
+ hf_id="Qwen/Qwen2.5-Coder-7B-Instruct",
150
+ description="Qwen 7B fine-tuned for code generation and understanding.",
151
+ tier="medium",
152
+ params="7B",
153
+ recommended_dtype="float16",
154
+ recommended_quantization="4bit",
155
+ ),
156
+ ModelPreset(
157
+ name="Qwen3-8B",
158
+ hf_id="Qwen/Qwen3-8B",
159
+ description="Qwen3 8B β€” strong reasoning, think/non-think modes. Apache 2.0.",
160
+ tier="medium",
161
+ params="8B",
162
+ recommended_dtype="float16",
163
+ recommended_quantization="4bit",
164
+ ),
165
  ModelPreset(
166
  name="Qwen2.5-14B",
167
  hf_id="Qwen/Qwen2.5-14B",
 
221
  # β•‘ Allen Institute for AI (AI2) β•‘
222
  # β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•
223
  ModelPreset(
224
+ name="OLMo 2 1B Instruct",
225
+ hf_id="allenai/OLMo-2-0425-1B-Instruct",
226
+ description="AI2's compact fully open 1B instruct model. Apache 2.0.",
227
+ tier="tiny",
228
+ params="1B",
229
+ recommended_dtype="float16",
230
+ ),
231
+ ModelPreset(
232
+ name="OLMo 3 7B Instruct",
233
+ hf_id="allenai/Olmo-3-7B-Instruct",
234
+ description="AI2's fully open 7B instruct model. Apache 2.0.",
235
+ tier="medium",
236
+ params="7B",
237
+ recommended_dtype="float16",
238
+ recommended_quantization="4bit",
239
+ ),
240
+ ModelPreset(
241
+ name="OLMo 2 32B Instruct",
242
  hf_id="allenai/OLMo-2-0325-32B-Instruct",
243
  description="AI2's fully open 32B model (data+code+weights). Apache 2.0.",
244
  tier="large",
 
393
  recommended_dtype="bfloat16",
394
  recommended_quantization="4bit",
395
  ),
396
+ ModelPreset(
397
+ name="DeepSeek-R1 0528 Qwen3-8B",
398
+ hf_id="deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
399
+ description="DeepSeek-R1 reasoning distilled into Qwen3 8B. Latest R1 distillation. MIT.",
400
+ tier="medium",
401
+ params="8B",
402
+ recommended_dtype="float16",
403
+ recommended_quantization="4bit",
404
+ ),
405
  ModelPreset(
406
  name="DeepSeek-V3",
407
  hf_id="deepseek-ai/DeepSeek-V3",
 
549
  params="1.7B",
550
  recommended_dtype="float16",
551
  ),
552
+ ModelPreset(
553
+ name="SmolLM3-3B",
554
+ hf_id="HuggingFaceTB/SmolLM3-3B",
555
+ description="HuggingFace's SmolLM3 3B. Latest efficient small LM.",
556
+ tier="small",
557
+ params="3B",
558
+ recommended_dtype="float16",
559
+ ),
560
 
561
  # ╔══════════════════════════════════════════════════════════════════╗
562
  # β•‘ IBM (Granite) β•‘
 
646
  params="3.8B",
647
  recommended_dtype="float16",
648
  ),
649
+ ModelPreset(
650
+ name="Phi-4 Mini Instruct",
651
+ hf_id="microsoft/Phi-4-mini-instruct",
652
+ description="Microsoft's 3.8B Phi-4 Mini. Strong reasoning for its size. MIT license.",
653
+ tier="small",
654
+ params="3.8B",
655
+ recommended_dtype="float16",
656
+ ),
657
  ModelPreset(
658
  name="Phi-4",
659
  hf_id="microsoft/phi-4",
 
735
  recommended_quantization="4bit",
736
  ),
737
 
738
+ # ╔══════════════════════════════════════════════════════════════════╗
739
+ # β•‘ OpenBMB β•‘
740
+ # β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•
741
+ ModelPreset(
742
+ name="MiniCPM3-4B",
743
+ hf_id="openbmb/MiniCPM3-4B",
744
+ description="OpenBMB's MiniCPM3 4B. Efficient on-device LM with strong reasoning.",
745
+ tier="small",
746
+ params="4B",
747
+ recommended_dtype="float16",
748
+ ),
749
+
750
  # ╔══════════════════════════════════════════════════════════════════╗
751
  # β•‘ Moonshot AI (Kimi) β•‘
752
  # β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•
 
799
  recommended_quantization="4bit",
800
  ),
801
 
802
+ # ╔══════════════════════════════════════════════════════════════════╗
803
+ # β•‘ OpenAI β•‘
804
+ # β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•
805
+ ModelPreset(
806
+ name="GPT-OSS 20B",
807
+ hf_id="openai/gpt-oss-20b",
808
+ description="OpenAI's first open-weight MoE (20B total, 3.6B active). MIT license.",
809
+ tier="large",
810
+ params="20B MoE",
811
+ recommended_dtype="float16",
812
+ recommended_quantization="4bit",
813
+ ),
814
+
815
  # ╔══════════════════════════════════════════════════════════════════╗
816
  # β•‘ OpenAI Community (GPT-2) β•‘
817
  # β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•
 
851
  # ╔══════════════════════════════════════════════════════════════════╗
852
  # β•‘ Shanghai AI Lab (InternLM) β•‘
853
  # β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•
854
+ ModelPreset(
855
+ name="InternLM3-8B Instruct",
856
+ hf_id="internlm/internlm3-8b-instruct",
857
+ description="Shanghai AI Lab's InternLM3 8B instruct. Strong reasoning. Apache 2.0.",
858
+ tier="medium",
859
+ params="8B",
860
+ recommended_dtype="float16",
861
+ recommended_quantization="4bit",
862
+ ),
863
  ModelPreset(
864
  name="InternLM2.5 7B Chat",
865
  hf_id="internlm/internlm2_5-7b-chat",
 
915
  # ╔══════════════════════════════════════════════════════════════════╗
916
  # β•‘ Technology Innovation Institute (Falcon) β•‘
917
  # β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•οΏ½οΏ½οΏ½β•β•β•β•β•β•
918
+ ModelPreset(
919
+ name="Falcon3-3B Instruct",
920
+ hf_id="tiiuae/Falcon3-3B-Instruct",
921
+ description="TII's Falcon3 3B instruct. Modern architecture, Apache 2.0.",
922
+ tier="small",
923
+ params="3B",
924
+ recommended_dtype="float16",
925
+ ),
926
  ModelPreset(
927
  name="Falcon 7B",
928
  hf_id="tiiuae/falcon-7b",
 
932
  recommended_dtype="float16",
933
  recommended_quantization="4bit",
934
  ),
935
+ ModelPreset(
936
+ name="Falcon3-7B Instruct",
937
+ hf_id="tiiuae/Falcon3-7B-Instruct",
938
+ description="TII's Falcon3 7B instruct. Modern architecture, Apache 2.0.",
939
+ tier="medium",
940
+ params="7B",
941
+ recommended_dtype="float16",
942
+ recommended_quantization="4bit",
943
+ ),
944
  ModelPreset(
945
  name="Falcon 11B",
946
  hf_id="tiiuae/falcon-11B",
 
981
  recommended_dtype="float16",
982
  recommended_quantization="4bit",
983
  ),
984
+ ModelPreset(
985
+ name="GLM-4 9B Chat HF",
986
+ hf_id="THUDM/glm-4-9b-chat-hf",
987
+ description="GLM-4 9B chat variant (HuggingFace-compatible format). No trust_remote_code needed.",
988
+ tier="medium",
989
+ params="9B",
990
+ recommended_dtype="float16",
991
+ recommended_quantization="4bit",
992
+ ),
993
  ModelPreset(
994
  name="GLM-4 32B Chat",
995
  hf_id="zai-org/GLM-4-32B-0414",
obliteratus/telemetry.py CHANGED
@@ -5,8 +5,9 @@ HuggingFace Dataset for community leaderboard aggregation. No user
5
  identity, IP addresses, or prompt content is stored β€” only aggregate
6
  benchmark metrics (model name, method, scores, hardware info, timestamp).
7
 
8
- Users can opt in by setting OBLITERATUS_TELEMETRY=1 or calling
9
- enable_telemetry(). Telemetry is disabled by default.
 
10
 
11
  Architecture:
12
  1. Every benchmark/obliteration run appends a record to a local JSONL
@@ -38,9 +39,9 @@ logger = logging.getLogger(__name__)
38
 
39
  # ── Configuration ─────────────────────────────────────────────────────
40
 
41
- _TELEMETRY_ENABLED = os.environ.get("OBLITERATUS_TELEMETRY", "0") == "1"
42
 
43
- # ── Opt-in telemetry state (v2 API) ──────────────────────────────────
44
  _enabled: bool | None = None
45
  _TELEMETRY_REPO = os.environ.get(
46
  "OBLITERATUS_TELEMETRY_REPO", "pliny-the-prompter/obliteratus-telemetry"
@@ -95,12 +96,12 @@ def is_telemetry_enabled() -> bool:
95
 
96
 
97
  def is_enabled() -> bool:
98
- """Check if v2 opt-in telemetry is enabled."""
99
  global _enabled
100
  if _enabled is not None:
101
  return _enabled
102
- env = os.environ.get("OBLITERATUS_TELEMETRY", "")
103
- return env in ("1", "true")
104
 
105
 
106
  # ── Record schema ─────────────────────────────────────────────────────
 
5
  identity, IP addresses, or prompt content is stored β€” only aggregate
6
  benchmark metrics (model name, method, scores, hardware info, timestamp).
7
 
8
+ Telemetry is enabled by default to help the community build better
9
+ benchmarks. Users can opt out at any time by setting OBLITERATUS_TELEMETRY=0
10
+ or calling disable_telemetry().
11
 
12
  Architecture:
13
  1. Every benchmark/obliteration run appends a record to a local JSONL
 
39
 
40
  # ── Configuration ─────────────────────────────────────────────────────
41
 
42
+ _TELEMETRY_ENABLED = os.environ.get("OBLITERATUS_TELEMETRY", "1") != "0"
43
 
44
+ # ── Telemetry state (v2 API) ─────────────────────────────────────────
45
  _enabled: bool | None = None
46
  _TELEMETRY_REPO = os.environ.get(
47
  "OBLITERATUS_TELEMETRY_REPO", "pliny-the-prompter/obliteratus-telemetry"
 
96
 
97
 
98
  def is_enabled() -> bool:
99
+ """Check if telemetry is enabled (on by default, opt out with OBLITERATUS_TELEMETRY=0)."""
100
  global _enabled
101
  if _enabled is not None:
102
  return _enabled
103
+ env = os.environ.get("OBLITERATUS_TELEMETRY", "1")
104
+ return env not in ("0", "false")
105
 
106
 
107
  # ── Record schema ─────────────────────────────────────────────────────
tests/test_telemetry.py CHANGED
@@ -37,18 +37,23 @@ class TestTelemetryConfig:
37
  def setup_method(self):
38
  _reset_telemetry()
39
 
40
- def test_disabled_by_default(self):
41
  with patch.dict(os.environ, {}, clear=True):
 
 
 
 
 
42
  _reset_telemetry()
43
  assert not is_enabled()
44
 
45
- def test_enable_via_env(self):
46
- with patch.dict(os.environ, {"OBLITERATUS_TELEMETRY": "1"}):
47
  _reset_telemetry()
48
- assert is_enabled()
49
 
50
- def test_enable_via_env_true(self):
51
- with patch.dict(os.environ, {"OBLITERATUS_TELEMETRY": "true"}):
52
  _reset_telemetry()
53
  assert is_enabled()
54
 
 
37
  def setup_method(self):
38
  _reset_telemetry()
39
 
40
+ def test_enabled_by_default(self):
41
  with patch.dict(os.environ, {}, clear=True):
42
+ _reset_telemetry()
43
+ assert is_enabled()
44
+
45
+ def test_disable_via_env_zero(self):
46
+ with patch.dict(os.environ, {"OBLITERATUS_TELEMETRY": "0"}):
47
  _reset_telemetry()
48
  assert not is_enabled()
49
 
50
+ def test_disable_via_env_false(self):
51
+ with patch.dict(os.environ, {"OBLITERATUS_TELEMETRY": "false"}):
52
  _reset_telemetry()
53
+ assert not is_enabled()
54
 
55
+ def test_enable_via_env_explicit(self):
56
+ with patch.dict(os.environ, {"OBLITERATUS_TELEMETRY": "1"}):
57
  _reset_telemetry()
58
  assert is_enabled()
59