# obliteratus/tests/test_architecture_profiles.py
# Repository page header (not part of the module): pliny-the-prompter,
# "Upload 127 files", commit 45113e6 (verified).
"""Tests for architecture-aware preset defaults.
Tests the detection logic and recommended parameter overrides for each
architecture class (dense/MoE, standard/reasoning).
"""
from __future__ import annotations
from obliteratus.architecture_profiles import (
ArchitectureClass,
ArchitectureProfile,
ReasoningClass,
detect_architecture,
get_profile_summary,
apply_profile_to_method_config,
)
# ---------------------------------------------------------------------------
# Detection: Dense models
# ---------------------------------------------------------------------------
class TestDenseDetection:
    """Name-only detection of ordinary dense checkpoints."""

    def test_llama_is_dense(self):
        """Llama 3.1 Instruct is dense, standard-reasoning and not MoE."""
        model_id = "meta-llama/Llama-3.1-8B-Instruct"
        detected = detect_architecture(model_id)
        assert detected.arch_class == ArchitectureClass.DENSE
        assert detected.reasoning_class == ReasoningClass.STANDARD
        assert not detected.is_moe

    def test_qwen_dense_is_dense(self):
        """Qwen2.5 7B Instruct is dense and not MoE."""
        model_id = "Qwen/Qwen2.5-7B-Instruct"
        detected = detect_architecture(model_id)
        assert detected.arch_class == ArchitectureClass.DENSE
        assert not detected.is_moe

    def test_gemma_is_dense(self):
        """Gemma 3 27B is dense."""
        detected = detect_architecture("google/gemma-3-27b-it")
        assert detected.arch_class == ArchitectureClass.DENSE

    def test_phi_is_dense(self):
        """Phi-4 mini is dense."""
        detected = detect_architecture("microsoft/Phi-4-mini-instruct")
        assert detected.arch_class == ArchitectureClass.DENSE

    def test_mistral_small_is_dense(self):
        """Mistral Small 24B is dense."""
        detected = detect_architecture(
            "mistralai/Mistral-Small-24B-Instruct-2501"
        )
        assert detected.arch_class == ArchitectureClass.DENSE

    def test_yi_is_dense(self):
        """Yi 1.5 9B Chat is dense."""
        detected = detect_architecture("01-ai/Yi-1.5-9B-Chat")
        assert detected.arch_class == ArchitectureClass.DENSE

    def test_dense_label(self):
        """A dense standard model carries the "Dense Standard" label."""
        detected = detect_architecture("meta-llama/Llama-3.1-8B-Instruct")
        assert detected.profile_label == "Dense Standard"

    def test_dense_recommended_method(self):
        """A dense standard model recommends the "aggressive" method."""
        detected = detect_architecture("meta-llama/Llama-3.1-8B-Instruct")
        assert detected.recommended_method == "aggressive"
# ---------------------------------------------------------------------------
# Detection: MoE models
# ---------------------------------------------------------------------------
class TestMoEDetection:
    """Mixture-of-experts checkpoints must be recognised as MoE."""

    def test_gpt_oss_is_moe(self):
        """GPT-OSS by name only: MoE, and SMALL_MOE when no config is given."""
        detected = detect_architecture("openai/gpt-oss-20b")
        assert detected.is_moe
        assert detected.arch_class == ArchitectureClass.SMALL_MOE

    def test_qwen3_30b_is_small_moe(self):
        """Qwen3-30B-A3B is flagged as MoE."""
        # NOTE(review): the test name says "small", but only the MoE flag is
        # asserted here; the size class is not checked.
        detected = detect_architecture("Qwen/Qwen3-30B-A3B")
        assert detected.is_moe

    def test_deepseek_v3_is_large_moe(self):
        """DeepSeek-V3.2 is flagged as MoE."""
        # NOTE(review): the test name says "large", but only the MoE flag is
        # asserted here.
        detected = detect_architecture("deepseek-ai/DeepSeek-V3.2")
        assert detected.is_moe

    def test_kimi_k2_is_large_moe(self):
        """Kimi-K2 is flagged as MoE."""
        # NOTE(review): the test name says "large", but only the MoE flag is
        # asserted here.
        detected = detect_architecture("moonshotai/Kimi-K2-Instruct")
        assert detected.is_moe

    def test_qwen3_235b_is_moe(self):
        """Qwen3-235B-A22B is flagged as MoE."""
        detected = detect_architecture("Qwen/Qwen3-235B-A22B")
        assert detected.is_moe

    def test_glm_47_is_moe(self):
        """GLM-4.7 is flagged as MoE."""
        detected = detect_architecture("zai-org/GLM-4.7")
        assert detected.is_moe

    def test_llama4_maverick_is_moe(self):
        """Llama 4 Maverick is flagged as MoE."""
        detected = detect_architecture(
            "meta-llama/Llama-4-Maverick-17B-128E-Instruct"
        )
        assert detected.is_moe

    def test_step_flash_is_moe(self):
        """Step-3.5-Flash is flagged as MoE."""
        detected = detect_architecture("stepfun-ai/Step-3.5-Flash")
        assert detected.is_moe

    def test_minimax_is_moe(self):
        """MiniMax-M2.1 is flagged as MoE."""
        detected = detect_architecture("MiniMaxAI/MiniMax-M2.1")
        assert detected.is_moe

    def test_mistral_large_3_is_moe(self):
        """Mistral Large 3 is flagged as MoE."""
        detected = detect_architecture(
            "mistralai/Mistral-Large-3-675B-Instruct-2512"
        )
        assert detected.is_moe

    def test_moe_recommended_method_is_surgical(self):
        """An MoE profile (GPT-OSS here) recommends the "surgical" method."""
        detected = detect_architecture("openai/gpt-oss-20b")
        assert detected.recommended_method == "surgical"

    def test_gpt_oss_with_config_is_small_moe(self):
        """GPT-OSS plus a config exposing 8 experts resolves to SMALL_MOE."""

        class MockConfig:
            # Minimal stand-in for a model config object; only plain class
            # attributes are provided.
            model_type = "gpt_neox"
            num_hidden_layers = 32
            hidden_size = 2560
            intermediate_size = 6912
            vocab_size = 50304
            num_local_experts = 8
            num_experts_per_tok = 2

        mock_cfg = MockConfig()
        detected = detect_architecture("openai/gpt-oss-20b", config=mock_cfg)
        assert detected.is_moe
        assert detected.arch_class == ArchitectureClass.SMALL_MOE
# ---------------------------------------------------------------------------
# Detection: Reasoning models
# ---------------------------------------------------------------------------
class TestReasoningDetection:
    """Reasoning-tuned checkpoints must be told apart from standard ones."""

    def test_r1_distill_qwen_is_reasoning(self):
        """R1-Distill-Qwen-7B is a reasoning model."""
        detected = detect_architecture(
            "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
        )
        assert detected.reasoning_class == ReasoningClass.REASONING

    def test_r1_distill_llama_is_reasoning(self):
        """R1-Distill-Llama-8B is a reasoning model."""
        detected = detect_architecture(
            "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
        )
        assert detected.reasoning_class == ReasoningClass.REASONING

    def test_r1_distill_is_dense_reasoning(self):
        """R1 distills are dense (distilled from MoE into dense)."""
        detected = detect_architecture(
            "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B"
        )
        assert detected.arch_class == ArchitectureClass.DENSE
        assert detected.reasoning_class == ReasoningClass.REASONING
        assert detected.profile_label == "Dense Reasoning"

    def test_olmo_think_is_reasoning(self):
        """The OLMo "Think" variant is a reasoning model."""
        detected = detect_architecture("allenai/Olmo-3.1-32B-Think")
        assert detected.reasoning_class == ReasoningClass.REASONING

    def test_olmo_standard_is_not_reasoning(self):
        """Regression guard: OLMo without "Think" must stay STANDARD."""
        # NOTE(review): the original note attributed the past false positive
        # to an 'o1' substring inside 'olmo'; the name actually spells letter
        # "l", not digit "1" -- confirm which pattern caused the mismatch.
        detected = detect_architecture("allenai/Olmo-3-7B-Instruct")
        assert detected.reasoning_class == ReasoningClass.STANDARD

    def test_falcon3_is_not_reasoning(self):
        """Regression guard: 'falcon3' must not match the 'o3' pattern."""
        detected = detect_architecture("tiiuae/Falcon3-7B-Instruct")
        assert detected.reasoning_class == ReasoningClass.STANDARD

    def test_full_r1_is_moe_reasoning(self):
        """The full DeepSeek-R1 is both MoE and reasoning."""
        detected = detect_architecture("deepseek-ai/DeepSeek-R1")
        assert detected.is_moe
        assert detected.reasoning_class == ReasoningClass.REASONING

    def test_reasoning_dense_more_directions(self):
        """Dense reasoning models get at least 12 directions."""
        detected = detect_architecture(
            "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
        )
        assert detected.arch_class == ArchitectureClass.DENSE
        # A default of 0 makes a missing override fail the threshold.
        assert detected.method_overrides.get("n_directions", 0) >= 12

    def test_reasoning_dense_more_passes(self):
        """Dense reasoning models get at least 4 refinement passes."""
        detected = detect_architecture(
            "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
        )
        assert detected.arch_class == ArchitectureClass.DENSE
        # A default of 0 makes a missing override fail the threshold.
        assert detected.method_overrides.get("refinement_passes", 0) >= 4

    def test_non_reasoning_is_standard(self):
        """A plain instruct model is classified as STANDARD."""
        detected = detect_architecture("meta-llama/Llama-3.1-8B-Instruct")
        assert detected.reasoning_class == ReasoningClass.STANDARD
# ---------------------------------------------------------------------------
# Detection with config object
# ---------------------------------------------------------------------------
class TestConfigDetection:
    """Detection driven by a mock config object, not by the model name."""

    def test_moe_config_attrs(self):
        """A config with num_local_experts is MoE and reports expert counts."""

        class MockConfig:
            model_type = "mixtral"
            num_hidden_layers = 32
            hidden_size = 4096
            intermediate_size = 14336
            vocab_size = 32000
            num_local_experts = 8
            num_experts_per_tok = 2

        mock_cfg = MockConfig()
        detected = detect_architecture(
            "custom/mixtral-model",
            config=mock_cfg,
            num_layers=32,
            hidden_size=4096,
        )
        assert detected.is_moe
        assert detected.num_experts == 8
        assert detected.num_active_experts == 2

    def test_large_moe_threshold(self):
        """MoE models with >100B params should be classified as large."""

        class MockConfig:
            model_type = "deepseek_v3"
            num_hidden_layers = 61
            hidden_size = 7168
            intermediate_size = 18432
            vocab_size = 102400
            # This config names its expert count n_routed_experts, not
            # num_local_experts.
            n_routed_experts = 256
            num_experts_per_tok = 8

        mock_cfg = MockConfig()
        detected = detect_architecture("custom/large-moe", config=mock_cfg)
        assert detected.arch_class == ArchitectureClass.LARGE_MOE

    def test_small_moe_threshold(self):
        """MoE models with <=16 experts should be classified as small."""

        class MockConfig:
            model_type = "mixtral"
            num_hidden_layers = 32
            hidden_size = 4096
            intermediate_size = 14336
            vocab_size = 32000
            num_local_experts = 8
            num_experts_per_tok = 2

        mock_cfg = MockConfig()
        detected = detect_architecture("custom/small-moe", config=mock_cfg)
        assert detected.arch_class == ArchitectureClass.SMALL_MOE

    def test_dense_config(self):
        """A config with no MoE attributes is dense."""

        class MockConfig:
            model_type = "llama"
            num_hidden_layers = 32
            hidden_size = 4096
            intermediate_size = 11008
            vocab_size = 32000

        mock_cfg = MockConfig()
        detected = detect_architecture("custom/dense-model", config=mock_cfg)
        assert detected.arch_class == ArchitectureClass.DENSE
        assert not detected.is_moe

    def test_llama4_scout_is_large_moe(self):
        """Llama 4 Scout: 109B total params with 16 experts -> LARGE_MOE.

        Regression test: params > 100B must override low expert count.
        """

        class MockConfig:
            model_type = "llama4"
            num_hidden_layers = 48
            hidden_size = 5120
            intermediate_size = 14336
            vocab_size = 202048
            num_local_experts = 16
            num_experts_per_tok = 1

        mock_cfg = MockConfig()
        detected = detect_architecture(
            "meta-llama/Llama-4-Scout-17B-16E-Instruct",
            config=mock_cfg,
        )
        assert detected.is_moe
        assert detected.arch_class == ArchitectureClass.LARGE_MOE
# ---------------------------------------------------------------------------
# Recommended defaults validation
# ---------------------------------------------------------------------------
class TestRecommendedDefaults:
    """Recommended defaults per profile, as stated by the research notes.

    Each ``.get`` default is chosen so that a missing key fails the test.
    """

    def test_dense_standard_no_riemannian(self):
        """Dense Standard: Riemannian OFF (manifolds are flat)."""
        detected = detect_architecture("meta-llama/Llama-3.1-8B-Instruct")
        modules = detected.breakthrough_modules
        assert not modules.get("riemannian", True)

    def test_dense_standard_anti_ouroboros_on(self):
        """Dense Standard: Anti-Ouroboros ON for self-repair mapping."""
        detected = detect_architecture("meta-llama/Llama-3.1-8B-Instruct")
        modules = detected.breakthrough_modules
        assert modules.get("anti_ouroboros", False)

    def test_dense_standard_spectral_cert_on(self):
        """Dense Standard: Spectral cert ON for verification."""
        detected = detect_architecture("meta-llama/Llama-3.1-8B-Instruct")
        modules = detected.breakthrough_modules
        assert modules.get("spectral_cert", False)

    def test_moe_conditional_on(self):
        """MoE: Conditional abliteration is #1 technique (Cracken AI 2025)."""
        detected = detect_architecture("openai/gpt-oss-20b")
        modules = detected.breakthrough_modules
        assert modules.get("conditional", False)

    def test_moe_no_project_embeddings(self):
        """MoE: Project embeddings OFF (cascades through router)."""
        detected = detect_architecture("openai/gpt-oss-20b")
        overrides = detected.method_overrides
        assert not overrides.get("project_embeddings", True)

    def test_moe_per_expert_directions(self):
        """MoE: Per-expert directions ON (global directions fail on MoE)."""
        detected = detect_architecture("openai/gpt-oss-20b")
        overrides = detected.method_overrides
        assert overrides.get("per_expert_directions", False)

    def test_large_moe_riemannian_on(self):
        """Large MoE: Riemannian ON (curved shared layer geometry)."""
        detected = detect_architecture("deepseek-ai/DeepSeek-V3.2")
        modules = detected.breakthrough_modules
        assert modules.get("riemannian", False)

    def test_reasoning_dense_jailbreak_contrast(self):
        """Reasoning Dense: Jailbreak contrast ON for thinking-chain refusal."""
        detected = detect_architecture(
            "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
        )
        overrides = detected.method_overrides
        assert overrides.get("use_jailbreak_contrast", False)

    def test_reasoning_moe_gentle_transplant(self):
        """Reasoning MoE: transplant_blend very low (preserve reasoning)."""
        detected = detect_architecture("deepseek-ai/DeepSeek-R1")
        overrides = detected.method_overrides
        # A default of 1.0 makes a missing override fail the upper bound.
        assert overrides.get("transplant_blend", 1.0) <= 0.10
# ---------------------------------------------------------------------------
# Profile summary
# ---------------------------------------------------------------------------
class TestProfileSummary:
    """Checks on the text returned by ``get_profile_summary``."""

    def test_summary_contains_profile_label(self):
        """The summary of a dense standard model names its profile label."""
        detected = detect_architecture("meta-llama/Llama-3.1-8B-Instruct")
        text = get_profile_summary(detected)
        assert "Dense Standard" in text

    def test_summary_contains_method(self):
        """The summary of a dense standard model names the method."""
        detected = detect_architecture("meta-llama/Llama-3.1-8B-Instruct")
        text = get_profile_summary(detected)
        assert "aggressive" in text

    def test_summary_contains_citations(self):
        """The MoE summary mentions at least one of the expected citations."""
        detected = detect_architecture("openai/gpt-oss-20b")
        text = get_profile_summary(detected)
        assert "SAFEx" in text or "Cracken" in text

    def test_summary_contains_moe_info(self):
        """The MoE summary mentions "MoE"."""
        detected = detect_architecture("openai/gpt-oss-20b")
        text = get_profile_summary(detected)
        assert "MoE" in text

    def test_summary_contains_breakthrough_modules(self):
        """The MoE summary mentions the "conditional" module."""
        detected = detect_architecture("openai/gpt-oss-20b")
        text = get_profile_summary(detected)
        assert "conditional" in text
# ---------------------------------------------------------------------------
# apply_profile_to_method_config
# ---------------------------------------------------------------------------
class TestApplyProfile:
    """Merging a profile's overrides into a base method config."""

    def test_overrides_applied(self):
        """An overridden key takes the profile's value in the merged config."""
        from obliteratus.abliterate import METHODS

        detected = detect_architecture(
            "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
        )
        # Copy so the shared METHODS entry is never handed to the merge.
        base_cfg = dict(METHODS["aggressive"])
        result = apply_profile_to_method_config(detected, base_cfg)
        expected = detected.method_overrides["n_directions"]
        assert result["n_directions"] == expected

    def test_non_overridden_preserved(self):
        """A key the profile does not override keeps its base value."""
        from obliteratus.abliterate import METHODS

        detected = detect_architecture("meta-llama/Llama-3.1-8B-Instruct")
        base_cfg = dict(METHODS["aggressive"])
        result = apply_profile_to_method_config(detected, base_cfg)
        # norm_preserve is not in overrides, should come from base
        assert result["norm_preserve"] == base_cfg["norm_preserve"]

    def test_empty_overrides(self):
        """With no overrides the merged config equals the base config."""
        from obliteratus.abliterate import METHODS

        base_cfg = dict(METHODS["advanced"])
        bare_profile = ArchitectureProfile(
            arch_class=ArchitectureClass.DENSE,
            reasoning_class=ReasoningClass.STANDARD,
            method_overrides={},
            breakthrough_modules={},
        )
        result = apply_profile_to_method_config(bare_profile, base_cfg)
        assert result == base_cfg

    def test_override_key_not_in_base_is_added(self):
        """Override keys absent from base config should be added to result.

        This is important for the UI auto-detect path: keys like
        use_jailbreak_contrast may not exist in the base method config
        but are valid pipeline parameters that app.py reads via merged.get().
        """
        from obliteratus.abliterate import METHODS

        base_cfg = dict(METHODS["advanced"])
        extra_key_profile = ArchitectureProfile(
            arch_class=ArchitectureClass.DENSE,
            reasoning_class=ReasoningClass.STANDARD,
            method_overrides={"use_jailbreak_contrast": True},
            breakthrough_modules={},
        )
        result = apply_profile_to_method_config(extra_key_profile, base_cfg)
        assert result["use_jailbreak_contrast"] is True
# ---------------------------------------------------------------------------
# All 6 profile combinations
# ---------------------------------------------------------------------------
class TestAllSixProfiles:
"""Verify label, method, overrides, and breakthrough modules for each profile."""
def _make_moe_config(self, num_experts=8, active=2, layers=32, hidden=4096):
class C:
model_type = "mixtral"
num_hidden_layers = layers
hidden_size = hidden
intermediate_size = hidden * 4
vocab_size = 32000
num_local_experts = num_experts
num_experts_per_tok = active
return C()
def test_dense_standard_full(self):
p = detect_architecture("meta-llama/Llama-3.1-8B-Instruct")
assert p.profile_label == "Dense Standard"
assert p.recommended_method == "aggressive"
assert not p.breakthrough_modules["riemannian"]
assert p.breakthrough_modules["anti_ouroboros"]
assert p.breakthrough_modules["spectral_cert"]
assert not p.breakthrough_modules["conditional"]
assert len(p.profile_description) > 0
assert len(p.research_citations) > 0
def test_dense_reasoning_full(self):
p = detect_architecture("deepseek-ai/DeepSeek-R1-Distill-Qwen-7B")
assert p.profile_label == "Dense Reasoning"
assert p.recommended_method == "aggressive"
assert p.method_overrides["n_directions"] >= 12
assert p.method_overrides["refinement_passes"] >= 4
assert p.method_overrides["use_jailbreak_contrast"] is True
assert p.method_overrides["use_chat_template"] is True
assert p.breakthrough_modules["anti_ouroboros"]
assert p.breakthrough_modules["riemannian"]
assert p.breakthrough_modules["conditional"]
assert p.breakthrough_modules["spectral_cert"]
assert len(p.profile_description) > 0
def test_small_moe_standard_full(self):
config = self._make_moe_config(num_experts=8, active=2)
p = detect_architecture("custom/small-moe-model", config=config)
assert p.profile_label == "Small MoE Standard"
assert p.arch_class == ArchitectureClass.SMALL_MOE
assert p.recommended_method == "surgical"
assert p.method_overrides["per_expert_directions"] is True
assert p.method_overrides["invert_refusal"] is False
assert p.method_overrides["project_embeddings"] is False
assert p.breakthrough_modules["conditional"]
assert p.breakthrough_modules["anti_ouroboros"]
assert p.breakthrough_modules["spectral_cert"]
assert not p.breakthrough_modules["riemannian"]
assert len(p.profile_description) > 0
def test_small_moe_reasoning_full(self):
"""The most fragile combination: MoE + reasoning."""
config = self._make_moe_config(num_experts=8, active=2)
# Add "think" to name to trigger reasoning detection
p = detect_architecture("custom/small-moe-think-model", config=config)
assert p.profile_label == "Small MoE Reasoning"
assert p.arch_class == ArchitectureClass.SMALL_MOE
assert p.reasoning_class == ReasoningClass.REASONING
assert p.recommended_method == "surgical"
assert p.method_overrides["per_expert_directions"] is True
assert p.method_overrides["use_jailbreak_contrast"] is True
assert p.method_overrides["use_chat_template"] is True
assert p.method_overrides["invert_refusal"] is False
assert p.breakthrough_modules["conditional"]
assert p.breakthrough_modules["anti_ouroboros"]
assert p.breakthrough_modules["spectral_cert"]
assert len(p.profile_description) > 0
def test_large_moe_standard_full(self):
config = self._make_moe_config(num_experts=256, active=8, layers=61, hidden=7168)
p = detect_architecture("custom/large-moe-model", config=config)
assert p.profile_label == "Large MoE Standard"
assert p.arch_class == ArchitectureClass.LARGE_MOE
assert p.recommended_method == "surgical"
assert p.method_overrides["per_expert_directions"] is True
assert p.method_overrides["layer_adaptive_strength"] is True
assert p.method_overrides["expert_transplant"] is True
assert p.method_overrides["transplant_blend"] == 0.10
assert p.method_overrides["attention_head_surgery"] is True
assert p.method_overrides["project_embeddings"] is False
assert p.breakthrough_modules["conditional"]
assert p.breakthrough_modules["riemannian"]
assert p.breakthrough_modules["anti_ouroboros"]
assert p.breakthrough_modules["spectral_cert"]
assert len(p.profile_description) > 0
def test_large_moe_reasoning_full(self):
config = self._make_moe_config(num_experts=256, active=8, layers=61, hidden=7168)
p = detect_architecture("custom/large-moe-r1-model", config=config)
assert p.profile_label == "Large MoE Reasoning"
assert p.arch_class == ArchitectureClass.LARGE_MOE
assert p.reasoning_class == ReasoningClass.REASONING
assert p.recommended_method == "surgical"
assert p.method_overrides["n_directions"] == 8
assert p.method_overrides["transplant_blend"] == 0.08
assert p.method_overrides["use_jailbreak_contrast"] is True
assert p.method_overrides["safety_neuron_masking"] is True
assert p.breakthrough_modules["conditional"]
assert p.breakthrough_modules["riemannian"]
assert p.breakthrough_modules["anti_ouroboros"]
assert p.breakthrough_modules["spectral_cert"]
assert len(p.profile_description) > 0
# ---------------------------------------------------------------------------
# Edge cases
# ---------------------------------------------------------------------------
class TestEdgeCases:
    """Boundary inputs for architecture detection."""

    def test_empty_model_name(self):
        """An empty name falls through to Dense Standard."""
        detected = detect_architecture("")
        assert detected.arch_class == ArchitectureClass.DENSE
        assert detected.reasoning_class == ReasoningClass.STANDARD

    def test_unknown_model_type_in_config(self):
        """An unrecognised model_type must not cause MoE classification."""

        class MockConfig:
            model_type = "banana"
            num_hidden_layers = 12
            hidden_size = 768
            intermediate_size = 3072
            vocab_size = 30522

        mock_cfg = MockConfig()
        detected = detect_architecture("custom/unknown-arch", config=mock_cfg)
        assert detected.arch_class == ArchitectureClass.DENSE

    def test_config_with_zero_experts(self):
        """num_local_experts=0 must not trigger MoE."""

        class MockConfig:
            model_type = "llama"
            num_hidden_layers = 32
            hidden_size = 4096
            intermediate_size = 11008
            vocab_size = 32000
            num_local_experts = 0

        mock_cfg = MockConfig()
        detected = detect_architecture(
            "custom/dense-with-zero", config=mock_cfg
        )
        assert not detected.is_moe
        assert detected.arch_class == ArchitectureClass.DENSE

    def test_allcaps_model_name(self):
        """Name matching is case-insensitive, so all-caps names still match."""
        detected = detect_architecture(
            "DEEPSEEK-AI/DEEPSEEK-R1-DISTILL-QWEN-7B"
        )
        assert detected.reasoning_class == ReasoningClass.REASONING
        # distill = dense
        assert detected.arch_class == ArchitectureClass.DENSE

    def test_single_expert_is_moe(self):
        """num_local_experts=1 is technically MoE (single expert)."""

        class MockConfig:
            model_type = "llama"
            num_hidden_layers = 32
            hidden_size = 4096
            intermediate_size = 11008
            vocab_size = 32000
            num_local_experts = 1

        mock_cfg = MockConfig()
        detected = detect_architecture(
            "custom/single-expert", config=mock_cfg
        )
        # 1 expert still triggers MoE detection (the code treats any >0 as MoE)
        assert detected.is_moe