Spaces:
Running on Zero
Running on Zero
| """Tests for architecture-aware preset defaults. | |
| Tests the detection logic and recommended parameter overrides for each | |
| architecture class (dense/MoE, standard/reasoning). | |
| """ | |
| from __future__ import annotations | |
| from obliteratus.architecture_profiles import ( | |
| ArchitectureClass, | |
| ArchitectureProfile, | |
| ReasoningClass, | |
| detect_architecture, | |
| get_profile_summary, | |
| apply_profile_to_method_config, | |
| ) | |
| # --------------------------------------------------------------------------- | |
| # Detection: Dense models | |
| # --------------------------------------------------------------------------- | |
class TestDenseDetection:
    """Name-only detection of well-known dense checkpoints.

    Every test passes just a model identifier (no config object) to
    ``detect_architecture`` and inspects the returned profile.
    """

    # Reference dense model reused by several tests below.
    _LLAMA_8B = "meta-llama/Llama-3.1-8B-Instruct"

    @staticmethod
    def _arch_class_of(model_id):
        """Return the architecture class detected for *model_id*."""
        return detect_architecture(model_id).arch_class

    def test_llama_is_dense(self):
        detected = detect_architecture(self._LLAMA_8B)
        assert detected.arch_class == ArchitectureClass.DENSE
        assert detected.reasoning_class == ReasoningClass.STANDARD
        assert not detected.is_moe

    def test_qwen_dense_is_dense(self):
        detected = detect_architecture("Qwen/Qwen2.5-7B-Instruct")
        assert detected.arch_class == ArchitectureClass.DENSE
        assert not detected.is_moe

    def test_gemma_is_dense(self):
        assert self._arch_class_of("google/gemma-3-27b-it") == ArchitectureClass.DENSE

    def test_phi_is_dense(self):
        assert (
            self._arch_class_of("microsoft/Phi-4-mini-instruct")
            == ArchitectureClass.DENSE
        )

    def test_mistral_small_is_dense(self):
        assert (
            self._arch_class_of("mistralai/Mistral-Small-24B-Instruct-2501")
            == ArchitectureClass.DENSE
        )

    def test_yi_is_dense(self):
        assert self._arch_class_of("01-ai/Yi-1.5-9B-Chat") == ArchitectureClass.DENSE

    def test_dense_label(self):
        label = detect_architecture(self._LLAMA_8B).profile_label
        assert label == "Dense Standard"

    def test_dense_recommended_method(self):
        method = detect_architecture(self._LLAMA_8B).recommended_method
        assert method == "aggressive"
| # --------------------------------------------------------------------------- | |
| # Detection: MoE models | |
| # --------------------------------------------------------------------------- | |
class TestMoEDetection:
    """Detection of mixture-of-experts checkpoints.

    Most tests here assert only the ``is_moe`` flag; the small/large
    split is checked solely where an ``arch_class`` assertion appears.
    """

    _GPT_OSS = "openai/gpt-oss-20b"

    @staticmethod
    def _detect_moe(model_id, **detect_kwargs):
        """Detect *model_id*, assert the profile is MoE, and return it."""
        detected = detect_architecture(model_id, **detect_kwargs)
        assert detected.is_moe
        return detected

    def test_gpt_oss_is_moe(self):
        """GPT-OSS by name alone is MoE and lands in the small (conservative) class."""
        detected = self._detect_moe(self._GPT_OSS)
        assert detected.arch_class == ArchitectureClass.SMALL_MOE

    def test_qwen3_30b_is_small_moe(self):
        self._detect_moe("Qwen/Qwen3-30B-A3B")

    def test_deepseek_v3_is_large_moe(self):
        self._detect_moe("deepseek-ai/DeepSeek-V3.2")

    def test_kimi_k2_is_large_moe(self):
        self._detect_moe("moonshotai/Kimi-K2-Instruct")

    def test_qwen3_235b_is_moe(self):
        self._detect_moe("Qwen/Qwen3-235B-A22B")

    def test_glm_47_is_moe(self):
        self._detect_moe("zai-org/GLM-4.7")

    def test_llama4_maverick_is_moe(self):
        self._detect_moe("meta-llama/Llama-4-Maverick-17B-128E-Instruct")

    def test_step_flash_is_moe(self):
        self._detect_moe("stepfun-ai/Step-3.5-Flash")

    def test_minimax_is_moe(self):
        self._detect_moe("MiniMaxAI/MiniMax-M2.1")

    def test_mistral_large_3_is_moe(self):
        self._detect_moe("mistralai/Mistral-Large-3-675B-Instruct-2512")

    def test_moe_recommended_method_is_surgical(self):
        """The MoE profile for GPT-OSS recommends the surgical method."""
        method = detect_architecture(self._GPT_OSS).recommended_method
        assert method == "surgical"

    def test_gpt_oss_with_config_is_small_moe(self):
        """GPT-OSS plus a config carrying an expert count of 8 is small MoE."""

        class MockConfig:
            model_type = "gpt_neox"
            num_hidden_layers = 32
            hidden_size = 2560
            intermediate_size = 6912
            vocab_size = 50304
            num_local_experts = 8
            num_experts_per_tok = 2

        detected = self._detect_moe(self._GPT_OSS, config=MockConfig())
        assert detected.arch_class == ArchitectureClass.SMALL_MOE
| # --------------------------------------------------------------------------- | |
| # Detection: Reasoning models | |
| # --------------------------------------------------------------------------- | |
class TestReasoningDetection:
    """Detection of reasoning ("thinking") models from the model name."""

    _R1_QWEN_7B = "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"

    @staticmethod
    def _reasoning_class_of(model_id):
        """Return the reasoning class detected for *model_id*."""
        return detect_architecture(model_id).reasoning_class

    def test_r1_distill_qwen_is_reasoning(self):
        assert self._reasoning_class_of(self._R1_QWEN_7B) == ReasoningClass.REASONING

    def test_r1_distill_llama_is_reasoning(self):
        assert (
            self._reasoning_class_of("deepseek-ai/DeepSeek-R1-Distill-Llama-8B")
            == ReasoningClass.REASONING
        )

    def test_r1_distill_is_dense_reasoning(self):
        """R1 distills are expected to be dense *and* reasoning."""
        detected = detect_architecture("deepseek-ai/DeepSeek-R1-Distill-Qwen-14B")
        assert detected.arch_class == ArchitectureClass.DENSE
        assert detected.reasoning_class == ReasoningClass.REASONING
        assert detected.profile_label == "Dense Reasoning"

    def test_olmo_think_is_reasoning(self):
        assert (
            self._reasoning_class_of("allenai/Olmo-3.1-32B-Think")
            == ReasoningClass.REASONING
        )

    def test_olmo_standard_is_not_reasoning(self):
        """OLMo without the Think suffix must stay STANDARD.

        Regression guard: the original author attributes a past false
        positive to an 'o1'-style substring match on OLMo names.
        """
        assert (
            self._reasoning_class_of("allenai/Olmo-3-7B-Instruct")
            == ReasoningClass.STANDARD
        )

    def test_falcon3_is_not_reasoning(self):
        """'falcon3' must not be caught by an 'o3' reasoning pattern."""
        assert (
            self._reasoning_class_of("tiiuae/Falcon3-7B-Instruct")
            == ReasoningClass.STANDARD
        )

    def test_full_r1_is_moe_reasoning(self):
        detected = detect_architecture("deepseek-ai/DeepSeek-R1")
        assert detected.is_moe
        assert detected.reasoning_class == ReasoningClass.REASONING

    def test_reasoning_dense_more_directions(self):
        """Dense reasoning profiles must override n_directions to at least 12."""
        detected = detect_architecture(self._R1_QWEN_7B)
        assert detected.arch_class == ArchitectureClass.DENSE
        # Default of 0 makes a missing override fail the check.
        n_directions = detected.method_overrides.get("n_directions", 0)
        assert n_directions >= 12

    def test_reasoning_dense_more_passes(self):
        """Dense reasoning profiles must override refinement_passes to at least 4."""
        detected = detect_architecture(self._R1_QWEN_7B)
        assert detected.arch_class == ArchitectureClass.DENSE
        # Default of 0 makes a missing override fail the check.
        passes = detected.method_overrides.get("refinement_passes", 0)
        assert passes >= 4

    def test_non_reasoning_is_standard(self):
        assert (
            self._reasoning_class_of("meta-llama/Llama-3.1-8B-Instruct")
            == ReasoningClass.STANDARD
        )
| # --------------------------------------------------------------------------- | |
| # Detection with config object | |
| # --------------------------------------------------------------------------- | |
class TestConfigDetection:
    """Detection driven by a mock config object instead of the name alone."""

    @staticmethod
    def _mock_config(**attrs):
        """Return an instance of a throwaway ``MockConfig`` class.

        *attrs* become class attributes, mirroring a hand-written
        ``class MockConfig:`` body with the same assignments.
        """
        return type("MockConfig", (), attrs)()

    @classmethod
    def _mixtral_8x2(cls):
        """Mixtral-shaped config: 8 local experts, 2 active per token."""
        return cls._mock_config(
            model_type="mixtral",
            num_hidden_layers=32,
            hidden_size=4096,
            intermediate_size=14336,
            vocab_size=32000,
            num_local_experts=8,
            num_experts_per_tok=2,
        )

    def test_moe_config_attrs(self):
        """A config exposing num_local_experts is detected as MoE."""
        detected = detect_architecture(
            "custom/mixtral-model",
            config=self._mixtral_8x2(),
            num_layers=32,
            hidden_size=4096,
        )
        assert detected.is_moe
        assert detected.num_experts == 8
        assert detected.num_active_experts == 2

    def test_large_moe_threshold(self):
        """A DeepSeek-V3-shaped config (256 routed experts) is LARGE_MOE."""
        big = self._mock_config(
            model_type="deepseek_v3",
            num_hidden_layers=61,
            hidden_size=7168,
            intermediate_size=18432,
            vocab_size=102400,
            n_routed_experts=256,
            num_experts_per_tok=8,
        )
        detected = detect_architecture("custom/large-moe", config=big)
        assert detected.arch_class == ArchitectureClass.LARGE_MOE

    def test_small_moe_threshold(self):
        """A Mixtral-shaped config with 8 experts is SMALL_MOE."""
        detected = detect_architecture("custom/small-moe", config=self._mixtral_8x2())
        assert detected.arch_class == ArchitectureClass.SMALL_MOE

    def test_dense_config(self):
        """A config carrying no expert attributes is dense."""
        plain = self._mock_config(
            model_type="llama",
            num_hidden_layers=32,
            hidden_size=4096,
            intermediate_size=11008,
            vocab_size=32000,
        )
        detected = detect_architecture("custom/dense-model", config=plain)
        assert detected.arch_class == ArchitectureClass.DENSE
        assert not detected.is_moe

    def test_llama4_scout_is_large_moe(self):
        """Llama 4 Scout (16 experts) must still be classified LARGE_MOE.

        Regression test from the original suite: a total parameter count
        above 100B has to win over the low expert count.
        """
        scout = self._mock_config(
            model_type="llama4",
            num_hidden_layers=48,
            hidden_size=5120,
            intermediate_size=14336,
            vocab_size=202048,
            num_local_experts=16,
            num_experts_per_tok=1,
        )
        detected = detect_architecture(
            "meta-llama/Llama-4-Scout-17B-16E-Instruct",
            config=scout,
        )
        assert detected.is_moe
        assert detected.arch_class == ArchitectureClass.LARGE_MOE
| # --------------------------------------------------------------------------- | |
| # Recommended defaults validation | |
| # --------------------------------------------------------------------------- | |
class TestRecommendedDefaults:
    """Spot-checks of the per-profile recommended defaults.

    Each ``.get`` uses a default that is the *opposite* of the expected
    value, so a missing key fails the test instead of passing silently.
    """

    _DENSE = "meta-llama/Llama-3.1-8B-Instruct"
    _SMALL_MOE = "openai/gpt-oss-20b"
    _LARGE_MOE = "deepseek-ai/DeepSeek-V3.2"
    _DENSE_REASONING = "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
    _MOE_REASONING = "deepseek-ai/DeepSeek-R1"

    def test_dense_standard_no_riemannian(self):
        """Dense Standard: Riemannian OFF (manifolds are flat)."""
        modules = detect_architecture(self._DENSE).breakthrough_modules
        assert not modules.get("riemannian", True)

    def test_dense_standard_anti_ouroboros_on(self):
        """Dense Standard: Anti-Ouroboros ON for self-repair mapping."""
        modules = detect_architecture(self._DENSE).breakthrough_modules
        assert modules.get("anti_ouroboros", False)

    def test_dense_standard_spectral_cert_on(self):
        """Dense Standard: spectral certificate ON for verification."""
        modules = detect_architecture(self._DENSE).breakthrough_modules
        assert modules.get("spectral_cert", False)

    def test_moe_conditional_on(self):
        """MoE: conditional abliteration ON (cited as Cracken AI 2025)."""
        modules = detect_architecture(self._SMALL_MOE).breakthrough_modules
        assert modules.get("conditional", False)

    def test_moe_no_project_embeddings(self):
        """MoE: project_embeddings OFF (cascades through the router)."""
        overrides = detect_architecture(self._SMALL_MOE).method_overrides
        assert not overrides.get("project_embeddings", True)

    def test_moe_per_expert_directions(self):
        """MoE: per-expert directions ON (global directions fail on MoE)."""
        overrides = detect_architecture(self._SMALL_MOE).method_overrides
        assert overrides.get("per_expert_directions", False)

    def test_large_moe_riemannian_on(self):
        """Large MoE: Riemannian ON (curved shared-layer geometry)."""
        modules = detect_architecture(self._LARGE_MOE).breakthrough_modules
        assert modules.get("riemannian", False)

    def test_reasoning_dense_jailbreak_contrast(self):
        """Reasoning Dense: jailbreak contrast ON for thinking-chain refusal."""
        overrides = detect_architecture(self._DENSE_REASONING).method_overrides
        assert overrides.get("use_jailbreak_contrast", False)

    def test_reasoning_moe_gentle_transplant(self):
        """Reasoning MoE: transplant_blend capped at 0.10 to preserve reasoning."""
        overrides = detect_architecture(self._MOE_REASONING).method_overrides
        blend = overrides.get("transplant_blend", 1.0)
        assert blend <= 0.10
| # --------------------------------------------------------------------------- | |
| # Profile summary | |
| # --------------------------------------------------------------------------- | |
class TestProfileSummary:
    """Content checks on the text returned by ``get_profile_summary``."""

    _DENSE = "meta-llama/Llama-3.1-8B-Instruct"
    _MOE = "openai/gpt-oss-20b"

    @staticmethod
    def _summary_for(model_id):
        """Detect *model_id* by name and return its rendered summary."""
        return get_profile_summary(detect_architecture(model_id))

    def test_summary_contains_profile_label(self):
        assert "Dense Standard" in self._summary_for(self._DENSE)

    def test_summary_contains_method(self):
        assert "aggressive" in self._summary_for(self._DENSE)

    def test_summary_contains_citations(self):
        text = self._summary_for(self._MOE)
        # Either citation marker is sufficient.
        assert any(marker in text for marker in ("SAFEx", "Cracken"))

    def test_summary_contains_moe_info(self):
        assert "MoE" in self._summary_for(self._MOE)

    def test_summary_contains_breakthrough_modules(self):
        assert "conditional" in self._summary_for(self._MOE)
| # --------------------------------------------------------------------------- | |
| # apply_profile_to_method_config | |
| # --------------------------------------------------------------------------- | |
class TestApplyProfile:
    """Merging of profile overrides into a base method configuration.

    ``METHODS`` is imported inside each test, as in the original suite,
    so ``obliteratus.abliterate`` is only loaded when these tests run.
    """

    @staticmethod
    def _bare_dense_profile(overrides):
        """Hand-built Dense/Standard profile with the given method overrides."""
        return ArchitectureProfile(
            arch_class=ArchitectureClass.DENSE,
            reasoning_class=ReasoningClass.STANDARD,
            method_overrides=overrides,
            breakthrough_modules={},
        )

    def test_overrides_applied(self):
        from obliteratus.abliterate import METHODS

        detected = detect_architecture("deepseek-ai/DeepSeek-R1-Distill-Qwen-7B")
        base_cfg = dict(METHODS["aggressive"])
        result = apply_profile_to_method_config(detected, base_cfg)
        expected = detected.method_overrides["n_directions"]
        assert result["n_directions"] == expected

    def test_non_overridden_preserved(self):
        from obliteratus.abliterate import METHODS

        detected = detect_architecture("meta-llama/Llama-3.1-8B-Instruct")
        base_cfg = dict(METHODS["aggressive"])
        result = apply_profile_to_method_config(detected, base_cfg)
        # norm_preserve is expected to be absent from the overrides, so
        # the merged value has to be the one from the base config.
        assert result["norm_preserve"] == base_cfg["norm_preserve"]

    def test_empty_overrides(self):
        from obliteratus.abliterate import METHODS

        base_cfg = dict(METHODS["advanced"])
        blank = self._bare_dense_profile({})
        result = apply_profile_to_method_config(blank, base_cfg)
        assert result == base_cfg

    def test_override_key_not_in_base_is_added(self):
        """Override keys missing from the base config must appear in the result.

        This matters for the UI auto-detect path: keys such as
        use_jailbreak_contrast may be absent from the base method config
        yet are valid pipeline parameters that app.py reads via
        merged.get().
        """
        from obliteratus.abliterate import METHODS

        base_cfg = dict(METHODS["advanced"])
        custom = self._bare_dense_profile({"use_jailbreak_contrast": True})
        result = apply_profile_to_method_config(custom, base_cfg)
        assert result["use_jailbreak_contrast"] is True
| # --------------------------------------------------------------------------- | |
| # All 6 profile combinations | |
| # --------------------------------------------------------------------------- | |
class TestAllSixProfiles:
    """End-to-end checks of label, method, overrides and modules per profile.

    Covers the six combinations of {Dense, Small MoE, Large MoE} with
    {Standard, Reasoning}. MoE cases use a synthetic Mixtral-style config.
    """

    def _make_moe_config(self, num_experts=8, active=2, layers=32, hidden=4096):
        """Build a Mixtral-style mock config with the requested dimensions.

        ``intermediate_size`` is derived as ``hidden * 4``; all values are
        exposed as class attributes of a throwaway class named ``C``.
        """
        attrs = {
            "model_type": "mixtral",
            "num_hidden_layers": layers,
            "hidden_size": hidden,
            "intermediate_size": hidden * 4,
            "vocab_size": 32000,
            "num_local_experts": num_experts,
            "num_experts_per_tok": active,
        }
        return type("C", (), attrs)()

    def test_dense_standard_full(self):
        prof = detect_architecture("meta-llama/Llama-3.1-8B-Instruct")
        assert prof.profile_label == "Dense Standard"
        assert prof.recommended_method == "aggressive"
        modules = prof.breakthrough_modules
        assert not modules["riemannian"]
        assert modules["anti_ouroboros"]
        assert modules["spectral_cert"]
        assert not modules["conditional"]
        assert len(prof.profile_description) > 0
        assert len(prof.research_citations) > 0

    def test_dense_reasoning_full(self):
        prof = detect_architecture("deepseek-ai/DeepSeek-R1-Distill-Qwen-7B")
        assert prof.profile_label == "Dense Reasoning"
        assert prof.recommended_method == "aggressive"
        overrides = prof.method_overrides
        assert overrides["n_directions"] >= 12
        assert overrides["refinement_passes"] >= 4
        for flag in ("use_jailbreak_contrast", "use_chat_template"):
            assert overrides[flag] is True
        modules = prof.breakthrough_modules
        for module in ("anti_ouroboros", "riemannian", "conditional", "spectral_cert"):
            assert modules[module]
        assert len(prof.profile_description) > 0

    def test_small_moe_standard_full(self):
        cfg = self._make_moe_config(num_experts=8, active=2)
        prof = detect_architecture("custom/small-moe-model", config=cfg)
        assert prof.profile_label == "Small MoE Standard"
        assert prof.arch_class == ArchitectureClass.SMALL_MOE
        assert prof.recommended_method == "surgical"
        overrides = prof.method_overrides
        assert overrides["per_expert_directions"] is True
        for flag in ("invert_refusal", "project_embeddings"):
            assert overrides[flag] is False
        modules = prof.breakthrough_modules
        for module in ("conditional", "anti_ouroboros", "spectral_cert"):
            assert modules[module]
        assert not modules["riemannian"]
        assert len(prof.profile_description) > 0

    def test_small_moe_reasoning_full(self):
        """MoE + reasoning, described in the original suite as the most fragile combo."""
        cfg = self._make_moe_config(num_experts=8, active=2)
        # "think" in the model name is what triggers reasoning detection here.
        prof = detect_architecture("custom/small-moe-think-model", config=cfg)
        assert prof.profile_label == "Small MoE Reasoning"
        assert prof.arch_class == ArchitectureClass.SMALL_MOE
        assert prof.reasoning_class == ReasoningClass.REASONING
        assert prof.recommended_method == "surgical"
        overrides = prof.method_overrides
        for flag in (
            "per_expert_directions",
            "use_jailbreak_contrast",
            "use_chat_template",
        ):
            assert overrides[flag] is True
        assert overrides["invert_refusal"] is False
        modules = prof.breakthrough_modules
        for module in ("conditional", "anti_ouroboros", "spectral_cert"):
            assert modules[module]
        assert len(prof.profile_description) > 0

    def test_large_moe_standard_full(self):
        cfg = self._make_moe_config(num_experts=256, active=8, layers=61, hidden=7168)
        prof = detect_architecture("custom/large-moe-model", config=cfg)
        assert prof.profile_label == "Large MoE Standard"
        assert prof.arch_class == ArchitectureClass.LARGE_MOE
        assert prof.recommended_method == "surgical"
        overrides = prof.method_overrides
        for flag in (
            "per_expert_directions",
            "layer_adaptive_strength",
            "expert_transplant",
        ):
            assert overrides[flag] is True
        assert overrides["transplant_blend"] == 0.10
        assert overrides["attention_head_surgery"] is True
        assert overrides["project_embeddings"] is False
        modules = prof.breakthrough_modules
        for module in ("conditional", "riemannian", "anti_ouroboros", "spectral_cert"):
            assert modules[module]
        assert len(prof.profile_description) > 0

    def test_large_moe_reasoning_full(self):
        cfg = self._make_moe_config(num_experts=256, active=8, layers=61, hidden=7168)
        # "r1" in the model name is what marks this one as a reasoning model.
        prof = detect_architecture("custom/large-moe-r1-model", config=cfg)
        assert prof.profile_label == "Large MoE Reasoning"
        assert prof.arch_class == ArchitectureClass.LARGE_MOE
        assert prof.reasoning_class == ReasoningClass.REASONING
        assert prof.recommended_method == "surgical"
        overrides = prof.method_overrides
        assert overrides["n_directions"] == 8
        assert overrides["transplant_blend"] == 0.08
        for flag in ("use_jailbreak_contrast", "safety_neuron_masking"):
            assert overrides[flag] is True
        modules = prof.breakthrough_modules
        for module in ("conditional", "riemannian", "anti_ouroboros", "spectral_cert"):
            assert modules[module]
        assert len(prof.profile_description) > 0
| # --------------------------------------------------------------------------- | |
| # Edge cases | |
| # --------------------------------------------------------------------------- | |
class TestEdgeCases:
    """Boundary inputs for ``detect_architecture``."""

    @staticmethod
    def _mock_config(**attrs):
        """Return an instance of a throwaway ``MockConfig`` class.

        *attrs* become class attributes, mirroring a hand-written
        ``class MockConfig:`` body with the same assignments.
        """
        return type("MockConfig", (), attrs)()

    @classmethod
    def _llama_config(cls, **extra):
        """Llama-shaped dense config, optionally extended with *extra* attrs."""
        return cls._mock_config(
            model_type="llama",
            num_hidden_layers=32,
            hidden_size=4096,
            intermediate_size=11008,
            vocab_size=32000,
            **extra,
        )

    def test_empty_model_name(self):
        """An empty name falls through to Dense Standard."""
        detected = detect_architecture("")
        assert detected.arch_class == ArchitectureClass.DENSE
        assert detected.reasoning_class == ReasoningClass.STANDARD

    def test_unknown_model_type_in_config(self):
        """An unrecognised model_type must not produce an MoE classification."""
        odd = self._mock_config(
            model_type="banana",
            num_hidden_layers=12,
            hidden_size=768,
            intermediate_size=3072,
            vocab_size=30522,
        )
        detected = detect_architecture("custom/unknown-arch", config=odd)
        assert detected.arch_class == ArchitectureClass.DENSE

    def test_config_with_zero_experts(self):
        """num_local_experts=0 must not trigger MoE."""
        cfg = self._llama_config(num_local_experts=0)
        detected = detect_architecture("custom/dense-with-zero", config=cfg)
        assert not detected.is_moe
        assert detected.arch_class == ArchitectureClass.DENSE

    def test_allcaps_model_name(self):
        """Name matching is case-insensitive, so all-caps names still work."""
        detected = detect_architecture("DEEPSEEK-AI/DEEPSEEK-R1-DISTILL-QWEN-7B")
        assert detected.reasoning_class == ReasoningClass.REASONING
        # A distill is expected to be dense.
        assert detected.arch_class == ArchitectureClass.DENSE

    def test_single_expert_is_moe(self):
        """num_local_experts=1 still counts as MoE."""
        cfg = self._llama_config(num_local_experts=1)
        detected = detect_architecture("custom/single-expert", config=cfg)
        # Per the original note, the detector treats any expert count > 0 as MoE.
        assert detected.is_moe