# Hugging Face Space: Intel/low_bit_open_llm_leaderboard (status: Running)
# File size: 29,684 Bytes

"""Unit tests for auto_eval (add_new_eval) and auto_quant (add_new_quant).

Test models:
  - nytopop/Qwen3-30B-A3B.w4a16   (quantized W4A16 → auto_eval)
  - Qwen/Qwen3-30B-A3B            (FP bfloat16 → auto_quant)
"""

import json
import logging
import re
import sys
import os
from types import SimpleNamespace
from pathlib import Path

# Ensure project root is on the path
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

logging.basicConfig(level=logging.DEBUG, format="%(name)s %(levelname)s: %(message)s")
logger = logging.getLogger("test_submit")

# ── Imports from the project ─────────────────────────────────────────────────
from transformers import AutoConfig
from huggingface_hub import HfApi

from src.submission.check_validity import (
    get_model_size,
    get_quantized_model_parameters_memory,
    validate_quantization_scheme,
    estimate_weight_memory_gb,
    estimate_quantization_memory_gb,
    get_num_layers,
    select_gpu,
    SUPPORTED_QUANT_SCHEMES,
    SUPPORTED_INPUT_DTYPES,
    PRECISION_TO_BITS,
    BYTES,
    KNOWN_SIZE_FACTOR,
    get_gpu_display_name,
    is_model_on_hub,
)
import src.submission.check_validity as check_validity
from src.submission.submit import (
    _normalize_file_tag_component,
    add_new_eval,
    add_new_quant,
)
import src.submission.submit as submit_module

API = HfApi()


def _is_error(result: str) -> bool:
    """Check if result is a styled_error (red) response."""
    return "color: red" in result

def _is_success(result: str) -> bool:
    """Check if result is a styled_message (green) response."""
    return "color: green" in result

def _is_warning(result: str) -> bool:
    """Check if result is a styled_warning (orange) response."""
    return "color: orange" in result

def _consume_generator(gen):
    """Consume a generator (or plain value), return the last yielded value."""
    if hasattr(gen, '__next__'):
        result = None
        for value in gen:
            result = value
        return result
    return gen

# ═══════════════════════════════════════════════════════════════════════════════
# Helper: inspect model config
# ═══════════════════════════════════════════════════════════════════════════════

def inspect_model(model_name: str, revision: str = "main"):
    """Fetch and print model config details for debugging.

    Loads the model's ``AutoConfig`` and its Hub ``model_info`` and prints a
    summary of each. Both lookups are best-effort: a failed lookup is reported
    on stdout and its slot in the return value is ``None``. Printing is kept
    separate from loading so that a display problem (for example a
    quantization config that is not JSON-serializable) cannot discard an
    object that was fetched successfully.

    Args:
        model_name: Repo id passed to ``AutoConfig.from_pretrained`` and
            ``API.model_info``.
        revision: Revision passed to both lookups.

    Returns:
        A ``(config, info)`` tuple; either element is ``None`` if its lookup
        raised.
    """
    banner = "=" * 70
    print(f"\n{banner}")
    print(f"  Inspecting: {model_name}")
    print(banner)

    # 1. AutoConfig
    config = None
    try:
        # NOTE(review): trust_remote_code=True lets the model repo run its own
        # Python code; only point this helper at repos you trust.
        config = AutoConfig.from_pretrained(model_name, revision=revision, trust_remote_code=True)
    except Exception as e:
        print(f"[Config] ERROR: {e}")

    if config is not None:
        try:
            print(f"\n[Config] architectures:        {getattr(config, 'architectures', None)}")
            print(f"[Config] torch_dtype:           {getattr(config, 'torch_dtype', None)}")
            print(f"[Config] num_hidden_layers:     {getattr(config, 'num_hidden_layers', None)}")
            print(f"[Config] num_attention_heads:    {getattr(config, 'num_attention_heads', None)}")
            print(f"[Config] hidden_size:           {getattr(config, 'hidden_size', None)}")

            # MoE-specific
            for moe_attr in ("num_experts", "num_local_experts", "num_experts_per_tok"):
                val = getattr(config, moe_attr, None)
                if val is not None:
                    print(f"[Config] {moe_attr}: {val}")

            # quantization_config
            qc = getattr(config, "quantization_config", None)
            if qc is not None:
                if hasattr(qc, "to_dict"):
                    qc_dict = qc.to_dict()
                elif isinstance(qc, dict):
                    qc_dict = qc
                else:
                    qc_dict = {"raw": str(qc)}
                # default=str keeps this debug dump from failing on values
                # json cannot encode natively.
                print(f"[Config] quantization_config:   {json.dumps(qc_dict, indent=2, default=str)}")
            else:
                print("[Config] quantization_config:   None")
        except Exception as e:
            # Display-only failure: report it but keep the loaded config.
            print(f"[Config] ERROR: {e}")

    # 2. Model info from HF API
    info = None
    try:
        info = API.model_info(repo_id=model_name, revision=revision)
    except Exception as e:
        print(f"[ModelInfo] ERROR: {e}")

    if info is not None:
        try:
            print(f"\n[ModelInfo] id:      {info.id}")
            print(f"[ModelInfo] likes:   {info.likes}")
            print(f"[ModelInfo] siblings count: {len(info.siblings) if info.siblings else 0}")

            # List sibling files (first 20)
            if info.siblings:
                fnames = [s.rfilename for s in info.siblings]
                print("[ModelInfo] files (first 20):")
                for f in fnames[:20]:
                    print(f"    {f}")
                if len(fnames) > 20:
                    print(f"    ... and {len(fnames) - 20} more")
        except Exception as e:
            # Display-only failure: report it but keep the fetched info.
            print(f"[ModelInfo] ERROR: {e}")

    return config, info

def test_file_tag_component_prefers_parenthesized_value():
    """The parenthesized part of a label is used; a bare label is kept as-is."""
    cases = (
        ("INT4 (W4A16)", "W4A16"),
        ("INT8 ( W8A16 )", "W8A16"),
        ("MXFP4", "MXFP4"),
    )
    for label, expected_tag in cases:
        assert _normalize_file_tag_component(label) == expected_tag


def test_gpu_display_name_uses_full_label():
    """Short GPU keys map to the expected display labels."""
    expected_labels = {
        "4090": "NVIDIA GeForce RTX 4090",
        "A100": "NVIDIA A100-SXM4-80GB",
        "H200": "H200",
    }
    for gpu_key, display_label in expected_labels.items():
        assert get_gpu_display_name(gpu_key) == display_label


def test_get_num_layers_supports_nested_raw_config_dict():
    """A plain dict with the layer count under ``text_config`` yields 36."""
    nested_text_config = {
        "num_hidden_layers": 36,
        "torch_dtype": "bfloat16",
    }
    raw_config = {"model_type": "qwen3_5", "text_config": nested_text_config}

    layer_count = get_num_layers(raw_config)

    assert layer_count == 36


def test_get_model_size_uses_config_param_count_fallback(monkeypatch):
    """With safetensors metadata failing, size is taken from ``num_parameters``."""
    def _metadata_unavailable(*_args, **_kwargs):
        raise RuntimeError("metadata unavailable")

    # Make the metadata lookup raise so the config-based path is exercised.
    monkeypatch.setattr(check_validity, "get_safetensors_metadata", _metadata_unavailable)

    stub_info = SimpleNamespace(id="org/custom-model")
    stub_config = {"num_parameters": "0.8B"}
    params_b, size_gb = get_model_size(stub_info, precision="16bit", model_config=stub_config)

    assert params_b == 0.8
    assert size_gb == 1.6


def test_is_model_on_hub_returns_authorization_guidance_for_gated_repo(monkeypatch):
    """A gated-repo failure yields ``ok=False``, no config, and access guidance."""
    def _gated_repo_failure(*_args, **_kwargs):
        raise RuntimeError("You are trying to access a gated repo.")

    monkeypatch.setattr(check_validity.AutoConfig, "from_pretrained", _gated_repo_failure)

    outcome = is_model_on_hub("org/gated-model", revision="main")
    ok, message, config = outcome

    assert ok is False
    assert config is None
    for expected_fragment in (
        "https://huggingface.co/org/gated-model",
        "request or accept access first",
    ):
        assert expected_fragment in message


def test_add_new_quant_surfaces_gated_repo_authorization_message(monkeypatch):
    """``add_new_quant`` passes the gated-repo guidance through as a warning."""
    gated_message = "is gated on the Hugging Face Hub. Please open https://huggingface.co/org/gated-model and request or accept access first. After access is granted, resubmit the model."

    def _gated_hub_lookup(**_kwargs):
        # Mimic is_model_on_hub reporting a gated repository.
        return (False, gated_message, None)

    monkeypatch.setattr("src.submission.submit._load_quant_cache", lambda: None)
    monkeypatch.setattr("src.submission.submit._common_pre_checks", lambda *_args, **_kwargs: None)
    monkeypatch.setattr("src.submission.submit.is_model_on_hub", _gated_hub_lookup)

    submission = add_new_quant(
        model="org/gated-model",
        revision="main",
        private=False,
    )
    result = _consume_generator(submission)

    assert _is_warning(result)
    assert "https://huggingface.co/org/gated-model" in result
    assert "request or accept access first" in result


def test_add_new_quant_allows_whitelisted_resubmit_for_failed_entry(monkeypatch, tmp_path):
    """A whitelisted submitter can resubmit a quant request that previously failed.

    Seeds a "Quant Failed" status record and a stale "Pending" request copy,
    marks the dedup key as already requested, stubs the Hub/size/GPU helpers on
    ``submit_module``, then checks that ``add_new_quant`` still uploads,
    reports success, and uses a file tag containing a digit suffix.
    """
    def _always(value):
        # Stub factory: accept any arguments and return a fixed value.
        return lambda *_args, **_kwargs: value

    status_root, pending_root = tmp_path / "status", tmp_path / "pending"
    status_dir, pending_dir = status_root / "quant", pending_root / "quant"
    for directory in (status_dir, pending_dir):
        directory.mkdir(parents=True)

    scheme = SUPPORTED_QUANT_SCHEMES["INT4 (W4A16)"]
    model_name = "org/model"
    dedup_key = (
        f"{model_name}_main_{scheme.name}_{scheme.precision}_{scheme.weight_dtype}_{scheme.name}"
    )

    failed_entry = {
        "model": model_name,
        "revision": "main",
        "quant_scheme": scheme.name,
        "quant_precision": scheme.precision,
        "quant_weight_dtype": scheme.weight_dtype,
        "status": "Quant Failed",
    }
    pending_entry = {**failed_entry, "status": "Pending"}
    (status_dir / "failed.json").write_text(json.dumps(failed_entry), encoding="utf-8")
    (pending_dir / "stale_request_copy.json").write_text(json.dumps(pending_entry), encoding="utf-8")

    # Each upload call records the file tag it was given.
    uploaded_tags = []

    def _fake_upload(entry, user_name, model_path, file_tag, model, task_label="eval"):
        uploaded_tags.append(file_tag)

    module_stubs = {
        "GIT_STATUS_PATH": str(status_root),
        "GIT_REQUESTS_PATH": str(pending_root),
        "SIZE_WHITELIST": {"alice"},
        "_QUANT_REQUESTED": {dedup_key},
        "_SUBMITTER_DATES": {},
        "_load_quant_cache": lambda: None,
        "_common_pre_checks": _always(None),
        "is_model_on_hub": lambda **_kwargs: (
            True,
            "",
            {"architectures": ["TestArch"], "torch_dtype": "float16", "num_hidden_layers": 24},
        ),
        "check_model_card": lambda *_args, **_kwargs: (
            True,
            "",
            SimpleNamespace(text="x" * 300, data=SimpleNamespace(tags=[])),
        ),
        "get_model_tags": lambda *_args, **_kwargs: [],
        "is_license_approved": _always(True),
        "get_model_size": _always((7.0, 14.0)),
        "get_num_layers": _always(24),
        "estimate_quantization_memory_gb": _always(12.0),
        "estimate_weight_memory_gb": _always(5.0),
        "select_gpu_with_override": _always(("A100", 1)),
        "get_gpu_display_name": lambda value: value,
        "compute_single_eta": _always(1),
        "format_eta": _always("1h"),
        "_upload_to_hub": _fake_upload,
    }
    for attr_name, stub in module_stubs.items():
        monkeypatch.setattr(submit_module, attr_name, stub)
    monkeypatch.setattr(
        submit_module.API,
        "model_info",
        lambda **_kwargs: SimpleNamespace(cardData={"license": "apache-2.0"}, likes=0),
    )

    result = _consume_generator(add_new_quant(
        model=model_name,
        revision="main",
        private=False,
        quant_scheme="INT4 (W4A16)",
        submitted_by="alice",
    ))

    assert len(uploaded_tags) > 0
    assert _is_success(result)
    # Re-submission must not overwrite the previous failed status file: the
    # filename gets a timestamp suffix appended to keep both records.
    last_tag = uploaded_tags[-1]
    assert last_tag is not None
    assert re.search(r"_\d{4}", last_tag), last_tag


def test_add_new_eval_allows_whitelisted_resubmit_for_failed_entry(monkeypatch, tmp_path):
    """A whitelisted submitter can resubmit an eval request that previously failed.

    Seeds an "Eval Failed" status record and a stale "Pending" request copy,
    marks the dedup key as already requested, stubs the Hub/size/GPU helpers on
    ``submit_module``, then checks that ``add_new_eval`` still uploads, reports
    success, and uses a file tag containing a digit suffix.
    """
    def _always(value):
        # Stub factory: accept any arguments and return a fixed value.
        return lambda *_args, **_kwargs: value

    status_root, pending_root = tmp_path / "status", tmp_path / "pending"
    status_dir, pending_dir = status_root / "eval", pending_root / "eval"
    for directory in (status_dir, pending_dir):
        directory.mkdir(parents=True)

    model_name = "org/quant-model"
    dedup_key = f"{model_name}_main_AutoRound_4bit_int4_INT4 (W4A16)"

    failed_entry = {
        "model": model_name,
        "revision": "main",
        "quant_type": "AutoRound",
        "precision": "4bit",
        "weight_dtype": "int4",
        "compute_dtype": "INT4 (W4A16)",
        "status": "Eval Failed",
    }
    pending_entry = {**failed_entry, "status": "Pending"}
    (status_dir / "failed.json").write_text(json.dumps(failed_entry), encoding="utf-8")
    (pending_dir / "stale_request_copy.json").write_text(json.dumps(pending_entry), encoding="utf-8")

    # Each upload call records the file tag it was given.
    uploaded_tags = []

    def _fake_upload(entry, user_name, model_path, file_tag, model, task_label="eval"):
        uploaded_tags.append(file_tag)

    module_stubs = {
        "GIT_STATUS_PATH": str(status_root),
        "GIT_REQUESTS_PATH": str(pending_root),
        "SIZE_WHITELIST": {"alice"},
        "_EVAL_REQUESTED": {dedup_key},
        "_SUBMITTER_DATES": {},
        "_load_eval_cache": lambda: None,
        "_common_pre_checks": _always(None),
        "is_model_on_hub": lambda **_kwargs: (
            True,
            "",
            {"architectures": ["TestArch"], "quantization_config": {"quant_method": "AutoRound"}},
        ),
        "validate_quantization_scheme": lambda *_args, **_kwargs: (
            True,
            SimpleNamespace(name="INT4 (W4A16)", precision="4bit", weight_dtype="int4", bits=4, hardware="A100", script="auto_eval"),
            "AutoRound",
        ),
        "check_model_card": lambda *_args, **_kwargs: (
            True,
            "",
            SimpleNamespace(text="x" * 300, data=SimpleNamespace(tags=[])),
        ),
        "get_model_tags": lambda *_args, **_kwargs: [],
        "is_license_approved": _always(True),
        "get_quantized_model_parameters_memory": _always((7.0, 3.5)),
        "estimate_weight_memory_gb": _always(5.0),
        "select_gpu_with_override": _always(("A100", 1)),
        "get_gpu_display_name": lambda value: value,
        "compute_single_eta": _always(1),
        "format_eta": _always("1h"),
        "_upload_to_hub": _fake_upload,
    }
    for attr_name, stub in module_stubs.items():
        monkeypatch.setattr(submit_module, attr_name, stub)
    monkeypatch.setattr(
        submit_module.API,
        "model_info",
        lambda **_kwargs: SimpleNamespace(cardData={"license": "apache-2.0"}, likes=0),
    )

    result = _consume_generator(add_new_eval(
        model=model_name,
        revision="main",
        private=False,
        compute_dtype="INT4 (W4A16)",
        submitted_by="alice",
    ))

    assert len(uploaded_tags) > 0
    assert _is_success(result)
    # Re-submission must not overwrite the previous failed status file: the
    # filename gets a timestamp suffix appended to keep both records.
    last_tag = uploaded_tags[-1]
    assert last_tag is not None
    assert re.search(r"_\d{4}", last_tag), last_tag

# ═══════════════════════════════════════════════════════════════════════════════
# Test 1: auto_eval with nytopop/Qwen3-30B-A3B.w4a16
# ═══════════════════════════════════════════════════════════════════════════════

def test_auto_eval():
    """Walk the auto_eval pipeline for a W4A16 model and report each step.

    Prints scheme validation, size, VRAM and GPU selection results, then calls
    ``add_new_eval`` end-to-end.

    Returns:
        True when no check recorded a problem, False otherwise.
    """
    model_name = "nytopop/Qwen3-30B-A3B.w4a16"
    compute_dtype = "INT4 (W4A16)"  # what the UI passes

    hash_rule = "#" * 70
    print(f"\n{hash_rule}")
    print(f"  TEST: auto_eval  model={model_name}")
    print(hash_rule)

    config, info = inspect_model(model_name)

    # ── Step-by-step validation ──────────────────────────────────────────
    print("\n--- Step 1: Quantization scheme validation ---")
    qc = None
    if config:
        qc = getattr(config, "quantization_config", None)
    is_valid, scheme, detected_method = validate_quantization_scheme(qc, compute_dtype)
    print(f"  is_valid:          {is_valid}")
    print(f"  scheme:            {scheme}")
    print(f"  detected_method:   {detected_method}")

    if scheme:
        print(f"  scheme.name:       {scheme.name}")
        print(f"  scheme.precision:  {scheme.precision}")
        print(f"  scheme.weight_dtype: {scheme.weight_dtype}")
        print(f"  scheme.bits:       {scheme.bits}")
        print(f"  scheme.hardware:   {scheme.hardware}")
        print(f"  scheme.script:     {scheme.script}")

    # ── Step 2: Model size ───────────────────────────────────────────────
    print("\n--- Step 2: Model size (get_quantized_model_parameters_memory) ---")
    if not info:
        params_b = None
    else:
        quant_method = ""
        if detected_method:
            quant_method = detected_method.lower()
        precision = "4bit"
        if scheme:
            precision = scheme.precision
        print(f"  quant_method arg: '{quant_method}'")
        print(f"  bits arg:         '{precision}'")
        print(f"  KNOWN_SIZE_FACTOR has '{quant_method}': {quant_method in KNOWN_SIZE_FACTOR}")

        params_b, size_gb = get_quantized_model_parameters_memory(
            info, quant_method=quant_method, bits=precision
        )
        print(f"  params_b:  {params_b}")
        print(f"  size_gb:   {size_gb}")

        # Also test get_model_size for comparison
        print("\n--- Step 2b: get_model_size (FP-style) ---")
        params_b2, size_gb2 = get_model_size(info, precision=precision)
        print(f"  params_b:  {params_b2}")
        print(f"  size_gb:   {size_gb2}")

    # ── Step 3: VRAM estimation ──────────────────────────────────────────
    print("\n--- Step 3: VRAM estimation ---")
    if not params_b:
        print("  SKIPPED (no params)")
        est_mem = None
    else:
        # `precision` is always bound here: params_b is only truthy when the
        # Step 2 branch that defines it ran.
        bits = PRECISION_TO_BITS.get(precision, 4)
        est_mem = estimate_weight_memory_gb(params_b, bits=bits, overhead_factor=4.4)
        print(f"  bits:              {bits}")
        print(f"  estimated_vram:    {est_mem} GB")

    # ── Step 4: GPU selection ────────────────────────────────────────────
    print("\n--- Step 4: GPU selection ---")
    if est_mem:
        gpu_type, gpu_nums = select_gpu(est_mem)
        print(f"  gpu_type:  {gpu_type}")
        print(f"  gpu_nums:  {gpu_nums}")

    # ── Step 5: Call add_new_eval end-to-end ─────────────────────────────
    print("\n--- Step 5: add_new_eval (end-to-end) ---")
    submission = add_new_eval(
        model=model_name,
        revision="main",
        private=False,
        compute_dtype=compute_dtype,
    )
    result = _consume_generator(submission)
    print(f"  Result: {result}")

    # ── Validate expected values ─────────────────────────────────────────
    print("\n--- Validation checks ---")
    problems = []

    if not is_valid:
        problems.append(f"FAIL: Model should be detected as W4A16 quantized but is_valid={is_valid}")

    if params_b is None:
        problems.append("FAIL: params_b is None")
    elif not (25 <= params_b <= 35):
        problems.append(f"WARN: Expected params ~30B, got {params_b}B")

    if _is_error(result):
        problems.append(f"FAIL: add_new_eval returned error: {result[:200]}")

    if not problems:
        print("  ✅ All checks passed")
    else:
        for problem in problems:
            print(f"  ❌ {problem}")

    return not problems


# ═══════════════════════════════════════════════════════════════════════════════
# Test 2: auto_quant with Qwen/Qwen3-30B-A3B
# ═══════════════════════════════════════════════════════════════════════════════

def test_auto_quant():
    """Walk the auto_quant pipeline for an FP model and report each step.

    Prints the dtype, size, layer-count, VRAM and GPU selection results, then
    calls ``add_new_quant`` end-to-end.

    Returns:
        True when no check recorded a problem, False otherwise.
    """
    model_name = "Qwen/Qwen3-30B-A3B"
    quant_scheme = "INT4 (W4A16)"

    hash_rule = "#" * 70
    print(f"\n{hash_rule}")
    print(f"  TEST: auto_quant  model={model_name}")
    print(hash_rule)

    config, info = inspect_model(model_name)

    # ── Step 1: Confirm NOT quantized ────────────────────────────────────
    print("\n--- Step 1: Confirm model is FP (not quantized) ---")
    qc = None
    if config:
        qc = getattr(config, "quantization_config", None)
    print(f"  quantization_config: {qc}")
    if qc:
        print("  ❌ Model appears quantized — auto_quant should reject it")

    torch_dtype = getattr(config, "torch_dtype", None)
    # Fall back to float16 when the config does not declare a dtype.
    input_dtype = "float16"
    if torch_dtype:
        input_dtype = str(torch_dtype)
    input_bits = SUPPORTED_INPUT_DTYPES.get(input_dtype)
    print(f"  torch_dtype:    {torch_dtype}")
    print(f"  input_dtype:    {input_dtype}")
    print(f"  input_bits:     {input_bits}")

    # ── Step 2: Model size (FP) ──────────────────────────────────────────
    print("\n--- Step 2: Model size (FP) ---")
    params_b = size_gb = None
    if info:
        if input_bits == 16:
            fp_label = "16bit"
        else:
            fp_label = "32bit"
        params_b, size_gb = get_model_size(info, precision=fp_label)
        print(f"  precision arg: '{fp_label}'")
        print(f"  params_b:      {params_b}")
        print(f"  size_gb:       {size_gb}")

    # ── Step 3: Layer count ──────────────────────────────────────────────
    print("\n--- Step 3: Layer count ---")
    num_layers = None
    if config:
        num_layers = get_num_layers(config)
    print(f"  num_layers: {num_layers}")

    # ── Step 4: Quantization VRAM ────────────────────────────────────────
    print("\n--- Step 4: Quantization VRAM ---")
    quant_mem = None
    if not (size_gb and num_layers):
        print(f"  SKIPPED (size_gb={size_gb}, num_layers={num_layers})")
    else:
        quant_mem = estimate_quantization_memory_gb(size_gb, num_layers, overhead_factor=1.5)
        print(f"  model_weight_gb: {size_gb}")
        print(f"  num_layers:      {num_layers}")
        print(f"  quant_vram:      {quant_mem} GB")

    # ── Step 5: Eval VRAM (post-quantization) ────────────────────────────
    print("\n--- Step 5: Eval VRAM (post-quant W4A16) ---")
    scheme = SUPPORTED_QUANT_SCHEMES.get(quant_scheme)
    eval_mem = None
    if params_b and scheme:
        eval_mem = estimate_weight_memory_gb(params_b, bits=scheme.bits, overhead_factor=4.4)
        print(f"  params_b:     {params_b}")
        print(f"  output_bits:  {scheme.bits}")
        print(f"  eval_vram:    {eval_mem} GB")

        bytes_per_param = scheme.bits / 8.0
        quant_model_size_gb = round(params_b * bytes_per_param, 2)
        print(f"  quant_model_size_gb: {quant_model_size_gb}")

    # ── Step 6: GPU selection ────────────────────────────────────────────
    print("\n--- Step 6: GPU selection ---")
    if quant_mem:
        quant_gpu, quant_gpu_count = select_gpu(quant_mem)
        print(f"  Quantization: {quant_gpu} × {quant_gpu_count}")
    if eval_mem:
        eval_gpu, eval_gpu_count = select_gpu(eval_mem)
        print(f"  Evaluation:   {eval_gpu} × {eval_gpu_count}")

    # ── Step 7: Call add_new_quant end-to-end ────────────────────────────
    print("\n--- Step 7: add_new_quant (end-to-end) ---")
    submission = add_new_quant(
        model=model_name,
        revision="main",
        private=False,
        quant_scheme=quant_scheme,
    )
    result = _consume_generator(submission)
    print(f"  Result: {result}")

    # ── Validate expected values ─────────────────────────────────────────
    print("\n--- Validation checks ---")
    problems = []

    if qc:
        problems.append("FAIL: FP model has quantization_config — auto_quant should reject")

    if params_b is None:
        problems.append("FAIL: params_b is None")
    elif not (25 <= params_b <= 35):
        problems.append(f"WARN: Expected params ~30B, got {params_b}B")

    if input_bits is None:
        problems.append(f"FAIL: input_dtype '{input_dtype}' not in SUPPORTED_INPUT_DTYPES")

    if num_layers is None or num_layers <= 0:
        problems.append(f"FAIL: Could not determine num_layers: {num_layers}")

    if _is_error(result):
        problems.append(f"FAIL: add_new_quant returned error: {result[:200]}")

    if not problems:
        print("  ✅ All checks passed")
    else:
        for problem in problems:
            print(f"  ❌ {problem}")

    return not problems


# ═══════════════════════════════════════════════════════════════════════════════
# Test 3: Cross-check — call auto_eval on FP model (should fail)
# ═══════════════════════════════════════════════════════════════════════════════

def test_auto_eval_rejects_fp_model():
    """auto_eval should reject an FP (non-quantized) model.

    Returns:
        True when the response carries a red marker, False otherwise.
    """
    model_name = "Qwen/Qwen3-30B-A3B"

    hash_rule = "#" * 70
    print(f"\n{hash_rule}")
    print(f"  TEST: auto_eval should REJECT FP model: {model_name}")
    print(hash_rule)

    submission = add_new_eval(
        model=model_name,
        revision="main",
        private=False,
        compute_dtype="INT4 (W4A16)",
    )
    result = _consume_generator(submission)
    print(f"  Result: {result}")

    # Either spelling of the red style marker counts as a rejection.
    rejected = any(marker in result for marker in ("color: red", "color:red"))
    if not rejected:
        print(f"  ❌ FAIL: Should have rejected FP model but got: {result[:200]}")
        return False

    print("  ✅ Correctly rejected FP model")
    return True


# ═══════════════════════════════════════════════════════════════════════════════
# Test 4: Cross-check — call auto_quant on quantized model (should fail)
# ═══════════════════════════════════════════════════════════════════════════════

def test_auto_quant_rejects_quantized_model():
    """auto_quant should reject an already-quantized model.

    Returns:
        True when the response is an error or a warning, False otherwise.
    """
    model_name = "nytopop/Qwen3-30B-A3B.w4a16"

    hash_rule = "#" * 70
    print(f"\n{hash_rule}")
    print(f"  TEST: auto_quant should REJECT quantized model: {model_name}")
    print(hash_rule)

    submission = add_new_quant(
        model=model_name,
        revision="main",
        private=False,
        quant_scheme="INT4 (W4A16)",
    )
    result = _consume_generator(submission)
    print(f"  Result: {result}")

    rejected = _is_error(result) or _is_warning(result)
    if not rejected:
        print(f"  ❌ FAIL: Should have rejected quantized model but got: {result[:200]}")
        return False

    print("  ✅ Correctly rejected quantized model")
    return True


# ═══════════════════════════════════════════════════════════════════════════════
# Main
# ═══════════════════════════════════════════════════════════════════════════════

if __name__ == "__main__":
    # Script entry point: run the four end-to-end checks in order, print a
    # pass/fail summary, and exit non-zero when any check failed so shells and
    # CI jobs can detect the failure.
    rule = "=" * 70
    print(rule)
    print("  submit.py Unit Tests")
    print(rule)

    # Dict literals evaluate in source order, so the checks run top to bottom.
    results = {
        "test_auto_eval": test_auto_eval(),
        "test_auto_quant": test_auto_quant(),
        "test_auto_eval_rejects_fp": test_auto_eval_rejects_fp_model(),
        "test_auto_quant_rejects_quantized": test_auto_quant_rejects_quantized_model(),
    }

    print(f"\n{rule}")
    print("  SUMMARY")
    print(rule)
    for name, ok in results.items():
        status = "✅ PASS" if ok else "❌ FAIL"
        print(f"  {status}  {name}")

    total = len(results)
    passed_count = sum(1 for ok in results.values() if ok)
    print(f"\n  {passed_count}/{total} tests passed")
    print(rule)

    if passed_count != total:
        sys.exit(1)