# opencode-zerogpu/tests/test_models.py
"""Tests for model loading and inference."""
import pytest
from unittest.mock import patch, MagicMock
import sys
import os
# Add parent directory to path for imports
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from config import estimate_model_size, should_quantize
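# NOTE (assumption): the `mock_tokenizer` and `mock_model` parameters used by
# the loading tests below are pytest fixtures that are not defined in this
# file; they are presumably provided by tests/conftest.py. A minimal,
# hypothetical sketch of what such fixtures might look like (the real
# definitions live in conftest.py):
#
#   @pytest.fixture
#   def mock_tokenizer():
#       tok = MagicMock()
#
#       def fake_template(messages, tokenize=False, add_generation_prompt=True, **kwargs):
#           text = "".join(f"<|{m['role']}|>\n{m['content']}\n" for m in messages)
#           return text + "<|assistant|>\n" if add_generation_prompt else text
#
#       tok.apply_chat_template.side_effect = fake_template
#       return tok
#
#   @pytest.fixture
#   def mock_model():
#       return MagicMock()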
class TestModelSizeEstimation:
    """Test model size estimation logic."""

    def test_known_model_size(self):
        """Test size estimation for known models."""
        assert estimate_model_size("meta-llama/Llama-3.1-8B-Instruct") == 8
        assert estimate_model_size("meta-llama/Llama-3.1-70B-Instruct") == 70
        assert estimate_model_size("mistralai/Mistral-7B-Instruct-v0.3") == 7

    def test_extract_size_from_name(self):
        """Test size extraction from model name pattern."""
        assert estimate_model_size("some-org/CustomModel-13B") == 13
        assert estimate_model_size("another/model-2B-test") == 2
        assert estimate_model_size("org/Model-32B-Instruct") == 32

    def test_unknown_model_size(self):
        """Test handling of models with unknown size."""
        assert estimate_model_size("unknown/model-without-size") is None
        assert estimate_model_size("org/mystery-model") is None
class TestQuantizationDecision:
    """Test automatic quantization decisions."""

    def test_small_model_no_quantization(self):
        """Small models should not be quantized."""
        assert should_quantize("meta-llama/Llama-3.1-8B-Instruct") == "none"
        assert should_quantize("mistralai/Mistral-7B-Instruct-v0.3") == "none"

    def test_large_model_int4_quantization(self):
        """70B+ models should use INT4."""
        assert should_quantize("meta-llama/Llama-3.1-70B-Instruct") == "int4"
        assert should_quantize("Qwen/Qwen2.5-72B-Instruct") == "int4"

    def test_unknown_model_no_quantization(self):
        """Unknown models should not be auto-quantized."""
        assert should_quantize("unknown/mystery-model") == "none"
class TestModelLoading:
    """Test model loading functionality."""

    @patch("models.AutoModelForCausalLM")
    @patch("models.AutoTokenizer")
    def test_load_model_creates_loaded_model(
        self, mock_tokenizer_class, mock_model_class, mock_tokenizer, mock_model
    ):
        """Test that load_model returns a LoadedModel instance."""
        mock_tokenizer_class.from_pretrained.return_value = mock_tokenizer
        mock_model_class.from_pretrained.return_value = mock_model

        from models import load_model, unload_model

        # Ensure clean state
        unload_model()

        loaded = load_model("test-model/test-7B")

        assert loaded.model_id == "test-model/test-7B"
        assert loaded.model is not None
        assert loaded.tokenizer is not None

    @patch("models.AutoModelForCausalLM")
    @patch("models.AutoTokenizer")
    def test_load_model_caches_result(
        self, mock_tokenizer_class, mock_model_class, mock_tokenizer, mock_model
    ):
        """Test that loading the same model twice uses the cache."""
        mock_tokenizer_class.from_pretrained.return_value = mock_tokenizer
        mock_model_class.from_pretrained.return_value = mock_model

        from models import load_model, unload_model

        # Ensure clean state
        unload_model()

        # First load
        load_model("test-model/test-7B")
        first_call_count = mock_model_class.from_pretrained.call_count

        # Second load (should use cache)
        load_model("test-model/test-7B")
        second_call_count = mock_model_class.from_pretrained.call_count

        # Should not have called from_pretrained again
        assert first_call_count == second_call_count
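# The assertions below imply the fallback formatter renders messages as
# "System: ...", "User: ...", and appends an "Assistant:" generation prompt
# when a tokenizer lacks apply_chat_template (inferred from the tests;
# models.apply_chat_template is the source of truth).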
class TestChatTemplate:
    """Test chat template application."""

    @patch("models.load_model")
    def test_apply_chat_template_with_tokenizer_method(self, mock_load_model, mock_tokenizer):
        """Test chat template when the tokenizer has apply_chat_template."""
        from models import apply_chat_template, LoadedModel

        mock_load_model.return_value = LoadedModel(
            model_id="test-model",
            model=MagicMock(),
            tokenizer=mock_tokenizer,
        )

        messages = [
            {"role": "user", "content": "Hello!"},
        ]
        result = apply_chat_template("test-model", messages)

        assert "<|user|>" in result
        assert "Hello!" in result
        assert "<|assistant|>" in result  # Generation prompt

    @patch("models.load_model")
    def test_apply_chat_template_fallback(self, mock_load_model):
        """Test fallback formatting when the tokenizer lacks apply_chat_template."""
        from models import apply_chat_template, LoadedModel

        # Deleting the attribute makes the MagicMock raise AttributeError on
        # access, simulating a tokenizer without apply_chat_template.
        simple_tokenizer = MagicMock()
        del simple_tokenizer.apply_chat_template

        mock_load_model.return_value = LoadedModel(
            model_id="test-model",
            model=MagicMock(),
            tokenizer=simple_tokenizer,
        )

        messages = [
            {"role": "system", "content": "You are helpful."},
            {"role": "user", "content": "Hi!"},
        ]
        result = apply_chat_template("test-model", messages)

        assert "System:" in result
        assert "User:" in result
        assert "Assistant:" in result