Spaces:
Sleeping
Sleeping
"""Tests for model loading and inference."""

import os
import sys
from unittest.mock import MagicMock, patch

import pytest

# Make the package under test importable when running from the tests directory:
# prepend the repository root (parent of this file's directory) to sys.path.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from config import estimate_model_size, should_quantize
class TestModelSizeEstimation:
    """Test model size estimation logic."""

    def test_known_model_size(self):
        """Registered models report their known parameter counts (billions)."""
        known = {
            "meta-llama/Llama-3.1-8B-Instruct": 8,
            "meta-llama/Llama-3.1-70B-Instruct": 70,
            "mistralai/Mistral-7B-Instruct-v0.3": 7,
        }
        for model_id, billions in known.items():
            assert estimate_model_size(model_id) == billions

    def test_extract_size_from_name(self):
        """A `<N>B` token embedded in the model name is parsed as its size."""
        cases = [
            ("some-org/CustomModel-13B", 13),
            ("another/model-2B-test", 2),
            ("org/Model-32B-Instruct", 32),
        ]
        for model_id, billions in cases:
            assert estimate_model_size(model_id) == billions

    def test_unknown_model_size(self):
        """Names carrying no recognizable size pattern yield None."""
        for model_id in ("unknown/model-without-size", "org/mystery-model"):
            assert estimate_model_size(model_id) is None
class TestQuantizationDecision:
    """Test automatic quantization decisions."""

    def test_small_model_no_quantization(self):
        """Models small enough to run unquantized get "none"."""
        for model_id in (
            "meta-llama/Llama-3.1-8B-Instruct",
            "mistralai/Mistral-7B-Instruct-v0.3",
        ):
            assert should_quantize(model_id) == "none"

    def test_large_model_int4_quantization(self):
        """70B-class models are automatically quantized to INT4."""
        for model_id in (
            "meta-llama/Llama-3.1-70B-Instruct",
            "Qwen/Qwen2.5-72B-Instruct",
        ):
            assert should_quantize(model_id) == "int4"

    def test_unknown_model_no_quantization(self):
        """Models whose size cannot be estimated are left unquantized."""
        assert should_quantize("unknown/mystery-model") == "none"
class TestModelLoading:
    """Test model loading functionality."""

    # NOTE(review): these methods accept mock_* arguments, but no @patch
    # decorators or fixture definitions are visible in this chunk — they are
    # presumably supplied by decorators lost in formatting or by conftest.py
    # fixtures. Confirm before relying on these signatures.

    def test_load_model_creates_loaded_model(
        self, mock_tokenizer_class, mock_model_class, mock_tokenizer, mock_model
    ):
        """load_model should return a populated LoadedModel instance."""
        mock_tokenizer_class.from_pretrained.return_value = mock_tokenizer
        mock_model_class.from_pretrained.return_value = mock_model

        from models import load_model, unload_model

        unload_model()  # start from a clean cache

        result = load_model("test-model/test-7B")

        assert result.model_id == "test-model/test-7B"
        assert result.model is not None
        assert result.tokenizer is not None

    def test_load_model_caches_result(
        self, mock_tokenizer_class, mock_model_class, mock_tokenizer, mock_model
    ):
        """Loading the same model id twice must reuse the cached instance."""
        mock_tokenizer_class.from_pretrained.return_value = mock_tokenizer
        mock_model_class.from_pretrained.return_value = mock_model

        from models import load_model, unload_model

        unload_model()  # start from a clean cache

        load_model("test-model/test-7B")
        calls_after_first = mock_model_class.from_pretrained.call_count
        load_model("test-model/test-7B")
        calls_after_second = mock_model_class.from_pretrained.call_count

        # Cached path: the second load must not invoke from_pretrained again.
        assert calls_after_first == calls_after_second
class TestChatTemplate:
    """Test chat template application."""

    # NOTE(review): mock_load_model / mock_tokenizer arrive as parameters with
    # no visible @patch decorator or fixture in this chunk — presumably lost in
    # formatting or defined in conftest.py; confirm.

    def test_apply_chat_template_with_tokenizer_method(self, mock_load_model, mock_tokenizer):
        """When the tokenizer offers apply_chat_template, its markers appear."""
        from models import apply_chat_template, LoadedModel

        mock_load_model.return_value = LoadedModel(
            model_id="test-model",
            model=MagicMock(),
            tokenizer=mock_tokenizer,
        )

        rendered = apply_chat_template(
            "test-model", [{"role": "user", "content": "Hello!"}]
        )

        assert "<|user|>" in rendered
        assert "Hello!" in rendered
        assert "<|assistant|>" in rendered  # Generation prompt

    def test_apply_chat_template_fallback(self, mock_load_model):
        """Tokenizers lacking apply_chat_template fall back to Role: prefixes."""
        from models import apply_chat_template, LoadedModel

        # Remove apply_chat_template from the mock so hasattr-style checks fail
        # and the fallback formatting path is exercised.
        bare_tokenizer = MagicMock()
        del bare_tokenizer.apply_chat_template

        mock_load_model.return_value = LoadedModel(
            model_id="test-model",
            model=MagicMock(),
            tokenizer=bare_tokenizer,
        )

        rendered = apply_chat_template(
            "test-model",
            [
                {"role": "system", "content": "You are helpful."},
                {"role": "user", "content": "Hi!"},
            ],
        )

        for prefix in ("System:", "User:", "Assistant:"):
            assert prefix in rendered