"""
Unit Tests for Model Loading and Caching.
Tests model download, caching, and loading time requirements.
"""
import pytest
import time
from unittest.mock import patch, MagicMock
class TestIndicBERTLoading:
    """Tests for IndicBERT model loading.

    Environment problems (``transformers`` missing, model cannot be loaded)
    skip the test. Assertions run outside the skip guard so that a failed
    assertion is reported as a failure instead of being turned into a skip.
    """

    @staticmethod
    def _load_indicbert():
        """Load the IndicBERT tokenizer and model, skipping if unavailable.

        Returns:
            Tuple ``(tokenizer, model, seconds)`` where ``seconds`` is the
            elapsed time of the two ``from_pretrained`` calls.
        """
        try:
            from transformers import AutoModel, AutoTokenizer
        except ImportError:
            pytest.skip("transformers not installed")

        # perf_counter is monotonic, so the elapsed value cannot go negative
        # if the wall clock is adjusted during the load.
        started = time.perf_counter()
        try:
            tokenizer = AutoTokenizer.from_pretrained("ai4bharat/indic-bert")
            model = AutoModel.from_pretrained("ai4bharat/indic-bert")
        except Exception as e:
            # Only loading problems are treated as "not available".
            pytest.skip(f"IndicBERT model not available: {e}")
        elapsed = time.perf_counter() - started
        return tokenizer, model, elapsed

    def test_indicbert_loads_successfully(self):
        """Test IndicBERT can be loaded."""
        tokenizer, model, load_time = self._load_indicbert()

        assert tokenizer is not None
        assert model is not None
        assert load_time > 0

    def test_indicbert_load_time_requirement(self):
        """Test IndicBERT loads in <10 seconds (after first download)."""
        # Warm-up load: absorbs a possible first-time download so that the
        # timed load below measures a subsequent (cached) load only.
        self._load_indicbert()

        _, _, load_time = self._load_indicbert()

        assert load_time < 10.0, f"IndicBERT should load in <10s, took {load_time:.2f}s"

    def test_indicbert_model_functionality(self):
        """Test IndicBERT model can process text."""
        tokenizer, model, _ = self._load_indicbert()

        test_text = "Test message for scam detection"
        inputs = tokenizer(test_text, return_tensors="pt", truncation=True, max_length=512)
        model.eval()

        # Model should process without errors; an exception here is a real
        # failure, not a reason to skip.
        outputs = model(**inputs)

        assert outputs is not None
        assert hasattr(outputs, 'last_hidden_state') or hasattr(outputs, 'logits')
class TestSpacyLoading:
    """Tests for spaCy model loading.

    A missing ``spacy`` package or a missing ``en_core_web_sm`` model skips
    the test. Assertions run outside the skip guard so that a failed
    assertion is reported as a failure instead of being turned into a skip.
    """

    @staticmethod
    def _load_spacy():
        """Load the ``en_core_web_sm`` pipeline, skipping if unavailable.

        Returns:
            Tuple ``(nlp, seconds)`` where ``seconds`` is the elapsed time of
            the ``spacy.load`` call.
        """
        try:
            import spacy
        except ImportError:
            pytest.skip("spacy not installed")

        # perf_counter is monotonic, so the elapsed value cannot go negative
        # if the wall clock is adjusted during the load.
        started = time.perf_counter()
        try:
            nlp = spacy.load("en_core_web_sm")
        except OSError:
            pytest.skip("spaCy model 'en_core_web_sm' not installed")
        except Exception as e:
            pytest.skip(f"spaCy model not available: {e}")
        elapsed = time.perf_counter() - started
        return nlp, elapsed

    def test_spacy_loads_successfully(self):
        """Test spaCy model can be loaded."""
        nlp, load_time = self._load_spacy()

        assert nlp is not None
        assert load_time > 0

    def test_spacy_load_time_requirement(self):
        """Test spaCy loads in <5 seconds."""
        _, load_time = self._load_spacy()

        assert load_time < 5.0, f"spaCy should load in <5s, took {load_time:.2f}s"

    def test_spacy_model_functionality(self):
        """Test spaCy model can process text."""
        nlp, _ = self._load_spacy()

        # Processing errors are real failures, so this runs outside the
        # skip guard used for loading.
        doc = nlp("Test message for entity extraction")

        assert doc is not None
        assert len(doc) > 0
class TestSentenceTransformersLoading:
    """Tests for sentence-transformers model loading.

    A missing ``sentence_transformers`` package or a model that cannot be
    loaded skips the test. Assertions run outside the skip guard so that a
    failed assertion is reported as a failure instead of a skip.
    """

    @staticmethod
    def _load_embedder():
        """Load the ``all-MiniLM-L6-v2`` embedder, skipping if unavailable.

        Returns:
            Tuple ``(embedder, seconds)`` where ``seconds`` is the elapsed
            time of constructing the ``SentenceTransformer``.
        """
        try:
            from sentence_transformers import SentenceTransformer
        except ImportError:
            pytest.skip("sentence-transformers not installed")

        # perf_counter is monotonic, so the elapsed value cannot go negative
        # if the wall clock is adjusted during the load.
        started = time.perf_counter()
        try:
            embedder = SentenceTransformer('all-MiniLM-L6-v2')
        except Exception as e:
            pytest.skip(f"Sentence transformers model not available: {e}")
        elapsed = time.perf_counter() - started
        return embedder, elapsed

    def test_sentence_transformers_loads_successfully(self):
        """Test sentence-transformers model can be loaded."""
        embedder, load_time = self._load_embedder()

        assert embedder is not None
        assert load_time > 0

    def test_sentence_transformers_functionality(self):
        """Test sentence-transformers model can encode text."""
        embedder, _ = self._load_embedder()

        test_text = "Test message for embedding"
        embedding = embedder.encode(test_text)

        assert embedding is not None
        assert len(embedding) > 0
        # The previous isinstance check compared the value against its own
        # type and could never fail. Verify instead that every component is
        # a real number: float() raises on non-scalars, and NaN != NaN.
        assert all(float(value) == float(value) for value in embedding), \
            "embedding contains non-numeric or NaN components"
class TestModelSetupScript:
    """Checks on the helper functions exposed by ``scripts.setup_models``."""

    def test_download_indicbert_function_exists(self):
        """``download_indicbert`` can be imported and is callable."""
        from scripts.setup_models import download_indicbert as candidate

        assert callable(candidate)

    def test_download_spacy_function_exists(self):
        """``download_spacy`` can be imported and is callable."""
        from scripts.setup_models import download_spacy as candidate

        assert callable(candidate)

    def test_download_sentence_transformers_function_exists(self):
        """``download_sentence_transformers`` can be imported and is callable."""
        from scripts.setup_models import download_sentence_transformers as candidate

        assert callable(candidate)

    def test_verify_models_function_exists(self):
        """``verify_models`` can be imported and is callable."""
        from scripts.setup_models import verify_models as candidate

        assert callable(candidate)

    def test_download_indicbert_returns_tuple(self):
        """``download_indicbert`` yields a ``(bool, Optional[float])`` pair."""
        from scripts.setup_models import download_indicbert as fetch

        ok, seconds = fetch()
        assert isinstance(ok, bool)
        # A load time is optional; when present it must be numeric.
        if seconds is not None:
            assert isinstance(seconds, (int, float))

    def test_download_spacy_returns_tuple(self):
        """``download_spacy`` yields a ``(bool, Optional[float])`` pair."""
        from scripts.setup_models import download_spacy as fetch

        ok, seconds = fetch()
        assert isinstance(ok, bool)
        # A load time is optional; when present it must be numeric.
        if seconds is not None:
            assert isinstance(seconds, (int, float))

    def test_download_sentence_transformers_returns_tuple(self):
        """``download_sentence_transformers`` yields a ``(bool, Optional[float])`` pair."""
        from scripts.setup_models import download_sentence_transformers as fetch

        ok, seconds = fetch()
        assert isinstance(ok, bool)
        # A load time is optional; when present it must be numeric.
        if seconds is not None:
            assert isinstance(seconds, (int, float))

    def test_verify_models_returns_tuple(self):
        """``verify_models`` yields a ``(bool, dict)`` pair."""
        from scripts.setup_models import verify_models as check

        verified, timings = check()
        assert isinstance(verified, bool)
        assert isinstance(timings, dict)
|