""" Unit tests for embeddings functionality. """ import unittest import numpy as np from django.test import TestCase from hue_portal.core.embeddings import ( get_embedding_model, generate_embedding, generate_embeddings_batch, cosine_similarity, get_embedding_dimension ) from hue_portal.core.embedding_utils import ( save_embedding, load_embedding, has_embedding ) class EmbeddingsTestCase(TestCase): """Test embedding generation and utilities.""" def test_get_embedding_model(self): """Test loading embedding model.""" model = get_embedding_model() # Model might not be available in test environment # Just check that function doesn't crash self.assertIsNotNone(model or True) def test_generate_embedding(self): """Test generating embedding for a single text.""" text = "Thủ tục đăng ký cư trú" embedding = generate_embedding(text) if embedding is not None: self.assertIsInstance(embedding, np.ndarray) self.assertGreater(len(embedding), 0) def test_generate_embeddings_batch(self): """Test generating embeddings for multiple texts.""" texts = [ "Thủ tục đăng ký cư trú", "Mức phạt vượt đèn đỏ", "Địa chỉ công an phường" ] embeddings = generate_embeddings_batch(texts, batch_size=2) if embeddings and embeddings[0] is not None: self.assertEqual(len(embeddings), len(texts)) self.assertIsInstance(embeddings[0], np.ndarray) def test_cosine_similarity(self): """Test cosine similarity calculation.""" vec1 = np.array([1.0, 0.0, 0.0]) vec2 = np.array([1.0, 0.0, 0.0]) similarity = cosine_similarity(vec1, vec2) self.assertAlmostEqual(similarity, 1.0, places=5) vec3 = np.array([0.0, 1.0, 0.0]) similarity2 = cosine_similarity(vec1, vec3) self.assertAlmostEqual(similarity2, 0.0, places=5) def test_cosine_similarity_orthogonal(self): """Test cosine similarity for orthogonal vectors.""" vec1 = np.array([1.0, 0.0]) vec2 = np.array([0.0, 1.0]) similarity = cosine_similarity(vec1, vec2) self.assertAlmostEqual(similarity, 0.0, places=5) def test_get_embedding_dimension(self): """Test getting embedding dimension.""" dim = get_embedding_dimension() # Dimension might be 0 if model not available self.assertIsInstance(dim, int) self.assertGreaterEqual(dim, 0) def test_similar_texts_have_similar_embeddings(self): """Test that similar texts produce similar embeddings.""" text1 = "Thủ tục đăng ký cư trú" text2 = "Đăng ký thủ tục cư trú" text3 = "Mức phạt giao thông" emb1 = generate_embedding(text1) emb2 = generate_embedding(text2) emb3 = generate_embedding(text3) if emb1 is not None and emb2 is not None and emb3 is not None: sim_similar = cosine_similarity(emb1, emb2) sim_different = cosine_similarity(emb1, emb3) # Similar texts should have higher similarity self.assertGreater(sim_similar, sim_different) class EmbeddingUtilsTestCase(TestCase): """Test embedding utility functions.""" def test_save_and_load_embedding(self): """Test saving and loading embeddings.""" from hue_portal.core.models import Procedure # Create a test procedure procedure = Procedure.objects.create( title="Test Procedure", domain="Test" ) # Create a dummy embedding dummy_embedding = np.random.rand(384).astype(np.float32) # Save embedding success = save_embedding(procedure, dummy_embedding) self.assertTrue(success) # Reload from database procedure.refresh_from_db() # Load embedding loaded_embedding = load_embedding(procedure) self.assertIsNotNone(loaded_embedding) self.assertTrue(np.allclose(dummy_embedding, loaded_embedding)) def test_has_embedding(self): """Test checking if instance has embedding.""" from hue_portal.core.models import Procedure procedure = Procedure.objects.create( title="Test Procedure", domain="Test" ) # Initially no embedding self.assertFalse(has_embedding(procedure)) # Add embedding dummy_embedding = np.random.rand(384).astype(np.float32) save_embedding(procedure, dummy_embedding) # Refresh and check procedure.refresh_from_db() self.assertTrue(has_embedding(procedure))