Soumik Bose committed on
Commit
0ba7ee8
·
1 Parent(s): 9136d4c
Files changed (1) hide show
  1. test_local.py +0 -103
test_local.py DELETED
@@ -1,103 +0,0 @@
1
- import time
2
- from model_service import LocalEmbeddingService
3
-
4
# Configuration
# Filesystem path handed to LocalEmbeddingService by every test in this file.
LOCAL_MODEL_PATH = './models/bge-base-en-v1.5'
6
-
7
def test_single_text():
    """Test embedding generation for a single text.

    Builds a ``LocalEmbeddingService`` from ``LOCAL_MODEL_PATH``, calls
    ``generate_embedding`` with one string, and prints the elapsed time, the
    length of the result, its first ten values and its Euclidean norm.
    Nothing is returned; the output is meant for manual inspection.
    """
    service = LocalEmbeddingService(LOCAL_MODEL_PATH)

    text = "Ginger was also a smart giraffe. She knew what was wrong."

    print(f"\n{'='*60}")
    print("Testing single text embedding")
    print(f"{'='*60}")
    print(f"Text: '{text}'")

    # perf_counter() is monotonic and high-resolution, so the measured
    # interval cannot be skewed by system clock adjustments the way a
    # time.time() difference can.
    start_time = time.perf_counter()
    vector = service.generate_embedding(text)
    elapsed = time.perf_counter() - start_time

    print(f"\n✅ Embedding generated in {elapsed:.4f} seconds")
    print(f"Dimensions: {len(vector)}")
    print(f"First 10 values: {vector[:10]}")
    # NOTE(review): assumes `vector` is a flat sequence of numbers for a
    # single-string input -- confirm against LocalEmbeddingService.
    print(f"Vector norm (should be ~1.0): {sum(x**2 for x in vector)**0.5:.4f}")
26
-
27
def test_batch_texts():
    """Test embedding generation for multiple texts.

    Builds a ``LocalEmbeddingService`` from ``LOCAL_MODEL_PATH``, calls
    ``generate_embedding`` with a list of three sentences, and prints the
    total and per-text elapsed time, the length of the first result and its
    first ten values. Nothing is returned.
    """
    service = LocalEmbeddingService(LOCAL_MODEL_PATH)

    texts = [
        "The quick brown fox jumps over the lazy dog.",
        "Machine learning is transforming technology.",
        "Embeddings capture semantic meaning of text.",
    ]

    print(f"\n{'='*60}")
    print("Testing batch text embeddings")
    print(f"{'='*60}")
    print(f"Number of texts: {len(texts)}")

    # perf_counter() is monotonic and high-resolution, so the measured
    # interval cannot be skewed by system clock adjustments the way a
    # time.time() difference can.
    start_time = time.perf_counter()
    vectors = service.generate_embedding(texts)
    elapsed = time.perf_counter() - start_time

    print(f"\n✅ {len(vectors)} embeddings generated in {elapsed:.4f} seconds")
    print(f"Average time per text: {elapsed / len(texts):.4f} seconds")
    print(f"Each embedding dimension: {len(vectors[0])}")

    # Show first embedding sample
    print(f"\nFirst embedding (first 10 values): {vectors[0][:10]}")
52
-
53
def test_similarity():
    """Test cosine similarity between embeddings.

    Embeds a sentence, a paraphrase of it and an unrelated sentence, prints
    the similarity of the first sentence to each of the other two, and
    checks that the paraphrase scores strictly higher.

    Raises:
        AssertionError: if the paraphrase pair does not score higher than
            the unrelated pair.
    """
    service = LocalEmbeddingService(LOCAL_MODEL_PATH)

    texts = [
        "The cat sits on the mat.",
        "A feline rests on the rug.",  # Similar meaning
        "Python is a programming language.",  # Different meaning
    ]

    print(f"\n{'='*60}")
    print("Testing semantic similarity")
    print(f"{'='*60}")

    vectors = service.generate_embedding(texts)

    # Plain dot product; it equals cosine similarity only for unit-length
    # vectors. NOTE(review): presumes the service returns normalized
    # embeddings -- confirm against LocalEmbeddingService.
    def cosine_sim(v1, v2):
        return sum(a * b for a, b in zip(v1, v2))

    sim_01 = cosine_sim(vectors[0], vectors[1])
    sim_02 = cosine_sim(vectors[0], vectors[2])

    print(f"\nText 1: '{texts[0]}'")
    print(f"Text 2: '{texts[1]}'")
    print(f"Similarity: {sim_01:.4f} (similar meaning)")

    print(f"\nText 1: '{texts[0]}'")
    print(f"Text 3: '{texts[2]}'")
    print(f"Similarity: {sim_02:.4f} (different meaning)")

    # Verify the claim before printing it. `not (a > b)` also rejects NaN
    # scores, which a plain `a <= b` comparison would let through.
    if not sim_01 > sim_02:
        raise AssertionError(
            "Expected the similar pair to score higher than the different "
            f"pair, got {sim_01:.4f} vs {sim_02:.4f}"
        )

    print("\n✅ As expected, similar texts have higher similarity!")
85
-
86
def main():
    """Run all tests.

    Calls ``test_single_text``, ``test_batch_texts`` and ``test_similarity``
    in order and prints a summary banner when all of them finish. A
    ``FileNotFoundError`` is reported as a missing model; any other
    exception is reported with its message.

    Returns:
        int: 0 when every test ran to completion, 1 when any of them raised.
    """
    try:
        test_single_text()
        test_batch_texts()
        test_similarity()
    except FileNotFoundError:
        print("\n❌ Model not found. Please run download_model.py first.")
        return 1
    except Exception as e:
        print(f"\n❌ An error occurred: {e}")
        return 1
    else:
        print(f"\n{'='*60}")
        print("✅ All tests completed successfully!")
        print(f"{'='*60}\n")
        return 0


if __name__ == "__main__":
    # Surface the outcome as the process exit status so a failed run is
    # visible to shells and CI instead of always exiting with 0.
    raise SystemExit(main())