AmrYassinIsFree committed on
Commit
f56dbf3
·
1 Parent(s): 173f28e

add fastembed lib

Browse files
Files changed (8) hide show
  1. README.md +24 -4
  2. bench.py +4 -5
  3. evals/memory.py +8 -6
  4. evals/quality.py +14 -7
  5. evals/speed.py +1 -3
  6. models.py +12 -0
  7. requirements.txt +3 -0
  8. wrapper.py +60 -0
README.md CHANGED
@@ -4,10 +4,18 @@ Compare text embedding models across retrieval quality, inference speed, and mem
4
 
5
  ## Models
6
 
7
- | Key | Model | Role |
8
- |-----|-------|------|
9
- | `mpnet` | `sentence-transformers/all-mpnet-base-v2` | Baseline |
10
- | `bge-small` | `BAAI/bge-small-en-v1.5` | |
 
 
 
 
 
 
 
 
11
 
12
  ## Setup
13
 
@@ -26,6 +34,9 @@ python bench.py
26
  # Specific models
27
  python bench.py --models mpnet bge-small
28
 
 
 
 
29
  # Skip expensive evals
30
  python bench.py --skip-quality
31
  python bench.py --skip-memory
@@ -47,10 +58,18 @@ python bench.py --corpus-size 500 --batch-size 32 --num-runs 5
47
  Edit `models.py` and add an entry to `REGISTRY`:
48
 
49
  ```python
 
50
  "e5-small": ModelConfig(
51
  name="e5-small-v2",
52
  model_id="intfloat/e5-small-v2",
53
  ),
 
 
 
 
 
 
 
54
  ```
55
 
56
  ## Project structure
@@ -59,6 +78,7 @@ Edit `models.py` and add an entry to `REGISTRY`:
59
  embedding-bench/
60
  ├── bench.py # CLI entry point
61
  ├── models.py # Model registry
 
62
  ├── corpus.py # Sentence corpus builder
63
  ├── report.py # Table formatting
64
  ├── evals/
 
4
 
5
  ## Models
6
 
7
+ | Key | Model | Backend | Role |
8
+ |-----|-------|---------|------|
9
+ | `mpnet` | `sentence-transformers/all-mpnet-base-v2` | sbert | Baseline |
10
+ | `bge-small` | `BAAI/bge-small-en-v1.5` | sbert | |
11
+ | `bge-small-fe` | `BAAI/bge-small-en-v1.5` | fastembed | |
12
+ | `all-minilm-fe` | `sentence-transformers/all-MiniLM-L6-v2` | fastembed | |
13
+
14
+ Three backends are supported:
15
+
16
+ - **sbert** — [sentence-transformers](https://www.sbert.net/) (PyTorch). Default.
17
+ - **fastembed** — [qdrant/fastembed](https://github.com/qdrant/fastembed) (ONNX Runtime). Lighter and often faster.
18
+ - **gguf** — [llama-cpp-python](https://github.com/abetlen/llama-cpp-python) for quantised GGUF models.
19
 
20
  ## Setup
21
 
 
34
  # Specific models
35
  python bench.py --models mpnet bge-small
36
 
37
+ # Compare the same model across backends
38
+ python bench.py --models bge-small bge-small-fe
39
+
40
  # Skip expensive evals
41
  python bench.py --skip-quality
42
  python bench.py --skip-memory
 
58
  Edit `models.py` and add an entry to `REGISTRY`:
59
 
60
  ```python
61
+ # sentence-transformers backend (default)
62
  "e5-small": ModelConfig(
63
  name="e5-small-v2",
64
  model_id="intfloat/e5-small-v2",
65
  ),
66
+
67
+ # fastembed backend
68
+ "e5-small-fe": ModelConfig(
69
+ name="e5-small-v2 (fastembed)",
70
+ model_id="intfloat/e5-small-v2",
71
+ backend="fastembed",
72
+ ),
73
  ```
74
 
75
  ## Project structure
 
78
  embedding-bench/
79
  ├── bench.py # CLI entry point
80
  ├── models.py # Model registry
81
+ ├── wrapper.py # Backend wrappers (sbert, fastembed, gguf)
82
  ├── corpus.py # Sentence corpus builder
83
  ├── report.py # Table formatting
84
  ├── evals/
bench.py CHANGED
@@ -2,12 +2,11 @@ from __future__ import annotations
2
 
3
  import argparse
4
 
5
- from sentence_transformers import SentenceTransformer
6
-
7
  from corpus import build_corpus
8
  from evals import evaluate_memory, evaluate_quality, evaluate_speed
9
  from models import REGISTRY
10
  from report import print_report
 
11
 
12
 
13
  def main(argv: list[str] | None = None) -> None:
@@ -49,21 +48,21 @@ def main(argv: list[str] | None = None) -> None:
49
 
50
  if not args.skip_quality:
51
  print(" Evaluating quality (STS Benchmark)...")
52
- model = SentenceTransformer(cfg.model_id)
53
  result["quality"] = evaluate_quality(model)
54
  print(f" Quality: {result['quality']:.4f}")
55
  del model
56
 
57
  if not args.skip_speed and corpus is not None:
58
  print(f" Evaluating speed ({args.num_runs} runs, {args.corpus_size} sentences)...")
59
- model = SentenceTransformer(cfg.model_id)
60
  result["speed"] = evaluate_speed(model, corpus, num_runs=args.num_runs, batch_size=args.batch_size)
61
  print(f" Speed: {result['speed']['sentences_per_second']} sent/s")
62
  del model
63
 
64
  if not args.skip_memory and corpus is not None:
65
  print(" Evaluating memory (isolated subprocess)...")
66
- result["memory_mb"] = evaluate_memory(cfg.model_id, corpus, batch_size=args.batch_size)
67
  print(f" Memory: {result['memory_mb']} MB")
68
 
69
  results.append(result)
 
2
 
3
  import argparse
4
 
 
 
5
  from corpus import build_corpus
6
  from evals import evaluate_memory, evaluate_quality, evaluate_speed
7
  from models import REGISTRY
8
  from report import print_report
9
+ from wrapper import load_model
10
 
11
 
12
  def main(argv: list[str] | None = None) -> None:
 
48
 
49
  if not args.skip_quality:
50
  print(" Evaluating quality (STS Benchmark)...")
51
+ model = load_model(cfg)
52
  result["quality"] = evaluate_quality(model)
53
  print(f" Quality: {result['quality']:.4f}")
54
  del model
55
 
56
  if not args.skip_speed and corpus is not None:
57
  print(f" Evaluating speed ({args.num_runs} runs, {args.corpus_size} sentences)...")
58
+ model = load_model(cfg)
59
  result["speed"] = evaluate_speed(model, corpus, num_runs=args.num_runs, batch_size=args.batch_size)
60
  print(f" Speed: {result['speed']['sentences_per_second']} sent/s")
61
  del model
62
 
63
  if not args.skip_memory and corpus is not None:
64
  print(" Evaluating memory (isolated subprocess)...")
65
+ result["memory_mb"] = evaluate_memory(cfg.model_id, corpus, batch_size=args.batch_size, backend=cfg.backend)
66
  print(f" Memory: {result['memory_mb']} MB")
67
 
68
  results.append(result)
evals/memory.py CHANGED
@@ -4,23 +4,25 @@ import multiprocessing
4
  import os
5
 
6
 
7
- def _measure(model_id: str, sentences: list[str], batch_size: int, queue: multiprocessing.Queue) -> None:
8
  import psutil
9
- from sentence_transformers import SentenceTransformer
 
10
 
11
  process = psutil.Process(os.getpid())
12
  baseline = process.memory_info().rss
13
- model = SentenceTransformer(model_id)
14
- model.encode(sentences, batch_size=batch_size, show_progress_bar=False)
 
15
  peak = process.memory_info().rss
16
  queue.put(peak - baseline)
17
 
18
 
19
- def evaluate_memory(model_id: str, sentences: list[str], batch_size: int = 64) -> float:
20
  """Return memory delta in MB, measured in an isolated subprocess."""
21
  ctx = multiprocessing.get_context("spawn")
22
  q = ctx.Queue()
23
- p = ctx.Process(target=_measure, args=(model_id, sentences, batch_size, q))
24
  p.start()
25
  p.join()
26
  bytes_delta = q.get()
 
4
  import os
5
 
6
 
7
+ def _measure(model_id: str, backend: str, sentences: list[str], batch_size: int, queue: multiprocessing.Queue) -> None:
8
  import psutil
9
+ from models import ModelConfig
10
+ from wrapper import load_model
11
 
12
  process = psutil.Process(os.getpid())
13
  baseline = process.memory_info().rss
14
+ cfg = ModelConfig(name="", model_id=model_id, backend=backend)
15
+ model = load_model(cfg)
16
+ model.encode(sentences, batch_size=batch_size)
17
  peak = process.memory_info().rss
18
  queue.put(peak - baseline)
19
 
20
 
21
+ def evaluate_memory(model_id: str, sentences: list[str], batch_size: int = 64, backend: str = "sbert") -> float:
22
  """Return memory delta in MB, measured in an isolated subprocess."""
23
  ctx = multiprocessing.get_context("spawn")
24
  q = ctx.Queue()
25
+ p = ctx.Process(target=_measure, args=(model_id, backend, sentences, batch_size, q))
26
  p.start()
27
  p.join()
28
  bytes_delta = q.get()
evals/quality.py CHANGED
@@ -1,17 +1,24 @@
1
  from __future__ import annotations
2
 
 
3
  from datasets import load_dataset
4
- from sentence_transformers import SentenceTransformer
5
- from sentence_transformers.evaluation import EmbeddingSimilarityEvaluator
6
 
7
 
8
- def evaluate_quality(model: SentenceTransformer) -> float:
9
  """Return Spearman correlation on the STS Benchmark test set."""
10
  dataset = load_dataset("mteb/stsbenchmark-sts", split="test")
11
  sentences1 = list(dataset["sentence1"])
12
  sentences2 = list(dataset["sentence2"])
13
- scores = [s / 5.0 for s in dataset["score"]]
14
 
15
- evaluator = EmbeddingSimilarityEvaluator(sentences1, sentences2, scores)
16
- results = evaluator(model)
17
- return results["spearman_cosine"]
 
 
 
 
 
 
 
 
1
  from __future__ import annotations
2
 
3
+ import numpy as np
4
  from datasets import load_dataset
5
+ from scipy.stats import spearmanr
 
6
 
7
 
8
+ def evaluate_quality(model) -> float:
9
  """Return Spearman correlation on the STS Benchmark test set."""
10
  dataset = load_dataset("mteb/stsbenchmark-sts", split="test")
11
  sentences1 = list(dataset["sentence1"])
12
  sentences2 = list(dataset["sentence2"])
13
+ gold_scores = [s / 5.0 for s in dataset["score"]]
14
 
15
+ emb1 = model.encode(sentences1)
16
+ emb2 = model.encode(sentences2)
17
+
18
+ # Row-wise cosine similarity
19
+ cos_sims = np.sum(emb1 * emb2, axis=1) / (
20
+ np.linalg.norm(emb1, axis=1) * np.linalg.norm(emb2, axis=1)
21
+ )
22
+
23
+ correlation, _ = spearmanr(cos_sims, gold_scores)
24
+ return correlation
evals/speed.py CHANGED
@@ -3,11 +3,9 @@ from __future__ import annotations
3
  import statistics
4
  import time
5
 
6
- from sentence_transformers import SentenceTransformer
7
-
8
 
9
  def evaluate_speed(
10
- model: SentenceTransformer,
11
  sentences: list[str],
12
  num_runs: int = 3,
13
  batch_size: int = 64,
 
3
  import statistics
4
  import time
5
 
 
 
6
 
7
  def evaluate_speed(
8
+ model,
9
  sentences: list[str],
10
  num_runs: int = 3,
11
  batch_size: int = 64,
models.py CHANGED
@@ -8,6 +8,8 @@ class ModelConfig:
8
  name: str
9
  model_id: str
10
  is_baseline: bool = False
 
 
11
 
12
 
13
  REGISTRY: dict[str, ModelConfig] = {
@@ -20,4 +22,14 @@ REGISTRY: dict[str, ModelConfig] = {
20
  name="bge-small-en-v1.5",
21
  model_id="BAAI/bge-small-en-v1.5",
22
  ),
 
 
 
 
 
 
 
 
 
 
23
  }
 
8
  name: str
9
  model_id: str
10
  is_baseline: bool = False
11
+ backend: str = "sbert"
12
+ gguf_file: str | None = None
13
 
14
 
15
  REGISTRY: dict[str, ModelConfig] = {
 
22
  name="bge-small-en-v1.5",
23
  model_id="BAAI/bge-small-en-v1.5",
24
  ),
25
+ "bge-small-fe": ModelConfig(
26
+ name="bge-small-en-v1.5 (fastembed)",
27
+ model_id="BAAI/bge-small-en-v1.5",
28
+ backend="fastembed",
29
+ ),
30
+ "all-minilm-fe": ModelConfig(
31
+ name="all-MiniLM-L6-v2 (fastembed)",
32
+ model_id="sentence-transformers/all-MiniLM-L6-v2",
33
+ backend="fastembed",
34
+ ),
35
  }
requirements.txt CHANGED
@@ -3,3 +3,6 @@ torch
3
  datasets
4
  psutil
5
  tabulate
 
 
 
 
3
  datasets
4
  psutil
5
  tabulate
6
+ fastembed
7
+ numpy
8
+ scipy
wrapper.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import numpy as np
4
+
5
+ from models import ModelConfig
6
+
7
+
8
+ class SBertWrapper:
9
+ """Wraps sentence_transformers.SentenceTransformer."""
10
+
11
+ def __init__(self, cfg: ModelConfig):
12
+ from sentence_transformers import SentenceTransformer
13
+ self._model = SentenceTransformer(cfg.model_id)
14
+
15
+ def encode(self, sentences: list[str], batch_size: int = 64, **kwargs) -> np.ndarray:
16
+ kwargs.setdefault("show_progress_bar", False)
17
+ return self._model.encode(sentences, batch_size=batch_size, **kwargs)
18
+
19
+
20
+ class GGUFWrapper:
21
+ """Wraps llama_cpp.Llama in embedding mode."""
22
+
23
+ def __init__(self, cfg: ModelConfig):
24
+ from huggingface_hub import hf_hub_download
25
+ from llama_cpp import Llama
26
+
27
+ path = hf_hub_download(repo_id=cfg.model_id, filename=cfg.gguf_file)
28
+ self._model = Llama(
29
+ model_path=path, embedding=True, n_ctx=512, verbose=False
30
+ )
31
+
32
+ def encode(self, sentences: list[str], batch_size: int = 64, **kwargs) -> np.ndarray:
33
+ all_embeddings = []
34
+ for i in range(0, len(sentences), batch_size):
35
+ batch = sentences[i : i + batch_size]
36
+ response = self._model.create_embedding(batch)
37
+ embeddings = [item["embedding"] for item in response["data"]]
38
+ all_embeddings.extend(embeddings)
39
+ return np.array(all_embeddings, dtype=np.float32)
40
+
41
+
42
+ class FastEmbedWrapper:
43
+ """Wraps fastembed.TextEmbedding."""
44
+
45
+ def __init__(self, cfg: ModelConfig):
46
+ from fastembed import TextEmbedding
47
+ self._model = TextEmbedding(model_name=cfg.model_id)
48
+
49
+ def encode(self, sentences: list[str], batch_size: int = 64, **kwargs) -> np.ndarray:
50
+ embeddings = list(self._model.embed(sentences, batch_size=batch_size))
51
+ return np.array(embeddings, dtype=np.float32)
52
+
53
+
54
+ def load_model(cfg: ModelConfig) -> SBertWrapper | GGUFWrapper | FastEmbedWrapper:
55
+ """Factory: returns the right wrapper for the model's backend."""
56
+ if cfg.backend == "gguf":
57
+ return GGUFWrapper(cfg)
58
+ if cfg.backend == "fastembed":
59
+ return FastEmbedWrapper(cfg)
60
+ return SBertWrapper(cfg)