Spaces:
Running
Running
AmrYassinIsFree committed on
Commit ·
f56dbf3
1
Parent(s): 173f28e
add fastembed lib
Browse files- README.md +24 -4
- bench.py +4 -5
- evals/memory.py +8 -6
- evals/quality.py +14 -7
- evals/speed.py +1 -3
- models.py +12 -0
- requirements.txt +3 -0
- wrapper.py +60 -0
README.md
CHANGED
|
@@ -4,10 +4,18 @@ Compare text embedding models across retrieval quality, inference speed, and mem
|
|
| 4 |
|
| 5 |
## Models
|
| 6 |
|
| 7 |
-
| Key | Model | Role |
|
| 8 |
-
|-----|-------|------|
|
| 9 |
-
| `mpnet` | `sentence-transformers/all-mpnet-base-v2` | Baseline |
|
| 10 |
-
| `bge-small` | `BAAI/bge-small-en-v1.5` | |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
## Setup
|
| 13 |
|
|
@@ -26,6 +34,9 @@ python bench.py
|
|
| 26 |
# Specific models
|
| 27 |
python bench.py --models mpnet bge-small
|
| 28 |
|
|
|
|
|
|
|
|
|
|
| 29 |
# Skip expensive evals
|
| 30 |
python bench.py --skip-quality
|
| 31 |
python bench.py --skip-memory
|
|
@@ -47,10 +58,18 @@ python bench.py --corpus-size 500 --batch-size 32 --num-runs 5
|
|
| 47 |
Edit `models.py` and add an entry to `REGISTRY`:
|
| 48 |
|
| 49 |
```python
|
|
|
|
| 50 |
"e5-small": ModelConfig(
|
| 51 |
name="e5-small-v2",
|
| 52 |
model_id="intfloat/e5-small-v2",
|
| 53 |
),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
```
|
| 55 |
|
| 56 |
## Project structure
|
|
@@ -59,6 +78,7 @@ Edit `models.py` and add an entry to `REGISTRY`:
|
|
| 59 |
embedding-bench/
|
| 60 |
├── bench.py # CLI entry point
|
| 61 |
├── models.py # Model registry
|
|
|
|
| 62 |
├── corpus.py # Sentence corpus builder
|
| 63 |
├── report.py # Table formatting
|
| 64 |
├── evals/
|
|
|
|
| 4 |
|
| 5 |
## Models
|
| 6 |
|
| 7 |
+
| Key | Model | Backend | Role |
|
| 8 |
+
|-----|-------|---------|------|
|
| 9 |
+
| `mpnet` | `sentence-transformers/all-mpnet-base-v2` | sbert | Baseline |
|
| 10 |
+
| `bge-small` | `BAAI/bge-small-en-v1.5` | sbert | |
|
| 11 |
+
| `bge-small-fe` | `BAAI/bge-small-en-v1.5` | fastembed | |
|
| 12 |
+
| `all-minilm-fe` | `sentence-transformers/all-MiniLM-L6-v2` | fastembed | |
|
| 13 |
+
|
| 14 |
+
Three backends are supported:
|
| 15 |
+
|
| 16 |
+
- **sbert** — [sentence-transformers](https://www.sbert.net/) (PyTorch). Default.
|
| 17 |
+
- **fastembed** — [qdrant/fastembed](https://github.com/qdrant/fastembed) (ONNX Runtime). Lighter and often faster.
|
| 18 |
+
- **gguf** — [llama-cpp-python](https://github.com/abetlen/llama-cpp-python) for quantised GGUF models.
|
| 19 |
|
| 20 |
## Setup
|
| 21 |
|
|
|
|
| 34 |
# Specific models
|
| 35 |
python bench.py --models mpnet bge-small
|
| 36 |
|
| 37 |
+
# Compare the same model across backends
|
| 38 |
+
python bench.py --models bge-small bge-small-fe
|
| 39 |
+
|
| 40 |
# Skip expensive evals
|
| 41 |
python bench.py --skip-quality
|
| 42 |
python bench.py --skip-memory
|
|
|
|
| 58 |
Edit `models.py` and add an entry to `REGISTRY`:
|
| 59 |
|
| 60 |
```python
|
| 61 |
+
# sentence-transformers backend (default)
|
| 62 |
"e5-small": ModelConfig(
|
| 63 |
name="e5-small-v2",
|
| 64 |
model_id="intfloat/e5-small-v2",
|
| 65 |
),
|
| 66 |
+
|
| 67 |
+
# fastembed backend
|
| 68 |
+
"e5-small-fe": ModelConfig(
|
| 69 |
+
name="e5-small-v2 (fastembed)",
|
| 70 |
+
model_id="intfloat/e5-small-v2",
|
| 71 |
+
backend="fastembed",
|
| 72 |
+
),
|
| 73 |
```
|
| 74 |
|
| 75 |
## Project structure
|
|
|
|
| 78 |
embedding-bench/
|
| 79 |
├── bench.py # CLI entry point
|
| 80 |
├── models.py # Model registry
|
| 81 |
+
├── wrapper.py # Backend wrappers (sbert, fastembed, gguf)
|
| 82 |
├── corpus.py # Sentence corpus builder
|
| 83 |
├── report.py # Table formatting
|
| 84 |
├── evals/
|
bench.py
CHANGED
|
@@ -2,12 +2,11 @@ from __future__ import annotations
|
|
| 2 |
|
| 3 |
import argparse
|
| 4 |
|
| 5 |
-
from sentence_transformers import SentenceTransformer
|
| 6 |
-
|
| 7 |
from corpus import build_corpus
|
| 8 |
from evals import evaluate_memory, evaluate_quality, evaluate_speed
|
| 9 |
from models import REGISTRY
|
| 10 |
from report import print_report
|
|
|
|
| 11 |
|
| 12 |
|
| 13 |
def main(argv: list[str] | None = None) -> None:
|
|
@@ -49,21 +48,21 @@ def main(argv: list[str] | None = None) -> None:
|
|
| 49 |
|
| 50 |
if not args.skip_quality:
|
| 51 |
print(" Evaluating quality (STS Benchmark)...")
|
| 52 |
-
model =
|
| 53 |
result["quality"] = evaluate_quality(model)
|
| 54 |
print(f" Quality: {result['quality']:.4f}")
|
| 55 |
del model
|
| 56 |
|
| 57 |
if not args.skip_speed and corpus is not None:
|
| 58 |
print(f" Evaluating speed ({args.num_runs} runs, {args.corpus_size} sentences)...")
|
| 59 |
-
model =
|
| 60 |
result["speed"] = evaluate_speed(model, corpus, num_runs=args.num_runs, batch_size=args.batch_size)
|
| 61 |
print(f" Speed: {result['speed']['sentences_per_second']} sent/s")
|
| 62 |
del model
|
| 63 |
|
| 64 |
if not args.skip_memory and corpus is not None:
|
| 65 |
print(" Evaluating memory (isolated subprocess)...")
|
| 66 |
-
result["memory_mb"] = evaluate_memory(cfg.model_id, corpus, batch_size=args.batch_size)
|
| 67 |
print(f" Memory: {result['memory_mb']} MB")
|
| 68 |
|
| 69 |
results.append(result)
|
|
|
|
| 2 |
|
| 3 |
import argparse
|
| 4 |
|
|
|
|
|
|
|
| 5 |
from corpus import build_corpus
|
| 6 |
from evals import evaluate_memory, evaluate_quality, evaluate_speed
|
| 7 |
from models import REGISTRY
|
| 8 |
from report import print_report
|
| 9 |
+
from wrapper import load_model
|
| 10 |
|
| 11 |
|
| 12 |
def main(argv: list[str] | None = None) -> None:
|
|
|
|
| 48 |
|
| 49 |
if not args.skip_quality:
|
| 50 |
print(" Evaluating quality (STS Benchmark)...")
|
| 51 |
+
model = load_model(cfg)
|
| 52 |
result["quality"] = evaluate_quality(model)
|
| 53 |
print(f" Quality: {result['quality']:.4f}")
|
| 54 |
del model
|
| 55 |
|
| 56 |
if not args.skip_speed and corpus is not None:
|
| 57 |
print(f" Evaluating speed ({args.num_runs} runs, {args.corpus_size} sentences)...")
|
| 58 |
+
model = load_model(cfg)
|
| 59 |
result["speed"] = evaluate_speed(model, corpus, num_runs=args.num_runs, batch_size=args.batch_size)
|
| 60 |
print(f" Speed: {result['speed']['sentences_per_second']} sent/s")
|
| 61 |
del model
|
| 62 |
|
| 63 |
if not args.skip_memory and corpus is not None:
|
| 64 |
print(" Evaluating memory (isolated subprocess)...")
|
| 65 |
+
result["memory_mb"] = evaluate_memory(cfg.model_id, corpus, batch_size=args.batch_size, backend=cfg.backend)
|
| 66 |
print(f" Memory: {result['memory_mb']} MB")
|
| 67 |
|
| 68 |
results.append(result)
|
evals/memory.py
CHANGED
|
@@ -4,23 +4,25 @@ import multiprocessing
|
|
| 4 |
import os
|
| 5 |
|
| 6 |
|
| 7 |
-
def _measure(model_id: str, sentences: list[str], batch_size: int, queue: multiprocessing.Queue) -> None:
|
| 8 |
import psutil
|
| 9 |
-
from
|
|
|
|
| 10 |
|
| 11 |
process = psutil.Process(os.getpid())
|
| 12 |
baseline = process.memory_info().rss
|
| 13 |
-
|
| 14 |
-
model
|
|
|
|
| 15 |
peak = process.memory_info().rss
|
| 16 |
queue.put(peak - baseline)
|
| 17 |
|
| 18 |
|
| 19 |
-
def evaluate_memory(model_id: str, sentences: list[str], batch_size: int = 64) -> float:
|
| 20 |
"""Return memory delta in MB, measured in an isolated subprocess."""
|
| 21 |
ctx = multiprocessing.get_context("spawn")
|
| 22 |
q = ctx.Queue()
|
| 23 |
-
p = ctx.Process(target=_measure, args=(model_id, sentences, batch_size, q))
|
| 24 |
p.start()
|
| 25 |
p.join()
|
| 26 |
bytes_delta = q.get()
|
|
|
|
| 4 |
import os
|
| 5 |
|
| 6 |
|
| 7 |
+
def _measure(model_id: str, backend: str, sentences: list[str], batch_size: int, queue: multiprocessing.Queue) -> None:
    """Load a model, encode *sentences* once, and report the RSS growth.

    Intended to run as the target of a separate process. The difference in
    resident set size (bytes) between "before the model is loaded" and "after
    one encode pass" is put on *queue*.
    """
    import psutil
    from models import ModelConfig
    from wrapper import load_model

    # The baseline is sampled after the imports above, so their footprint is
    # not counted in the reported delta.
    proc = psutil.Process(os.getpid())
    rss_before = proc.memory_info().rss

    encoder = load_model(ModelConfig(name="", model_id=model_id, backend=backend))
    encoder.encode(sentences, batch_size=batch_size)

    # NOTE(review): this is the RSS once encode() has returned, not a true
    # high-water mark; transient allocations freed during encoding are missed.
    rss_after = proc.memory_info().rss
    queue.put(rss_after - rss_before)
|
| 19 |
|
| 20 |
|
| 21 |
+
def evaluate_memory(model_id: str, sentences: list[str], batch_size: int = 64, backend: str = "sbert") -> float:
|
| 22 |
"""Return memory delta in MB, measured in an isolated subprocess."""
|
| 23 |
ctx = multiprocessing.get_context("spawn")
|
| 24 |
q = ctx.Queue()
|
| 25 |
+
p = ctx.Process(target=_measure, args=(model_id, backend, sentences, batch_size, q))
|
| 26 |
p.start()
|
| 27 |
p.join()
|
| 28 |
bytes_delta = q.get()
|
evals/quality.py
CHANGED
|
@@ -1,17 +1,24 @@
|
|
| 1 |
from __future__ import annotations
|
| 2 |
|
|
|
|
| 3 |
from datasets import load_dataset
|
| 4 |
-
from
|
| 5 |
-
from sentence_transformers.evaluation import EmbeddingSimilarityEvaluator
|
| 6 |
|
| 7 |
|
| 8 |
-
def evaluate_quality(model
|
| 9 |
"""Return Spearman correlation on the STS Benchmark test set."""
|
| 10 |
dataset = load_dataset("mteb/stsbenchmark-sts", split="test")
|
| 11 |
sentences1 = list(dataset["sentence1"])
|
| 12 |
sentences2 = list(dataset["sentence2"])
|
| 13 |
-
|
| 14 |
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from __future__ import annotations
|
| 2 |
|
| 3 |
+
import numpy as np
|
| 4 |
from datasets import load_dataset
|
| 5 |
+
from scipy.stats import spearmanr
|
|
|
|
| 6 |
|
| 7 |
|
| 8 |
+
def evaluate_quality(model) -> float:
    """Return Spearman correlation on the STS Benchmark test set.

    Args:
        model: Any object exposing ``encode(sentences)`` that returns one
            embedding row per input sentence.

    Returns:
        Spearman rank correlation between the row-wise cosine similarity of
        the sentence-pair embeddings and the gold similarity scores.
    """
    dataset = load_dataset("mteb/stsbenchmark-sts", split="test")
    sentences1 = list(dataset["sentence1"])
    sentences2 = list(dataset["sentence2"])
    gold_scores = [s / 5.0 for s in dataset["score"]]

    # asarray() lets the array math below work even if a backend hands back
    # a plain list of vectors.
    emb1 = np.asarray(model.encode(sentences1))
    emb2 = np.asarray(model.encode(sentences2))

    # Row-wise cosine similarity. A zero-norm row would otherwise divide by
    # zero and yield NaN, which makes the whole correlation NaN; such pairs
    # are scored as similarity 0 instead.
    dots = np.sum(emb1 * emb2, axis=1)
    norms = np.linalg.norm(emb1, axis=1) * np.linalg.norm(emb2, axis=1)
    cos_sims = np.divide(
        dots,
        norms,
        out=np.zeros(dots.shape, dtype=float),
        where=norms > 0,
    )

    correlation, _ = spearmanr(cos_sims, gold_scores)
    # Convert the numpy scalar so the declared ``float`` return type holds.
    return float(correlation)
|
evals/speed.py
CHANGED
|
@@ -3,11 +3,9 @@ from __future__ import annotations
|
|
| 3 |
import statistics
|
| 4 |
import time
|
| 5 |
|
| 6 |
-
from sentence_transformers import SentenceTransformer
|
| 7 |
-
|
| 8 |
|
| 9 |
def evaluate_speed(
|
| 10 |
-
model
|
| 11 |
sentences: list[str],
|
| 12 |
num_runs: int = 3,
|
| 13 |
batch_size: int = 64,
|
|
|
|
| 3 |
import statistics
|
| 4 |
import time
|
| 5 |
|
|
|
|
|
|
|
| 6 |
|
| 7 |
def evaluate_speed(
|
| 8 |
+
model,
|
| 9 |
sentences: list[str],
|
| 10 |
num_runs: int = 3,
|
| 11 |
batch_size: int = 64,
|
models.py
CHANGED
|
@@ -8,6 +8,8 @@ class ModelConfig:
|
|
| 8 |
name: str
|
| 9 |
model_id: str
|
| 10 |
is_baseline: bool = False
|
|
|
|
|
|
|
| 11 |
|
| 12 |
|
| 13 |
REGISTRY: dict[str, ModelConfig] = {
|
|
@@ -20,4 +22,14 @@ REGISTRY: dict[str, ModelConfig] = {
|
|
| 20 |
name="bge-small-en-v1.5",
|
| 21 |
model_id="BAAI/bge-small-en-v1.5",
|
| 22 |
),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
}
|
|
|
|
| 8 |
name: str
|
| 9 |
model_id: str
|
| 10 |
is_baseline: bool = False
|
| 11 |
+
backend: str = "sbert"
|
| 12 |
+
gguf_file: str | None = None
|
| 13 |
|
| 14 |
|
| 15 |
REGISTRY: dict[str, ModelConfig] = {
|
|
|
|
| 22 |
name="bge-small-en-v1.5",
|
| 23 |
model_id="BAAI/bge-small-en-v1.5",
|
| 24 |
),
|
| 25 |
+
"bge-small-fe": ModelConfig(
|
| 26 |
+
name="bge-small-en-v1.5 (fastembed)",
|
| 27 |
+
model_id="BAAI/bge-small-en-v1.5",
|
| 28 |
+
backend="fastembed",
|
| 29 |
+
),
|
| 30 |
+
"all-minilm-fe": ModelConfig(
|
| 31 |
+
name="all-MiniLM-L6-v2 (fastembed)",
|
| 32 |
+
model_id="sentence-transformers/all-MiniLM-L6-v2",
|
| 33 |
+
backend="fastembed",
|
| 34 |
+
),
|
| 35 |
}
|
requirements.txt
CHANGED
|
@@ -3,3 +3,6 @@ torch
|
|
| 3 |
datasets
|
| 4 |
psutil
|
| 5 |
tabulate
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
datasets
|
| 4 |
psutil
|
| 5 |
tabulate
|
| 6 |
+
fastembed
|
| 7 |
+
numpy
|
| 8 |
+
scipy
|
wrapper.py
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import numpy as np
|
| 4 |
+
|
| 5 |
+
from models import ModelConfig
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class SBertWrapper:
    """Adapter exposing a sentence-transformers model through ``encode``."""

    def __init__(self, cfg: ModelConfig):
        # Imported at construction time rather than at module import.
        from sentence_transformers import SentenceTransformer

        self._model = SentenceTransformer(cfg.model_id)

    def encode(self, sentences: list[str], batch_size: int = 64, **kwargs) -> np.ndarray:
        """Encode *sentences*, forwarding extra keyword arguments to the model.

        The progress bar is disabled unless the caller passes
        ``show_progress_bar`` explicitly.
        """
        options = {"show_progress_bar": False, **kwargs}
        return self._model.encode(sentences, batch_size=batch_size, **options)
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
class GGUFWrapper:
    """Wraps llama_cpp.Llama in embedding mode."""

    def __init__(self, cfg: ModelConfig):
        """Download the GGUF file named by *cfg* and load it for embedding.

        Raises:
            ValueError: If ``cfg.gguf_file`` is not set.
        """
        # ``gguf_file`` defaults to None on ModelConfig; fail here with a
        # clear message instead of passing None on to the download call.
        if not cfg.gguf_file:
            raise ValueError(
                f"backend 'gguf' requires gguf_file to be set (model_id={cfg.model_id!r})"
            )

        from huggingface_hub import hf_hub_download
        from llama_cpp import Llama

        path = hf_hub_download(repo_id=cfg.model_id, filename=cfg.gguf_file)
        self._model = Llama(
            model_path=path, embedding=True, n_ctx=512, verbose=False
        )

    def encode(self, sentences: list[str], batch_size: int = 64, **kwargs) -> np.ndarray:
        """Embed *sentences* in chunks of *batch_size*.

        Extra keyword arguments are accepted for signature parity with the
        other wrappers and are ignored.

        Returns:
            A float32 array with one row per input sentence.
        """
        rows = []
        for start in range(0, len(sentences), batch_size):
            response = self._model.create_embedding(sentences[start : start + batch_size])
            rows.extend(item["embedding"] for item in response["data"])
        return np.array(rows, dtype=np.float32)
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
class FastEmbedWrapper:
    """Adapter exposing a fastembed ``TextEmbedding`` model through ``encode``."""

    def __init__(self, cfg: ModelConfig):
        # Imported at construction time rather than at module import.
        from fastembed import TextEmbedding

        self._model = TextEmbedding(model_name=cfg.model_id)

    def encode(self, sentences: list[str], batch_size: int = 64, **kwargs) -> np.ndarray:
        """Embed *sentences* and return them as a float32 array.

        Extra keyword arguments are accepted for signature parity with the
        other wrappers and are ignored.
        """
        vector_stream = self._model.embed(sentences, batch_size=batch_size)
        # The whole stream is materialised before being packed into one array.
        vectors = list(vector_stream)
        return np.array(vectors, dtype=np.float32)
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
def load_model(cfg: ModelConfig) -> SBertWrapper | GGUFWrapper | FastEmbedWrapper:
    """Factory: returns the right wrapper for the model's backend.

    Args:
        cfg: Model configuration; ``cfg.backend`` selects the wrapper.

    Returns:
        A wrapper exposing ``encode(sentences, batch_size=...)``.

    Raises:
        ValueError: If ``cfg.backend`` is not ``"sbert"``, ``"fastembed"``
            or ``"gguf"``.
    """
    if cfg.backend == "sbert":
        return SBertWrapper(cfg)
    if cfg.backend == "fastembed":
        return FastEmbedWrapper(cfg)
    if cfg.backend == "gguf":
        return GGUFWrapper(cfg)
    # An unrecognised name (e.g. a typo) must not silently fall back to
    # sbert, otherwise the benchmark would report results for the wrong
    # backend.
    raise ValueError(
        f"unknown backend {cfg.backend!r}; expected 'sbert', 'fastembed' or 'gguf'"
    )
|