File size: 708 Bytes
63105da
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
from . import sentence_transformer_embed

EMBEDDING_BACKENDS = {
    "sentence_transformers": sentence_transformer_embed
}

def embed_chunks(chunks, backend: str, model_name: str, version: str = None):
    mod = EMBEDDING_BACKENDS.get(backend)
    if not mod:
        raise ValueError(f"Unknown backend: {backend}")
    texts = [c["text"] if isinstance(c, dict) else c for c in chunks]
    metas = [c.get("meta", {}) if isinstance(c, dict) else {} for c in chunks]
    embeddings = mod.embed(texts, model_name)
    version = version or f"{backend}:{model_name}"
    return [
        {"embedding": emb, "meta": meta, "version": version}
        for emb, meta in zip(embeddings, metas)
    ]