File size: 1,314 Bytes
f2ddb73
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
from dataclasses import dataclass
import os
from typing import Literal

# Model identifier passed as `model_name` in every EmbeddingRuntimeConfig that
# get_runtime_config() builds.
EMBEDDING_MODEL = "pgc/bge-m3-onnx"
# Default value of EmbeddingRuntimeConfig.dim.
# NOTE(review): presumably the output vector length of bge-m3 — confirm.
EMBEDDING_DIM = 1024


@dataclass(frozen=True, slots=True)
class EmbeddingRuntimeConfig:
    """Frozen bundle of settings describing which embedding model to load.

    NOTE(review): ``providers`` is a ``list``, so while attributes cannot be
    reassigned, the list itself can still be mutated in place, and calling
    ``hash()`` on an instance raises ``TypeError`` (lists are unhashable).
    """

    # Name the model is referred to by; get_runtime_config() passes EMBEDDING_MODEL.
    model_name: str
    # Origin of the model weights; the defaults in this file look like
    # Hugging Face repo ids (e.g. "Xenova/bge-m3") — confirm against the loader.
    source: str
    # Path of the ONNX file to load, e.g. "onnx/model_int8.onnx".
    model_file: str
    # Execution provider names, e.g. ["CUDAExecutionProvider", "CPUExecutionProvider"];
    # presumably tried in the listed order by the runtime — confirm.
    providers: list[str]
    # Embedding dimension; defaults to the module-level EMBEDDING_DIM.
    dim: int = EMBEDDING_DIM


def get_runtime_config(runtime: Literal["query", "ingest"]) -> EmbeddingRuntimeConfig:
    """Build the embedding model configuration for the given runtime.

    Both runtimes default to the ``Xenova/bge-m3`` source and the
    ``onnx/model_int8.onnx`` file. Each can be overridden per runtime through
    environment variables:

    * ingest: ``PGC_BGE_M3_INGEST_SOURCE`` / ``PGC_BGE_M3_INGEST_MODEL_FILE``
    * query:  ``PGC_BGE_M3_QUERY_SOURCE`` / ``PGC_BGE_M3_QUERY_MODEL_FILE``

    An override that is unset *or empty* falls back to the default, so a blank
    variable (e.g. ``PGC_BGE_M3_QUERY_SOURCE=``) never produces an empty
    source or model file.

    Args:
        runtime: ``"ingest"`` selects the ingest configuration, which lists the
            CUDA execution provider ahead of the CPU one. Any other value
            (normally ``"query"``) selects the CPU-only query configuration.

    Returns:
        A new ``EmbeddingRuntimeConfig`` with ``model_name`` set to
        ``EMBEDDING_MODEL`` and ``dim`` left at its default.
    """
    # Xenova/bge-m3 provides self-contained single-file quantized ONNX variants
    # (model_int8.onnx, ~568 MB) whereas BAAI/bge-m3 only has model.onnx +
    # model.onnx_data (external-data format, 2.3 GB), which fastembed cannot load
    # as a custom model without special handling.
    default_source = "Xenova/bge-m3"
    default_model_file = "onnx/model_int8.onnx"

    if runtime == "ingest":
        source_var = "PGC_BGE_M3_INGEST_SOURCE"
        model_file_var = "PGC_BGE_M3_INGEST_MODEL_FILE"
        providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]
    else:
        source_var = "PGC_BGE_M3_QUERY_SOURCE"
        model_file_var = "PGC_BGE_M3_QUERY_MODEL_FILE"
        providers = ["CPUExecutionProvider"]

    return EmbeddingRuntimeConfig(
        model_name=EMBEDDING_MODEL,
        # `or` (rather than getenv's default argument) also covers variables
        # that are set but empty.
        source=os.getenv(source_var) or default_source,
        model_file=os.getenv(model_file_var) or default_model_file,
        providers=providers,
    )