Spaces:
Running
Running
File size: 1,314 Bytes
f2ddb73 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 | from dataclasses import dataclass
import os
from typing import Literal
# Identifier placed in the ``model_name`` field of every configuration that
# get_runtime_config returns.
EMBEDDING_MODEL = "pgc/bge-m3-onnx"
# Default for EmbeddingRuntimeConfig.dim; presumably the length of the vectors
# the model emits -- TODO confirm against the model card.
EMBEDDING_DIM = 1024
@dataclass(slots=True, frozen=True)
class EmbeddingRuntimeConfig:
    """Read-only settings describing how one embedding runtime loads its model.

    Instances are frozen, so fields cannot be reassigned after construction.
    Fields are accepted positionally in declaration order; only ``dim`` has a
    default value.
    """

    # Name the model is referred to by.
    model_name: str
    # Where the model comes from, e.g. "Xenova/bge-m3" (looks like a model
    # repository id -- confirm with the loader that consumes this value).
    source: str
    # Model file to load from ``source``, e.g. "onnx/model_int8.onnx".
    model_file: str
    # Execution provider names, e.g. "CPUExecutionProvider"; presumably in
    # order of preference -- verify against the consumer.
    providers: list[str]
    # Embedding dimension; falls back to the module-level EMBEDDING_DIM.
    dim: int = EMBEDDING_DIM
def get_runtime_config(runtime: Literal["query", "ingest"]) -> EmbeddingRuntimeConfig:
    """Build the embedding configuration for the requested runtime.

    Args:
        runtime: ``"ingest"`` selects the ingest settings; any other value
            yields the query settings.

    Returns:
        A new ``EmbeddingRuntimeConfig``. Its ``source`` and ``model_file``
        can be overridden through the ``PGC_BGE_M3_<RUNTIME>_SOURCE`` and
        ``PGC_BGE_M3_<RUNTIME>_MODEL_FILE`` environment variables, which are
        read on every call.
    """
    # Why Xenova/bge-m3 is the default source: it ships self-contained,
    # single-file quantized ONNX variants (model_int8.onnx, ~568 MB), while
    # BAAI/bge-m3 only offers model.onnx + model.onnx_data (external-data
    # format, 2.3 GB), which fastembed cannot load as a custom model without
    # special handling.
    if runtime == "ingest":
        source_var = "PGC_BGE_M3_INGEST_SOURCE"
        model_file_var = "PGC_BGE_M3_INGEST_MODEL_FILE"
        execution_providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]
    else:
        source_var = "PGC_BGE_M3_QUERY_SOURCE"
        model_file_var = "PGC_BGE_M3_QUERY_MODEL_FILE"
        execution_providers = ["CPUExecutionProvider"]

    chosen_source = os.getenv(source_var, "Xenova/bge-m3")
    chosen_model_file = os.getenv(model_file_var, "onnx/model_int8.onnx")
    return EmbeddingRuntimeConfig(
        model_name=EMBEDDING_MODEL,
        source=chosen_source,
        model_file=chosen_model_file,
        providers=execution_providers,
    )
|