Spaces:
Running
Running
| from dataclasses import dataclass | |
| import os | |
| from typing import Literal | |
# Value used as ``model_name`` in every EmbeddingRuntimeConfig built by
# get_runtime_config.
EMBEDDING_MODEL = "pgc/bge-m3-onnx"

# Default for ``EmbeddingRuntimeConfig.dim``.
# NOTE(review): presumably the output vector width of bge-m3 -- confirm.
EMBEDDING_DIM = 1024


@dataclass
class EmbeddingRuntimeConfig:
    """Settings describing which embedding model file to load and how.

    The ``@dataclass`` decorator generates the ``__init__`` that accepts these
    fields as keyword arguments; get_runtime_config constructs instances that
    way, so without the decorator those calls raise ``TypeError``.
    """

    # Model identifier; get_runtime_config always passes EMBEDDING_MODEL.
    model_name: str
    # Where the model comes from, e.g. "Xenova/bge-m3".
    # NOTE(review): looks like a Hugging Face repo id -- confirm with the loader.
    source: str
    # Model file to load from ``source``, e.g. "onnx/model_int8.onnx".
    model_file: str
    # Execution provider names, in the order given by the caller
    # (e.g. ["CUDAExecutionProvider", "CPUExecutionProvider"]).
    providers: list[str]
    # Embedding dimension; defaults to the module-level EMBEDDING_DIM.
    dim: int = EMBEDDING_DIM
def get_runtime_config(runtime: Literal["query", "ingest"]) -> EmbeddingRuntimeConfig:
    """Build the embedding runtime configuration for the given runtime.

    Args:
        runtime: ``"ingest"`` selects the ingest settings; any other value
            (normally ``"query"``) selects the query settings.

    Returns:
        An EmbeddingRuntimeConfig whose ``model_name`` is EMBEDDING_MODEL.
        ``source`` and ``model_file`` are read from the
        ``PGC_BGE_M3_{INGEST,QUERY}_SOURCE`` and
        ``PGC_BGE_M3_{INGEST,QUERY}_MODEL_FILE`` environment variables and fall
        back to ``"Xenova/bge-m3"`` and ``"onnx/model_int8.onnx"``. The ingest
        configuration lists ``CUDAExecutionProvider`` ahead of
        ``CPUExecutionProvider``; the query configuration lists only
        ``CPUExecutionProvider``.
    """
    # Xenova/bge-m3 provides self-contained single-file quantized ONNX variants
    # (model_int8.onnx, ~568 MB) whereas BAAI/bge-m3 only has model.onnx +
    # model.onnx_data (external-data format, 2.3 GB), which fastembed cannot load
    # as a custom model without special handling.
    if runtime == "ingest":
        source_var = "PGC_BGE_M3_INGEST_SOURCE"
        model_file_var = "PGC_BGE_M3_INGEST_MODEL_FILE"
        providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]
    else:
        source_var = "PGC_BGE_M3_QUERY_SOURCE"
        model_file_var = "PGC_BGE_M3_QUERY_MODEL_FILE"
        providers = ["CPUExecutionProvider"]

    # `os.getenv(name, default)` only uses the default when the variable is
    # absent; a variable that is set but empty (e.g. `VAR=` in an env file)
    # would yield "" and an unusable model location. `or` treats an empty
    # value the same as an unset one.
    return EmbeddingRuntimeConfig(
        model_name=EMBEDDING_MODEL,
        source=os.getenv(source_var) or "Xenova/bge-m3",
        model_file=os.getenv(model_file_var) or "onnx/model_int8.onnx",
        providers=providers,
    )