PGC-AI-Chatbot / app /embedding_runtime.py
Jacooo's picture
Deploy from GitHub: 4925109
f2ddb73 verified
from dataclasses import dataclass
import os
from typing import Literal
# Model name that get_runtime_config assigns to `model_name` for both the
# "query" and "ingest" runtimes.
# NOTE(review): looks like a project-local identifier rather than a hub
# repo id (the download location is configured separately) — confirm.
EMBEDDING_MODEL = "pgc/bge-m3-onnx"
# Default value of EmbeddingRuntimeConfig.dim.
# Presumably the length of the embedding vectors the model emits — confirm.
EMBEDDING_DIM = 1024
@dataclass(frozen=True, slots=True)
class EmbeddingRuntimeConfig:
    """Settings describing how the embedding model is loaded for one runtime.

    Instances are produced by ``get_runtime_config``. The dataclass is frozen,
    so fields cannot be reassigned after construction; note that the
    ``providers`` list object itself is still mutable.
    """

    # Name the model is referred to by; get_runtime_config passes
    # EMBEDDING_MODEL here.
    model_name: str
    # Where the model files are fetched from. get_runtime_config defaults this
    # to "Xenova/bge-m3" — presumably a Hugging Face repo id; confirm.
    source: str
    # Model file to load; get_runtime_config defaults this to
    # "onnx/model_int8.onnx" — presumably a path relative to `source`; confirm.
    model_file: str
    # Execution provider names, e.g. "CPUExecutionProvider".
    # NOTE(review): looks like these are handed to ONNX Runtime — confirm
    # against the code that consumes this config.
    providers: list[str]
    # Defaults to EMBEDDING_DIM; presumably the embedding vector size — confirm.
    dim: int = EMBEDDING_DIM
def _env_or_default(name: str, default: str) -> str:
    """Return environment variable *name*, or *default* if it is unset or blank."""
    value = os.getenv(name)
    # A variable that is set but empty (common with container/env-file
    # passthrough) must not override the default with an unusable "" value.
    if value is None or not value.strip():
        return default
    return value


def get_runtime_config(runtime: Literal["query", "ingest"]) -> EmbeddingRuntimeConfig:
    """Build the embedding model configuration for the given runtime.

    Args:
        runtime: ``"ingest"`` selects the ingest configuration; any other
            value (normally ``"query"``) selects the query configuration.

    Returns:
        An ``EmbeddingRuntimeConfig`` whose ``source`` and ``model_file`` come
        from the ``PGC_BGE_M3_<INGEST|QUERY>_SOURCE`` and
        ``PGC_BGE_M3_<INGEST|QUERY>_MODEL_FILE`` environment variables,
        falling back to the built-in defaults when a variable is unset or
        blank.
    """
    # Xenova/bge-m3 provides self-contained single-file quantized ONNX variants
    # (model_int8.onnx, ~568 MB), whereas BAAI/bge-m3 only has model.onnx +
    # model.onnx_data (external-data format, 2.3 GB), which fastembed cannot
    # load as a custom model without special handling.
    if runtime == "ingest":
        return EmbeddingRuntimeConfig(
            model_name=EMBEDDING_MODEL,
            source=_env_or_default("PGC_BGE_M3_INGEST_SOURCE", "Xenova/bge-m3"),
            model_file=_env_or_default(
                "PGC_BGE_M3_INGEST_MODEL_FILE", "onnx/model_int8.onnx"
            ),
            # CUDA is listed first with CPU after it, so ingest can still run
            # on hosts without a usable GPU provider.
            providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
        )
    # Every non-"ingest" value falls through to the CPU-only query setup.
    return EmbeddingRuntimeConfig(
        model_name=EMBEDDING_MODEL,
        source=_env_or_default("PGC_BGE_M3_QUERY_SOURCE", "Xenova/bge-m3"),
        model_file=_env_or_default(
            "PGC_BGE_M3_QUERY_MODEL_FILE", "onnx/model_int8.onnx"
        ),
        providers=["CPUExecutionProvider"],
    )