from app.embedding_runtime import (
    EMBEDDING_DIM,
    EMBEDDING_MODEL,
    get_runtime_config,
)


def test_query_runtime_uses_cpu_int8_contract(monkeypatch):
    """Pin the "query" runtime config and the module-level model constants.

    Both ``PGC_BGE_M3_QUERY_*`` environment variables are removed before the
    config is requested (presumably they act as overrides inside
    ``get_runtime_config`` -- confirm against the implementation). The test
    then expects a CPU-only provider list and the int8 ONNX model file.
    """
    # raising=False: it is fine if the variable was never set.
    for env_name in ("PGC_BGE_M3_QUERY_SOURCE", "PGC_BGE_M3_QUERY_MODEL_FILE"):
        monkeypatch.delenv(env_name, raising=False)

    query_cfg = get_runtime_config("query")

    # Module constants first, then the per-role settings.
    assert EMBEDDING_MODEL == "pgc/bge-m3-onnx"
    assert EMBEDDING_DIM == 1024
    assert query_cfg.providers == ["CPUExecutionProvider"]
    assert query_cfg.model_file == "onnx/model_int8.onnx"


def test_ingest_runtime_prefers_cuda_with_cpu_fallback(monkeypatch):
    """Pin the "ingest" runtime config: CUDA listed first, CPU second.

    Both ``PGC_BGE_M3_INGEST_*`` environment variables are removed before the
    config is requested (presumably overrides -- confirm against the
    implementation). The expected model file is the non-int8 ``model.onnx``.
    """
    # raising=False: it is fine if the variable was never set.
    for env_name in ("PGC_BGE_M3_INGEST_SOURCE", "PGC_BGE_M3_INGEST_MODEL_FILE"):
        monkeypatch.delenv(env_name, raising=False)

    ingest_cfg = get_runtime_config("ingest")

    # The list order is part of the assertion: CUDA before CPU.
    expected_providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]
    assert ingest_cfg.providers == expected_providers
    assert ingest_cfg.model_file == "onnx/model.onnx"