Spaces:
Running
Running
chore: support `bge-m3` and `gte-multilingual-base` models
Browse files
- README.md +1 -1
- lightweight_embeddings/__init__.py +5 -3
- lightweight_embeddings/router.py +1 -0
- lightweight_embeddings/service.py +8 -2
README.md
CHANGED
|
@@ -21,7 +21,7 @@ header: mini
|
|
| 21 |
- **Reranking Support**: Includes powerful reranking capabilities for both text and image inputs.
|
| 22 |
- **Optimized for Speed**: Built with lightweight transformer models and efficient backends for rapid inference, even on low-resource systems.
|
| 23 |
- **Flexible Model Support**: Use a range of transformer models tailored to diverse use cases:
|
| 24 |
-
- Text models: `
|
| 25 |
- Image model: `siglip-base-patch16-256-multilingual`
|
| 26 |
- **Production-Ready**: Easily deploy anywhere with Docker for hassle-free setup.
|
| 27 |
- **Interactive Playground**: Test embeddings and reranking directly via a **Gradio-powered interface** alongside detailed REST API documentation.
|
|
|
|
| 21 |
- **Reranking Support**: Includes powerful reranking capabilities for both text and image inputs.
|
| 22 |
- **Optimized for Speed**: Built with lightweight transformer models and efficient backends for rapid inference, even on low-resource systems.
|
| 23 |
- **Flexible Model Support**: Use a range of transformer models tailored to diverse use cases:
|
| 24 |
+
- Text models: `snowflake-arctic-embed-l-v2.0`, `bge-m3`, `gte-multilingual-base`, `paraphrase-multilingual-MiniLM-L12-v2`, `paraphrase-multilingual-mpnet-base-v2`, `multilingual-e5-small`, `multilingual-e5-base`, `multilingual-e5-large`.
|
| 25 |
- Image model: `siglip-base-patch16-256-multilingual`
|
| 26 |
- **Production-Ready**: Easily deploy anywhere with Docker for hassle-free setup.
|
| 27 |
- **Interactive Playground**: Test embeddings and reranking directly via a **Gradio-powered interface** alongside detailed REST API documentation.
|
lightweight_embeddings/__init__.py
CHANGED
|
@@ -13,6 +13,7 @@ Supported text model IDs:
|
|
| 13 |
- "paraphrase-multilingual-MiniLM-L12-v2"
|
| 14 |
- "paraphrase-multilingual-mpnet-base-v2"
|
| 15 |
- "bge-m3"
|
|
|
|
| 16 |
|
| 17 |
Supported image model ID:
|
| 18 |
- "siglip-base-patch16-256-multilingual"
|
|
@@ -135,12 +136,13 @@ def create_main_interface():
|
|
| 135 |
# Available model options for the dropdown
|
| 136 |
model_options = [
|
| 137 |
"snowflake-arctic-embed-l-v2.0",
|
|
|
|
|
|
|
|
|
|
|
|
|
| 138 |
"multilingual-e5-small",
|
| 139 |
"multilingual-e5-base",
|
| 140 |
"multilingual-e5-large",
|
| 141 |
-
"paraphrase-multilingual-MiniLM-L12-v2",
|
| 142 |
-
"paraphrase-multilingual-mpnet-base-v2",
|
| 143 |
-
"bge-m3",
|
| 144 |
"siglip-base-patch16-256-multilingual",
|
| 145 |
]
|
| 146 |
|
|
|
|
| 13 |
- "paraphrase-multilingual-MiniLM-L12-v2"
|
| 14 |
- "paraphrase-multilingual-mpnet-base-v2"
|
| 15 |
- "bge-m3"
|
| 16 |
+
- "gte-multilingual-base"
|
| 17 |
|
| 18 |
Supported image model ID:
|
| 19 |
- "siglip-base-patch16-256-multilingual"
|
|
|
|
| 136 |
# Available model options for the dropdown
|
| 137 |
model_options = [
|
| 138 |
"snowflake-arctic-embed-l-v2.0",
|
| 139 |
+
"bge-m3",
|
| 140 |
+
"gte-multilingual-base",
|
| 141 |
+
"paraphrase-multilingual-MiniLM-L12-v2",
|
| 142 |
+
"paraphrase-multilingual-mpnet-base-v2",
|
| 143 |
"multilingual-e5-small",
|
| 144 |
"multilingual-e5-base",
|
| 145 |
"multilingual-e5-large",
|
|
|
|
|
|
|
|
|
|
| 146 |
"siglip-base-patch16-256-multilingual",
|
| 147 |
]
|
| 148 |
|
lightweight_embeddings/router.py
CHANGED
|
@@ -11,6 +11,7 @@ Supported Text Model IDs:
|
|
| 11 |
- "paraphrase-multilingual-MiniLM-L12-v2"
|
| 12 |
- "paraphrase-multilingual-mpnet-base-v2"
|
| 13 |
- "bge-m3"
|
|
|
|
| 14 |
|
| 15 |
Supported Image Model IDs:
|
| 16 |
- "siglip-base-patch16-256-multilingual"
|
|
|
|
| 11 |
- "paraphrase-multilingual-MiniLM-L12-v2"
|
| 12 |
- "paraphrase-multilingual-mpnet-base-v2"
|
| 13 |
- "bge-m3"
|
| 14 |
+
- "gte-multilingual-base"
|
| 15 |
|
| 16 |
Supported Image Model IDs:
|
| 17 |
- "siglip-base-patch16-256-multilingual"
|
lightweight_embeddings/service.py
CHANGED
|
@@ -18,6 +18,7 @@ Supported Text Model IDs:
|
|
| 18 |
- "paraphrase-multilingual-MiniLM-L12-v2"
|
| 19 |
- "paraphrase-multilingual-mpnet-base-v2"
|
| 20 |
- "bge-m3"
|
|
|
|
| 21 |
|
| 22 |
Supported Image Model IDs:
|
| 23 |
- "google/siglip-base-patch16-256-multilingual" (default, but extensible)
|
|
@@ -56,6 +57,7 @@ class TextModelType(str, Enum):
|
|
| 56 |
PARAPHRASE_MULTILINGUAL_MINILM_L12_V2 = "paraphrase-multilingual-MiniLM-L12-v2"
|
| 57 |
PARAPHRASE_MULTILINGUAL_MPNET_BASE_V2 = "paraphrase-multilingual-mpnet-base-v2"
|
| 58 |
BGE_M3 = "bge-m3"
|
|
|
|
| 59 |
|
| 60 |
|
| 61 |
class ImageModelType(str, Enum):
|
|
@@ -122,8 +124,12 @@ class ModelConfig:
|
|
| 122 |
onnx_file="onnx/model_quantized.onnx",
|
| 123 |
),
|
| 124 |
TextModelType.BGE_M3: ModelInfo(
|
| 125 |
-
model_id="
|
| 126 |
-
onnx_file="onnx/
|
|
|
|
|
|
|
|
|
|
|
|
|
| 127 |
),
|
| 128 |
}
|
| 129 |
return text_configs[self.text_model_type]
|
|
|
|
| 18 |
- "paraphrase-multilingual-MiniLM-L12-v2"
|
| 19 |
- "paraphrase-multilingual-mpnet-base-v2"
|
| 20 |
- "bge-m3"
|
| 21 |
+
- "gte-multilingual-base"
|
| 22 |
|
| 23 |
Supported Image Model IDs:
|
| 24 |
- "google/siglip-base-patch16-256-multilingual" (default, but extensible)
|
|
|
|
| 57 |
PARAPHRASE_MULTILINGUAL_MINILM_L12_V2 = "paraphrase-multilingual-MiniLM-L12-v2"
|
| 58 |
PARAPHRASE_MULTILINGUAL_MPNET_BASE_V2 = "paraphrase-multilingual-mpnet-base-v2"
|
| 59 |
BGE_M3 = "bge-m3"
|
| 60 |
+
GTE_MULTILINGUAL_BASE = "gte-multilingual-base"
|
| 61 |
|
| 62 |
|
| 63 |
class ImageModelType(str, Enum):
|
|
|
|
| 124 |
onnx_file="onnx/model_quantized.onnx",
|
| 125 |
),
|
| 126 |
TextModelType.BGE_M3: ModelInfo(
|
| 127 |
+
model_id="Xenova/bge-m3",
|
| 128 |
+
onnx_file="onnx/model_quantized.onnx",
|
| 129 |
+
),
|
| 130 |
+
TextModelType.GTE_MULTILINGUAL_BASE: ModelInfo(
|
| 131 |
+
model_id="onnx-community/gte-multilingual-base",
|
| 132 |
+
onnx_file="onnx/model_quantized.onnx",
|
| 133 |
),
|
| 134 |
}
|
| 135 |
return text_configs[self.text_model_type]
|