Aaron Ploetz committed on
Commit
83ac2da
·
1 Parent(s): 74ab0c8

initial commit

Browse files
Files changed (2) hide show
  1. app.py +219 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,219 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import gradio
4
+
5
+ from fastapi import FastAPI
6
+ from fastapi.responses import JSONResponse
7
+ from sentence_transformers import SentenceTransformer
8
+ from typing import List, Dict, Any
9
+
10
# Hugging Face model ids that the /embed endpoint accepts and that the UI
# dropdown offers as choices.
MODELS = [
    "ibm-granite/granite-embedding-30m-english",
    "ibm-granite/granite-embedding-278m-multilingual",
]

# Most recently loaded SentenceTransformer instance; assigned by load_model().
current_model = None
# NOTE(review): not referenced anywhere else in the visible code.
model = None
# FastAPI application that the route handlers below register on.
app = FastAPI()
18
+
19
def load_model(model_name: str):
    """Return a SentenceTransformer for ``model_name``, reusing the cached one.

    The loaded model is kept in the module-level ``current_model`` and the
    name it was loaded from is remembered on this function object, so asking
    for the same model twice in a row does not load it again.

    Args:
        model_name: Model id passed straight to ``SentenceTransformer``.

    Returns:
        The loaded (or cached) ``SentenceTransformer`` instance.

    Raises:
        ValueError: If ``SentenceTransformer`` fails to load the model; the
            original exception is attached as ``__cause__``.
    """
    global current_model

    # The cache key must be the *name* of the loaded model. Comparing the
    # cached model object itself with a string is never equal, which forced a
    # full reload on every call.
    cached_name = getattr(load_model, "_loaded_name", None)
    if current_model is not None and cached_name == model_name:
        return current_model

    try:
        loaded = SentenceTransformer(model_name)
    except Exception as ex:
        raise ValueError(f"Failed to load model '{model_name}': {ex}") from ex

    # Only replace the cache once loading has succeeded, so a failed request
    # leaves the previously loaded model usable.
    current_model = loaded
    load_model._loaded_name = model_name
    return current_model
31
+
32
def embed(document: str, model_name: str):
    """Encode ``document`` with the model named ``model_name``.

    Args:
        document: Text handed to the model's ``encode`` method.
        model_name: Model id to load via ``load_model``. A falsy value (empty
            string or ``None``) means "no model selected".

    Returns:
        Whatever ``encode`` returns for ``document``, or ``None`` when no
        model name was given.

    Raises:
        ValueError: If the model cannot be loaded (raised by ``load_model``)
            or if encoding the document fails.
    """
    if not model_name:
        return None

    # load_model already raises a ValueError that names the model; let it
    # propagate as-is instead of wrapping it in a second, identical prefix.
    encoder = load_model(model_name)

    try:
        return encoder.encode(document)
    except Exception as ex:
        # Encoding failures are reported as such rather than as load failures.
        raise ValueError(
            f"Failed to embed text with model '{model_name}': {ex}"
        ) from ex
41
+
42
@app.get("/models")
async def get_models():
    """Return the list of accepted model ids as ``{"models": [...]}``."""
    payload = {"models": MODELS}
    return JSONResponse(content=payload)
49
+
50
@app.post("/embed")
async def generate_embedding(data: Dict[str, Any]):
    """Embed the ``text`` field of the request body with the requested model.

    Reads ``text`` and ``model`` from ``data``. Responds with HTTP 400 when
    ``text`` is missing/empty or ``model`` is not one of ``MODELS``, with
    HTTP 500 (and the exception text) when anything raises, and otherwise
    with the embedding, its length, and the model name.
    """

    def _error(status: int, detail: str) -> JSONResponse:
        # All failure responses share the same {"error": ...} shape.
        return JSONResponse(status_code=status, content={"error": detail})

    try:
        text = data.get("text", "")
        model_name = data.get("model", "")

        if not text:
            return _error(400, "No text provided")

        if model_name not in MODELS:
            return _error(
                400,
                f"Only IBM Granite embedding models can be used: {MODELS}",
            )

        # model_name is a member of MODELS here, hence a non-empty string.
        vector_embedding = embed(text, model_name)

        body = {
            "embedding": vector_embedding.tolist(),
            "dim": len(vector_embedding),
            "model": model_name,
        }
        return JSONResponse(content=body)

    except Exception as e:
        return _error(500, str(e))
85
+
86
# The page header reports the compute device. ``DEVICE`` was referenced below
# without ever being defined, which raised NameError while the UI was being
# built, so it is derived here. torch is imported locally; it is expected to
# be installed as a dependency of sentence_transformers.
import torch

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Gradio UI: model selector, text box, JSON output, and an API usage guide.
with gradio.Blocks(title="Multi-Model Text Embeddings", css="""
.json-holder {
    max-height: 400px !important;
    overflow-y: auto !important;
}
.json-holder .wrap {
    max-height: 400px !important;
    overflow-y: auto !important;
}
""") as gradio_app:
    gradio.Markdown("# Multi-Model Text Embeddings")
    gradio.Markdown("Generate embeddings for your text using IBM Granite embedding models.")
    gradio.Markdown(f"**Device**: {DEVICE.upper()} {'🚀' if DEVICE == 'cuda' else '💻'}")

    # Model selector dropdown. Restricted to MODELS so the UI accepts the same
    # models as the /embed endpoint, and pre-selected so a submit without
    # touching the dropdown still produces an embedding.
    model_dropdown = gradio.Dropdown(
        choices=MODELS,
        value=MODELS[0],
        label="Select Embedding Model",
        info="Choose any predefined model name",
        allow_custom_value=False,
    )

    # Create an input text box
    text_input = gradio.Textbox(label="Enter text to embed", placeholder="Type or paste your text here...")

    # Create an output component to display the embedding
    output = gradio.JSON(label="Text Embedding", elem_classes=["json-holder"])

    # Add a submit button with API name
    submit_btn = gradio.Button("Generate Embedding", variant="primary")

    # Handle both button click and text submission
    submit_btn.click(embed, inputs=[text_input, model_dropdown], outputs=output, api_name="predict")
    text_input.submit(embed, inputs=[text_input, model_dropdown], outputs=output)

    # Add API usage guide. Shell line continuations are written as "\\"
    # because a single backslash before a newline in a non-raw string literal
    # would swallow the line break.
    gradio.Markdown("## API Usage")
    gradio.Markdown("""
    You can use this API in two ways: via the direct FastAPI endpoint or through Gradio clients.

    ### List Available Models
    ```bash
    curl https://aploetz-granite-embeddings.hf.space/models
    ```

    ### Direct API Endpoint (No Queue!)
    The `model` field is required and must be one of the available models listed below.
    ```bash
    # English model
    curl -X POST https://aploetz-granite-embeddings.hf.space/embed \\
      -H "Content-Type: application/json" \\
      -d '{"text": "Your text to embed goes here", "model": "ibm-granite/granite-embedding-30m-english"}'

    # Multilingual model
    curl -X POST https://aploetz-granite-embeddings.hf.space/embed \\
      -H "Content-Type: application/json" \\
      -d '{"text": "Your text to embed goes here", "model": "ibm-granite/granite-embedding-278m-multilingual"}'
    ```

    Response format:
    ```json
    {
      "embedding": [0.123, -0.456, ...],
      "dim": 384,
      "model": "ibm-granite/granite-embedding-30m-english"
    }
    ```

    ### Python Example (Direct API)
    ```python
    import requests

    # List available models
    models = requests.get("https://aploetz-granite-embeddings.hf.space/models").json()
    print(models["models"])

    # Generate embedding with specific model
    response = requests.post(
        "https://aploetz-granite-embeddings.hf.space/embed",
        json={
            "text": "Your text to embed goes here",
            "model": "ibm-granite/granite-embedding-30m-english"
        }
    )
    result = response.json()
    embedding = result["embedding"]
    ```

    ### Python Example (Gradio Client)
    ```python
    from gradio_client import Client

    client = Client("aploetz/granite-embeddings")
    result = client.predict(
        "Your text to embed goes here",
        "ibm-granite/granite-embedding-30m-english",  # model selection
        api_name="/predict"
    )
    print(result)  # Returns the embedding array
    ```

    ### Available Models
    - `ibm-granite/granite-embedding-30m-english` - IBM Granite 30M English embedding model
    - `ibm-granite/granite-embedding-278m-multilingual` - IBM Granite 278M multilingual embedding model
    """)
212
+
213
if __name__ == "__main__":
    # Script entry point: attach the Gradio UI to the FastAPI app and serve both.
    import uvicorn

    # Mount the Blocks UI at the root path of the FastAPI app. The module-level
    # name gradio_app is rebound to whatever mount_gradio_app returns.
    gradio_app = gradio.mount_gradio_app(app, gradio_app, path="/")

    # Serve on all interfaces, port 7860.
    uvicorn.run(app, host="0.0.0.0", port=7860)
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ sentence_transformers
2
+ fastapi
3
+ uvicorn