import gradio
from fastapi import FastAPI
from fastapi.responses import JSONResponse
from sentence_transformers import SentenceTransformer
from typing import Any, Dict
MODELS = [
    "ibm-granite/granite-embedding-30m-english",
    "ibm-granite/granite-embedding-278m-multilingual"
]

# Cache the most recently loaded model so repeated requests don't reload it
current_model_name = None
model = None

app = FastAPI()
def load_model(model_name: str):
    """Return a SentenceTransformer for model_name, reusing the cached instance when possible."""
    global current_model_name, model
    if model is not None and current_model_name == model_name:
        return model
    try:
        model = SentenceTransformer(model_name)
        current_model_name = model_name
    except Exception as ex:
        raise ValueError(f"Failed to load model '{model_name}': {ex}")
    return model
def embed(document: str, model_name: str):
    """Encode a document with the selected model; returns the embedding as a plain list."""
    if not model_name:
        return None
    try:
        # .tolist() keeps the result JSON-serializable for both Gradio and FastAPI
        return load_model(model_name).encode(document).tolist()
    except Exception as ex:
        raise ValueError(f"Failed to embed with model '{model_name}': {ex}")
@app.get("/models")
async def get_models():
    return JSONResponse(
        content={
            "models": MODELS
        }
    )
@app.post("/embed")
async def generate_embedding(data: Dict[str, Any]):
    try:
        text = data.get("text", "")
        model_name = data.get("model", "")
        if not text:
            return JSONResponse(
                status_code=400,
                content={"error": "No text provided"}
            )
        if model_name not in MODELS:
            message = f"Only IBM Granite embedding models can be used: {MODELS}"
            return JSONResponse(
                status_code=400,
                content={"error": message}
            )
        vector_embedding = embed(text, model_name)
        return JSONResponse(
            content={
                "embedding": vector_embedding,
                "dim": len(vector_embedding),
                "model": model_name
            }
        )
    except Exception as ex:
        return JSONResponse(
            status_code=500,
            content={"error": str(ex)}
        )
# NOTE: css_paths is the Gradio 5 parameter name; css_path is not a valid Blocks argument
with gradio.Blocks(title="Multi-Model Text Embeddings", css_paths="./style.css") as gradio_app:
gradio.Markdown("# Multi-Model Text Embeddings")
gradio.Markdown("Generate embeddings for your text using the IBM Granite embedding models.")
# Model selector dropdown (allows custom input)
model_dropdown = gradio.Dropdown(
choices=MODELS,
value="",
label="Select Embedding Model",
info="Choose any predefined model name",
allow_custom_value=True
)
# Create an input text box
text_input = gradio.Textbox(label="Enter text to embed", placeholder="Type or paste your text here...")
# Create an output component to display the embedding
output = gradio.JSON(label="Text Embedding", elem_classes=["json-holder"])
# Add a submit button with API name
submit_btn = gradio.Button("Generate Embedding", variant="primary")
# Handle both button click and text submission
submit_btn.click(embed, inputs=[text_input, model_dropdown], outputs=output, api_name="predict")
text_input.submit(embed, inputs=[text_input, model_dropdown], outputs=output)
    # Add API usage guide
    gradio.Markdown("## API Usage")
    gradio.Markdown("""
You can use this API in two ways: via the direct FastAPI endpoint or through Gradio clients.
### List Available Models
```bash
curl https://aploetz-granite-embeddings.hf.space/models
```
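The response is the list of accepted model names:
```json
{
  "models": [
    "ibm-granite/granite-embedding-30m-english",
    "ibm-granite/granite-embedding-278m-multilingual"
  ]
}
```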
### Direct API Endpoint (No Queue!)
The `model` field is required and must be one of the Granite models listed above:
```bash
# Embed with the 30M English model
curl -X POST https://aploetz-granite-embeddings.hf.space/embed \
  -H "Content-Type: application/json" \
  -d '{"text": "Your text to embed goes here", "model": "ibm-granite/granite-embedding-30m-english"}'

# Embed with the 278M multilingual model
curl -X POST https://aploetz-granite-embeddings.hf.space/embed \
  -H "Content-Type: application/json" \
  -d '{"text": "Your text to embed goes here", "model": "ibm-granite/granite-embedding-278m-multilingual"}'
```
Response format:
```json
{
  "embedding": [0.123, -0.456, ...],
  "dim": 384,
  "model": "ibm-granite/granite-embedding-30m-english"
}
```
### Python Example (Direct API)
```python
import requests

# List available models
models = requests.get("https://aploetz-granite-embeddings.hf.space/models").json()
print(models["models"])

# Generate an embedding with a specific model
response = requests.post(
    "https://aploetz-granite-embeddings.hf.space/embed",
    json={
        "text": "Your text to embed goes here",
        "model": "ibm-granite/granite-embedding-30m-english"
    }
)
result = response.json()
embedding = result["embedding"]
```
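Failed requests come back with HTTP 400 (missing text, or a model outside the Granite list) or HTTP 500, with the reason in an `error` field, so it's worth checking the status code before reading the embedding:
```python
if response.status_code != 200:
    # The endpoint reports the failure reason in an "error" field
    raise RuntimeError(response.json().get("error", "unknown error"))
```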
### Python Example (Gradio Client)
Requires the `gradio_client` package (`pip install gradio_client`):
```python
from gradio_client import Client

client = Client("aploetz/granite-embeddings")
result = client.predict(
    "Your text to embed goes here",
    "ibm-granite/granite-embedding-30m-english",  # model selection
    api_name="/predict"
)
print(result)  # Returns the embedding array
```
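If you're unsure of the endpoint signature, the client can introspect it:
```python
client.view_api()  # prints the callable endpoints and their expected parameters
```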
### Available Models
- `ibm-granite/granite-embedding-30m-english` - IBM Granite 30M English embedding model (384-dimensional vectors)
- `ibm-granite/granite-embedding-278m-multilingual` - IBM Granite 278M multilingual embedding model (768-dimensional vectors)
""")
if __name__ == '__main__':
    # Mount the Gradio UI onto the FastAPI app so both are served together
    combined_app = gradio.mount_gradio_app(app, gradio_app, path="/")
    # Run with Uvicorn (Gradio uses this internally)
    import uvicorn
    uvicorn.run(combined_app, host="0.0.0.0", port=7860)