"""Embedding service: a Gradio UI plus an OpenAI-compatible /v1/embeddings API."""

import os

# Gradio reads these environment variables at import time, so they MUST be
# set before `import gradio` below.  0.0.0.0 exposes the server on all
# interfaces (required inside containers such as Hugging Face Spaces).
os.environ["GRADIO_SERVER_NAME"] = "0.0.0.0"
os.environ["GRADIO_SERVER_PORT"] = "7860"

import gradio as gr
import uvicorn
from fastapi import FastAPI, Request

import spaces
from sentence_transformers import SentenceTransformer
from sentence_transformers.util import cos_sim  # noqa: F401 -- currently unused; verify before removing
from sentence_transformers.quantization import quantize_embeddings  # noqa: F401 -- currently unused; verify before removing

# FastAPI application hosting both the REST endpoint and the Gradio UI.
app = FastAPI()
|
|
|
|
|
@spaces.GPU
def embed(text):
    """Encode *text* with the shared SentenceTransformer; return plain Python lists.

    `text` may be a single string (returns one vector as a list of floats) or a
    list of strings (returns a list of vectors) -- `encode` supports both.
    """
    # `Embedder` is created at module scope further down; it exists by the
    # time a request arrives.  The `spaces.GPU` decorator requests a GPU for
    # the duration of the call on ZeroGPU Spaces.
    vector = Embedder.encode(text)
    return vector.tolist()
|
|
|
|
|
|
|
|
|
# Minimal UI: type text, press Enter, see the embedding vector as text.
with gr.Blocks(fill_height=True) as demo:
    text = gr.Textbox()
    embeddings = gr.Textbox()

    # Submitting the input box runs `embed` and writes into the output box.
    text.submit(embed, [text], [embeddings])
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.post("/v1/embeddings")
async def openai_embeddings(request: Request):
    """OpenAI-compatible embeddings endpoint.

    Accepts the OpenAI request schema: ``{"model": ..., "input": ...}`` where
    ``input`` is a single string or a list of strings.  Returns the OpenAI
    response schema with one data item per input.

    Note: ``model`` is echoed back but not used for routing -- the service
    always embeds with the module-level model.
    """
    body = await request.json()
    print(body)  # lightweight request logging

    model = body["model"]
    text = body["input"]

    vectors = embed(text)
    # Normalize so each input string gets its own data item: for a single
    # string, `embed` returns one flat vector, so wrap it in a list.
    if isinstance(text, str):
        vectors = [vectors]

    return {
        "object": "list",
        # Per the OpenAI API, each item's object type is "embedding"
        # (singular) -- the original "embeddings" broke strict clients.
        "data": [
            {"object": "embedding", "embedding": vec, "index": i}
            for i, vec in enumerate(vectors)
        ],
        "model": model,
        # Token accounting is not implemented; report zeros.
        "usage": {"prompt_tokens": 0, "total_tokens": 0},
    }
|
|
|
|
|
# Load the embedding model once at import time; it is shared by both the
# Gradio UI and the REST endpoint.
print("Loading embedding model")
Embedder = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1")

# Mount the Gradio UI onto the FastAPI app at the application root, serving
# it alongside the /v1/embeddings route.
GradioApp = gr.mount_gradio_app(app, demo, path="", ssr_mode=False)

if __name__ == "__main__":
    # Direct invocation (outside the Spaces runtime): serve on all interfaces.
    uvicorn.run(GradioApp, port=7860, host="0.0.0.0")
|
| |