| import gradio as gr
|
| from fastapi import FastAPI, Request
|
| import uvicorn
|
| from sentence_transformers import SentenceTransformer
|
| from sentence_transformers.util import cos_sim
|
| from sentence_transformers.quantization import quantize_embeddings
|
|
|
|
|
| import spaces
|
|
|
|
|
|
|
# FastAPI application that serves both the OpenAI-compatible embeddings
# endpoint (/v1/embeddings) and, after mounting, the Gradio demo UI.
app = FastAPI()
|
|
|
|
|
@spaces.GPU
def embed(text):
    """Return an embedding vector for *text*.

    Currently a placeholder: the input is ignored and a fixed dummy
    vector is returned.  NOTE(review): the `Embedder` model defined at
    module level is never used here — confirm whether a real
    SentenceTransformer call is still pending.
    """
    dummy_vector = [0, 1]
    return dummy_vector
|
|
|
|
|
|
|
|
|
|
|
@app.post("/v1/embeddings")
async def openai_embeddings(request: Request):
    """OpenAI-compatible embeddings endpoint.

    Expects a JSON body with 'model' and 'input' keys and returns the
    embedding wrapped in the OpenAI /v1/embeddings response shape.

    Raises:
        KeyError: if 'model' or 'input' is missing from the request body.
    """
    body = await request.json()
    print(body)  # NOTE(review): consider a logger; this echoes every request body

    model = body['model']
    text = body['input']
    embeddings = embed(text)
    return {
        'object': "list",
        'data': [{
            # Fixed: OpenAI's schema uses the singular "embedding" for each
            # item in `data` (was "embeddings", which breaks OpenAI clients).
            'object': "embedding",
            'embedding': embeddings,
            'index': 0,
        }],
        'model': model,
        'usage': {
            # Token accounting is not implemented by the stub embedder.
            'prompt_tokens': 0,
            'total_tokens': 0,
        },
    }
|
|
|
def fn(text):
    """Gradio submit callback: embed *text* and return it for display.

    Bug fix: the original called embed() but discarded the result, so the
    output Textbox wired to this callback always received None.
    """
    return embed(text)
|
|
|
# Minimal demo UI: type text, press Enter, see the embedding result.
with gr.Blocks(fill_height=True) as demo:
    text = gr.Textbox()
    embeddings = gr.Textbox()

    # On submit, run the embed callback and show its output.
    text.submit(fn=fn, inputs=[text], outputs=[embeddings])
|
|
|
|
|
print("Loading embedding model");
# Placeholder for a SentenceTransformer instance.  NOTE(review): despite the
# log line above, no model is ever loaded in this file and embed() never
# reads this variable — confirm whether model loading is still to be wired in.
Embedder = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
print("Demo run...")
# Fixed: Blocks.launch() has no `port` parameter — it is `server_port`; the
# original raised TypeError at import.  NOTE(review): this standalone launch
# is likely redundant, since the same `demo` is mounted on the FastAPI app
# below and served by uvicorn — confirm whether it can be removed.
(app2, url, other) = demo.launch(
    prevent_thread_lock=True, server_name=None, server_port=8000
)

print("Mounting app...")
# Mount the Gradio UI at the root path of the FastAPI app; uvicorn serves
# this combined application in the __main__ guard below.
GradioApp = gr.mount_gradio_app(app, demo, path="/", ssr_mode=False)
|
|
|
|
|
if __name__ == '__main__':
    # Serve the combined FastAPI + Gradio app on all interfaces.
    # Fixed typo in the log message ("uviconr" -> "uvicorn").
    print("Running uvicorn...")
    uvicorn.run(GradioApp, host="0.0.0.0", port=7860)
|
|
|
|
|
|
|
| |