Charan5775 committed on
Commit
c4f4db6
·
verified ·
1 Parent(s): 66163a9

Create server.py

Browse files
Files changed (1) hide show
  1. server.py +47 -0
server.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, Request
2
+ from pydantic import BaseModel
3
+ from sentence_transformers import SentenceTransformer
4
+ import time
5
+ import hashlib
6
+ import os
7
+
8
app = FastAPI()

# Load the embedding model once at import time so every request reuses it.
# The checkpoint defaults to all-MiniLM-L6-v2 (fast + free); set the
# EMBEDDING_MODEL_NAME environment variable to serve a different one.
# An unset or empty variable falls back to the default.
model_name = (
    os.environ.get("EMBEDDING_MODEL_NAME")
    or "sentence-transformers/all-MiniLM-L6-v2"
)
model = SentenceTransformer(model_name)
13
+
14
# OpenAI compatible request/response
class EmbeddingRequest(BaseModel):
    """Request body accepted by the embeddings endpoint."""

    # Texts to embed, one embedding is produced per entry.
    input: list[str]
17
+
18
class EmbeddingResponse(BaseModel):
    """Response body returned by the embeddings endpoint (OpenAI-like shape)."""

    # Payload kind; defaults to "list".
    object: str = "list"
    # Embedding entries, one per input text.
    data: list
    # Name of the model reported back to the client.
    model: str
    # Token usage counters.
    usage: dict
23
+
24
@app.post("/v1/embeddings", response_model=EmbeddingResponse)
async def create_embeddings(request: EmbeddingRequest):
    """Embed every input text and return an OpenAI-like embeddings payload.

    Args:
        request: Validated request body; ``request.input`` holds the texts
            to embed.

    Returns:
        A dict with the keys ``object``, ``data``, ``model`` and ``usage``.
        ``data`` contains one ``{"object", "index", "embedding"}`` entry per
        encoded vector, where ``index`` is the vector's position in the
        encoder output.
    """
    # Function-scope import keeps this block self-contained; asyncio is only
    # needed to push the blocking encode call off the event loop.
    import asyncio

    texts = request.input

    if texts:
        # model.encode is synchronous and compute-heavy. Calling it directly
        # inside an async handler would stall the event loop for every other
        # request, so run it in a worker thread instead.
        encoded = await asyncio.to_thread(
            model.encode, texts, convert_to_numpy=True
        )
        vectors = encoded.tolist()
    else:
        # Nothing to encode: skip the model call entirely.
        vectors = []

    # Pack in OpenAI-like format
    data = [
        {"object": "embedding", "index": index, "embedding": vector}
        for index, vector in enumerate(vectors)
    ]

    # Usage is a whitespace-delimited word count over all inputs, not the
    # model tokenizer's token count. Compute it once and reuse it.
    token_count = len(" ".join(texts).split())

    return {
        "object": "list",
        "data": data,
        "model": model_name,
        "usage": {
            "prompt_tokens": token_count,
            "total_tokens": token_count,
        },
    }