File size: 1,496 Bytes
25a2fe8
c4f4db6
3ee72b0
c4f4db6
25a2fe8
c4f4db6
3ee72b0
 
c4f4db6
3ee72b0
25a2fe8
3ee72b0
c4f4db6
3ee72b0
c4f4db6
25a2fe8
3ee72b0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c4f4db6
25a2fe8
 
 
 
 
3ee72b0
25a2fe8
 
c4f4db6
3ee72b0
 
c4f4db6
 
3ee72b0
25a2fe8
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import os
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse
from sentence_transformers import SentenceTransformer
import uvicorn

# Single location for everything Hugging Face caches; used to work around a
# cache permissions issue with the default location.
_CACHE_DIR = "/tmp"
# NOTE(review): this assignment runs after `sentence_transformers` has already
# been imported at the top of the file, so any library that reads HF_HOME at
# import time may not see it -- confirm. The explicit `cache_folder` argument
# below is passed independently of the environment variable.
os.environ["HF_HOME"] = _CACHE_DIR

# Embedding model served by this app; loaded once, when the module is imported.
MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
model = SentenceTransformer(MODEL_NAME, cache_folder=_CACHE_DIR)

app = FastAPI()

@app.post("/v1/embeddings")
async def create_embeddings(request: Request):
    """Return an OpenAI-style embeddings response for the posted text.

    The request body must be a JSON object of the form
    ``{"input": "some text"}`` or ``{"input": ["text one", "text two"]}``.

    Args:
        request: Incoming HTTP request whose body carries the JSON payload.

    Returns:
        A ``JSONResponse``. On success it contains one ``"embedding"`` entry
        per input (in input order) together with ``"model"`` and ``"usage"``
        fields. A 400 response carrying an ``"error"`` object is returned
        when the body is not valid JSON, is not a JSON object, has no
        ``"input"`` field, or when ``"input"`` is neither a string nor a
        list of strings.
    """

    def bad_request(message):
        # Every client error shares the same envelope and status code.
        return JSONResponse(
            {"error": {"message": message, "type": "invalid_request"}},
            status_code=400,
        )

    try:
        data = await request.json()
    except ValueError:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueError
        # subclasses; report a client error instead of failing with a 500.
        return bad_request("Request body must be valid JSON")

    # A JSON array/number/string body has no `.get`; reject it explicitly.
    if not isinstance(data, dict):
        return bad_request("Request body must be a JSON object")

    text_input = data.get("input")
    if text_input is None:
        return bad_request("Missing 'input' field")

    # Normalise to a list of strings: a bare string becomes a one-item batch.
    if isinstance(text_input, str):
        text_input = [text_input]
    elif not isinstance(text_input, list) or not all(
        isinstance(item, str) for item in text_input
    ):
        # Anything else (numbers, objects, token-id arrays, mixed lists)
        # is rejected before it reaches the model.
        return bad_request("'input' must be a string or a list of strings")

    embeddings = model.encode(text_input, convert_to_numpy=True).tolist()

    response = {
        "object": "list",
        "data": [
            {
                "object": "embedding",
                "embedding": emb,
                "index": idx,
            }
            for idx, emb in enumerate(embeddings)
        ],
        "model": MODEL_NAME,
        # NOTE: these report the number of input strings, not tokenizer
        # token counts; kept as-is so existing clients see the same values.
        "usage": {
            "prompt_tokens": len(text_input),
            "total_tokens": len(text_input),
        },
    }
    return JSONResponse(response)

# Script entry point: start the server only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    # host="0.0.0.0" listens on all IPv4 interfaces rather than loopback only.
    # NOTE(review): port 7860 is presumably what the hosting environment
    # expects -- confirm before changing.
    uvicorn.run(app, host="0.0.0.0", port=7860)