"""Minimal OpenAI-compatible embeddings server backed by sentence-transformers."""

import os

# Point the Hugging Face cache at a writable location (works around a cache
# permissions issue). This must run BEFORE importing sentence_transformers:
# the Hugging Face libraries resolve their cache paths from HF_HOME when they
# are first imported, so assigning it afterwards has no effect on them.
os.environ["HF_HOME"] = "/tmp"

from fastapi import FastAPI, Request  # noqa: E402
from fastapi.responses import JSONResponse  # noqa: E402
from sentence_transformers import SentenceTransformer  # noqa: E402
import uvicorn  # noqa: E402

# Model selection. The model is loaded once at import time and shared by all
# requests; its files are cached under /tmp as well.
MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
model = SentenceTransformer(MODEL_NAME, cache_folder="/tmp")

app = FastAPI()


def _invalid_request(message: str) -> JSONResponse:
    """Build a 400 response using the endpoint's error envelope."""
    return JSONResponse(
        {"error": {"message": message, "type": "invalid_request"}},
        status_code=400,
    )


@app.post("/v1/embeddings")
async def create_embeddings(request: Request):
    """
    OpenAI-compatible embeddings endpoint.

    Accepts a JSON object of the form ``{"input": "your text here"}`` or
    ``{"input": ["text one", "text two"]}``.

    Returns a JSON document with ``object``, ``data`` (one embedding per
    input, in input order), ``model`` and ``usage``.

    Responds with HTTP 400 and an ``invalid_request`` error when the body is
    not valid JSON, is not a JSON object, lacks ``input``, or when ``input``
    is neither a string nor a list of strings.
    """
    # Malformed JSON (or undecodable bytes) raises a ValueError subclass;
    # report it as a client error instead of letting it surface as a 500.
    try:
        data = await request.json()
    except ValueError:
        return _invalid_request("Request body must be valid JSON")

    # A JSON array/number/string body has no ``.get``; reject it explicitly.
    if not isinstance(data, dict):
        return _invalid_request("Request body must be a JSON object")

    text_input = data.get("input")
    if text_input is None:
        return _invalid_request("Missing 'input' field")

    # Handle a single string or a list of strings.
    if isinstance(text_input, str):
        text_input = [text_input]
    elif not (
        isinstance(text_input, list)
        and all(isinstance(item, str) for item in text_input)
    ):
        return _invalid_request(
            "'input' must be a string or a list of strings"
        )

    # NOTE: encode() is a blocking call and runs on the event loop here, so
    # other requests wait while it computes.
    embeddings = model.encode(text_input, convert_to_numpy=True).tolist()

    response = {
        "object": "list",
        "data": [
            {"object": "embedding", "embedding": emb, "index": idx}
            for idx, emb in enumerate(embeddings)
        ],
        "model": MODEL_NAME,
        # NOTE(review): these are counts of input strings, not tokenizer
        # tokens; kept as-is so existing clients see the same values.
        "usage": {
            "prompt_tokens": len(text_input),
            "total_tokens": len(text_input),
        },
    }
    return JSONResponse(response)


if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)