File size: 1,083 Bytes
d6f2dc1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27e2ef7
d6f2dc1
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
from sentence_transformers import SentenceTransformer
import litserve as ls
from fastapi import Depends, HTTPException
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
import os

class EmbeddingAPI(ls.LitAPI):
    """LitServe API that returns sentence-transformer query embeddings.

    Requests are passed to the model unchanged, encoded with
    ``SentenceTransformer.encode_query`` and returned as plain Python lists.
    Access is guarded by a bearer token read from the ``auth_token``
    environment variable.
    """

    def setup(self, device):
        """Load the embedding model onto ``device``.

        The Hugging Face access token is read from the ``HF_TOKEN``
        environment variable (``None`` when it is not set).
        """
        # NOTE(review): trust_remote_code=True lets the model repository run
        # its own Python code at load time -- confirm this checkpoint needs it.
        self.model = SentenceTransformer(
            'GokulRajaR/embeddinggemma-300m-qat-q8_0-unquantized',
            device=device,
            trust_remote_code=True,
            token=os.getenv("HF_TOKEN"),
        )

    def decode_request(self, request):
        """Return the incoming request payload unchanged."""
        return request

    def predict(self, query):
        """Encode ``query`` with the model's ``encode_query`` method."""
        return self.model.encode_query(query)

    def encode_response(self, output):
        """Convert the model output to plain lists via its ``tolist()``."""
        return output.tolist()

    def authorize(self, auth: HTTPAuthorizationCredentials = Depends(HTTPBearer())):
        """Reject requests that do not carry the expected bearer token.

        The expected token comes from the ``auth_token`` environment
        variable; when it is unset or empty every request is rejected.

        Raises:
            HTTPException: 401 with detail ``"Bad token"`` when the scheme is
                not exactly ``"Bearer"`` or the token does not match.
        """
        # Imported locally so this method does not depend on a file-level
        # import being added elsewhere.
        import secrets

        expected = os.getenv("auth_token")
        if not expected:
            # Fail closed: no configured secret means nobody is authorized.
            raise HTTPException(status_code=401, detail="Bad token")

        # Constant-time comparison so response timing does not leak how much
        # of the token matched; bytes are used because compare_digest only
        # accepts ASCII-only str arguments.
        token_ok = secrets.compare_digest(
            auth.credentials.encode("utf-8"),
            expected.encode("utf-8"),
        )
        if auth.scheme != "Bearer" or not token_ok:
            raise HTTPException(status_code=401, detail="Bad token")

if __name__ == "__main__":
    # Script entry point: build the server around a fresh EmbeddingAPI,
    # configured for CPU, and start it on port 7860.
    embedding_server = ls.LitServer(
        EmbeddingAPI(),
        devices="cpu",
        accelerator="cpu",
    )
    embedding_server.run(port=7860)