File size: 1,453 Bytes
068b918
 
 
 
 
 
 
 
 
 
d59cc20
068b918
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from sentence_transformers import SentenceTransformer
import torch
import redis
import json
import hashlib
import os

# Application and model are created once at import time; loading the
# SentenceTransformer downloads weights on first run, so startup is slow.
app = FastAPI()
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# Redis connection for the embedding cache; host comes from the REDIS_HOST
# env var (default "localhost"). decode_responses=True makes GET return str.
REDIS_HOST = os.getenv("REDIS_HOST", "localhost")
redis_client = redis.Redis(host=REDIS_HOST, port=6379, db=0, decode_responses=True)

class EmbedRequest(BaseModel):
    """Request body for POST /embed."""

    text: str  # the string to embed; /embed rejects it if empty after stripping

def get_cache_key(text: str) -> str:
    """Build the deterministic Redis cache key for *text* ("embed:<md5 hex>")."""
    digest = hashlib.md5(text.encode()).hexdigest()
    return "embed:" + digest

@app.get("/")
async def root():
    """Landing endpoint: confirms the service is up and points callers at /embed."""
    return {
        "message": "Embedding API is running! Use /embed to generate embeddings."
    }

@app.post("/embed")
async def embed(request: EmbedRequest):
    """Return a sentence embedding for the request text.

    Checks the Redis cache first (keyed by MD5 of the stripped text); on a
    miss, encodes with the SentenceTransformer model and caches the result
    for 24 hours. Cache failures are treated as best-effort: an unreachable
    Redis or a corrupt cached value falls back to recomputing the embedding
    instead of failing the request.

    Raises:
        HTTPException: 400 when the text is empty after stripping.
    """
    text = request.text.strip()
    if not text:
        raise HTTPException(status_code=400, detail="Text cannot be empty")

    cache_key = get_cache_key(text)
    try:
        cached_result = redis_client.get(cache_key)
        if cached_result:
            return json.loads(cached_result)
    except (redis.RedisError, json.JSONDecodeError):
        # Cache unreachable or entry corrupt — recompute rather than 500.
        pass

    embedding = model.encode([text], convert_to_tensor=True).cpu().tolist()[0]
    try:
        # 86400 s = 24 h TTL; a failed write must not fail the request.
        redis_client.setex(cache_key, 86400, json.dumps({"embedding": embedding}))
    except redis.RedisError:
        pass

    return {"embedding": embedding}

@app.get("/health")
async def health_check():
    """Report service, GPU, and Redis availability.

    Original code assumed ping() returns falsy when Redis is down, but
    redis-py raises ConnectionError (a RedisError subclass) instead — which
    made /health itself return a 500 exactly when Redis was unavailable.
    Catch the exception so the endpoint always answers with a status.
    """
    try:
        redis_status = "ok" if redis_client.ping() else "down"
    except redis.RedisError:
        redis_status = "down"
    return {"status": "ok", "gpu": torch.cuda.is_available(), "redis": redis_status}