Spaces:
Runtime error
Runtime error
File size: 1,453 Bytes
068b918 d59cc20 068b918 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
import hashlib
import json
import logging
import os

import redis
import torch
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from sentence_transformers import SentenceTransformer
# ASGI application object; the route decorators further down register
# their handlers on it.
app = FastAPI()
# Load the sentence-embedding model once at import time; the /embed handler
# reuses this single instance for every request.
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
# Redis client used as the embedding cache. The host is read from the
# REDIS_HOST environment variable (default "localhost"); port 6379 and db 0
# are fixed. decode_responses=True makes reads come back as str, not bytes.
# NOTE(review): the previous comment described this as "Hugging Face
# storage"; nothing in this file establishes that -- confirm where Redis
# is actually hosted.
REDIS_HOST = os.getenv("REDIS_HOST", "localhost")
redis_client = redis.Redis(host=REDIS_HOST, port=6379, db=0, decode_responses=True)
class EmbedRequest(BaseModel):
    """Request body accepted by the ``/embed`` endpoint."""

    # Raw text to embed; the handler strips surrounding whitespace itself.
    text: str
def get_cache_key(text: str) -> str:
    """Return the Redis key for *text*.

    The key is the literal prefix ``embed:`` followed by the hexadecimal
    MD5 digest of the text's UTF-8 encoding. MD5 is used here only to
    derive a fixed-length key, not for any security purpose.
    """
    digest = hashlib.md5(text.encode("utf-8"))
    return "embed:" + digest.hexdigest()
@app.get("/")
async def root():
    """Landing endpoint: return a short message pointing callers at ``/embed``."""
    banner = "Embedding API is running! Use /embed to generate embeddings."
    return {"message": banner}
@app.post("/embed")
async def embed(request: EmbedRequest):
    """Return the embedding for ``request.text``, using Redis as a cache.

    The text is stripped of surrounding whitespace before use. The cache is
    best-effort: if Redis cannot be reached, or a cached value cannot be
    parsed as JSON, the problem is logged and the embedding is computed
    from the model instead of failing the request.

    Args:
        request: Request body carrying the ``text`` to embed.

    Returns:
        A dict of the form ``{"embedding": [...]}``. On a cache hit this is
        whatever JSON value was stored under the key.

    Raises:
        HTTPException: With status 400 when the text is empty after
            stripping.
    """
    text = request.text.strip()
    if not text:
        raise HTTPException(status_code=400, detail="Text cannot be empty")

    log = logging.getLogger(__name__)
    cache_key = get_cache_key(text)

    # NOTE(review): redis_client is the synchronous redis.Redis client and
    # model.encode is a synchronous call, so both appear to block the event
    # loop inside this async handler -- consider a plain ``def`` endpoint or
    # an executor if throughput matters.
    try:
        cached_result = redis_client.get(cache_key)
    except redis.RedisError:
        # A cache outage must not take the endpoint down; treat it as a miss.
        log.warning("Redis lookup failed; computing embedding", exc_info=True)
        cached_result = None

    if cached_result is not None:
        try:
            return json.loads(cached_result)
        except json.JSONDecodeError:
            # Unparseable entry: fall through, recompute, and overwrite it.
            log.warning("Discarding unparseable cache entry %s", cache_key)

    # encode() is given a one-element batch, so take row 0 of the result.
    embedding = model.encode([text], convert_to_tensor=True).cpu().tolist()[0]
    payload = {"embedding": embedding}

    try:
        # 86400 seconds: cached embeddings expire after 24 hours.
        redis_client.setex(cache_key, 86400, json.dumps(payload))
    except redis.RedisError:
        log.warning("Redis write failed; result not cached", exc_info=True)

    return payload
@app.get("/health")
async def health_check():
    """Report service status, GPU availability, and Redis reachability.

    Returns:
        A dict with ``status`` (always ``"ok"``), ``gpu`` (the result of
        ``torch.cuda.is_available()``), and ``redis`` (``"ok"`` when the
        server answers a ping, ``"down"`` otherwise).
    """
    # redis-py's ping() raises on a connection failure rather than returning
    # False, so the error has to be caught for "down" to ever be reported.
    try:
        redis_status = "ok" if redis_client.ping() else "down"
    except redis.RedisError:
        redis_status = "down"
    return {"status": "ok", "gpu": torch.cuda.is_available(), "redis": redis_status}
|