"""Embedding API: serves sentence embeddings over HTTP with a Redis cache.

Endpoints:
    GET  /        -- liveness banner.
    POST /embed   -- return the embedding for ``text`` (cached for 24 hours).
    GET  /health  -- report service, GPU and Redis status.
"""

import hashlib
import json
import logging
import os
from typing import Optional

import redis
import torch
from fastapi import FastAPI, HTTPException
from fastapi.concurrency import run_in_threadpool
from pydantic import BaseModel
from sentence_transformers import SentenceTransformer

logger = logging.getLogger(__name__)

app = FastAPI()
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# Redis connection. Host and port come from the environment and fall back to
# a local instance on the default port.
REDIS_HOST = os.getenv("REDIS_HOST", "localhost")
REDIS_PORT = int(os.getenv("REDIS_PORT", "6379"))
redis_client = redis.Redis(
    host=REDIS_HOST, port=REDIS_PORT, db=0, decode_responses=True
)

# Cached embeddings expire after 24 hours.
CACHE_TTL_SECONDS = 86400


class EmbedRequest(BaseModel):
    """Request body for ``POST /embed``."""

    # Raw text to embed; surrounding whitespace is stripped by the handler.
    text: str


def get_cache_key(text: str) -> str:
    """Return the Redis key under which the embedding of *text* is cached."""
    # MD5 is used only to derive a compact, fixed-length key, not for
    # security. It is kept as-is so previously cached entries stay reachable.
    return f"embed:{hashlib.md5(text.encode()).hexdigest()}"


def _read_cache(cache_key: str) -> Optional[dict]:
    """Return the cached payload for *cache_key*, or None on miss or failure.

    The cache is best-effort: a Redis error or an undecodable entry is logged
    and treated as a miss so the embedding is simply recomputed.
    """
    try:
        cached = redis_client.get(cache_key)
    except redis.RedisError:
        logger.warning("Redis read failed; computing embedding", exc_info=True)
        return None
    if cached is None:
        return None
    try:
        return json.loads(cached)
    except ValueError:
        logger.warning("Ignoring undecodable cache entry %s", cache_key)
        return None


def _write_cache(cache_key: str, payload: dict) -> None:
    """Store *payload* under *cache_key* with the cache TTL (best-effort)."""
    try:
        redis_client.setex(cache_key, CACHE_TTL_SECONDS, json.dumps(payload))
    except redis.RedisError:
        logger.warning("Redis write failed; result not cached", exc_info=True)


def _embed_text(text: str) -> dict:
    """Return ``{"embedding": [...]}`` for *text*, using the cache if possible.

    This function blocks (Redis I/O and model inference), so it must be run
    off the event loop.
    """
    cache_key = get_cache_key(text)
    cached = _read_cache(cache_key)
    if cached is not None:
        return cached

    embedding = model.encode([text], convert_to_tensor=True).cpu().tolist()[0]
    payload = {"embedding": embedding}
    _write_cache(cache_key, payload)
    return payload


def _redis_is_up() -> bool:
    """Return True if Redis answers a PING, False if it cannot be reached."""
    try:
        return bool(redis_client.ping())
    except redis.RedisError:
        # ping() raises when the server is unreachable instead of returning
        # a falsy value, so the failure has to be caught here.
        return False


@app.get("/")
async def root():
    """Return a short banner confirming the API is up."""
    return {"message": "Embedding API is running! Use /embed to generate embeddings."}


@app.post("/embed")
async def embed(request: EmbedRequest):
    """Return the embedding of the request text.

    Raises:
        HTTPException: 400 if the text is empty after stripping whitespace.
    """
    text = request.text.strip()
    if not text:
        raise HTTPException(status_code=400, detail="Text cannot be empty")

    # Redis access and model inference are blocking; run them in the worker
    # thread pool so other requests are not stalled on the event loop.
    return await run_in_threadpool(_embed_text, text)


@app.get("/health")
async def health_check():
    """Report service status, GPU availability and Redis reachability."""
    redis_status = "ok" if await run_in_threadpool(_redis_is_up) else "down"
    return {
        "status": "ok",
        "gpu": torch.cuda.is_available(),
        "redis": redis_status,
    }