Upload 3 files
- Dockerfile  +1 -2
- app.py  +30 -2
Dockerfile  CHANGED
@@ -5,10 +5,9 @@ ENV DEBIAN_FRONTEND=noninteractive \
     PIP_NO_CACHE_DIR=1 \
     HOME=/app \
     HF_HOME=/app/.cache/huggingface \
-    TRANSFORMERS_CACHE=/app/.cache/huggingface/transformers \
-    HUGGINGFACE_HUB_CACHE=/app/.cache/huggingface/hub \
     XDG_CACHE_HOME=/app/.cache

+
 RUN apt-get update && apt-get install -y --no-install-recommends \
     python3 python3-pip git && \
     rm -rf /var/lib/apt/lists/*
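Dropping TRANSFORMERS_CACHE and HUGGINGFACE_HUB_CACHE leaves HF_HOME as the single cache root; recent transformers and huggingface_hub releases derive their cache locations from it (TRANSFORMERS_CACHE is deprecated there), so the dedicated variables were redundant. A minimal sketch of the resulting layout, not part of the repo, assuming the usual "hub under HF_HOME" convention:

import os

# Assumption: with only HF_HOME set, model snapshots end up under "<HF_HOME>/hub",
# which is where the removed HUGGINGFACE_HUB_CACHE pointed anyway.
hf_home = os.environ.get("HF_HOME", "/app/.cache/huggingface")
expected_hub_cache = os.path.join(hf_home, "hub")
print(expected_hub_cache)  # /app/.cache/huggingface/hub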
app.py  CHANGED
@@ -33,7 +33,7 @@ def check_auth(auth_header: Optional[str]):
 # ── Model registry (lazy loading) ─────────────────────────────────────────────
 MODELS: Dict[str, Any] = {"blip2": None, "cogvlm": None}
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
-CACHE_DIR = os.environ.get("HF_HOME")
+CACHE_DIR = os.environ.get("HF_HOME", "/app/.cache/huggingface")


 def load_blip2():

@@ -62,7 +62,12 @@ def load_cogvlm():
     from transformers import AutoProcessor, AutoTokenizer, AutoModelForCausalLM
     name = os.environ.get("COGVLM_NAME", "THUDM/cogvlm2-llama3-captioner")
     processor = AutoProcessor.from_pretrained(name, trust_remote_code=True, cache_dir=CACHE_DIR)
-    tokenizer = AutoTokenizer.from_pretrained(
+    tokenizer = AutoTokenizer.from_pretrained(
+        name,
+        trust_remote_code=True,
+        use_fast=False,  # CogVLM may have issues with the fast tokenizer
+        cache_dir=CACHE_DIR
+    )
     model = AutoModelForCausalLM.from_pretrained(
         name,
         torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
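The substantive change in this hunk is use_fast=False: remote-code models such as CogVLM may not ship a working fast (Rust-backed) tokenizer, so forcing the slow implementation avoids the conversion path. A quick check of which implementation gets loaded, as a sketch (downloading the tokenizer is only needed to run it; is_fast is a standard Hugging Face tokenizer attribute):

from transformers import AutoTokenizer

# Sketch: confirm the slow (Python) tokenizer is selected when use_fast=False.
tok = AutoTokenizer.from_pretrained(
    "THUDM/cogvlm2-llama3-captioner",
    trust_remote_code=True,
    use_fast=False,
)
print(type(tok).__name__, tok.is_fast)  # expect is_fast == False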
@@ -84,10 +89,33 @@ def caption_cogvlm(image: Image.Image, prompt: Optional[str], max_new_tokens: int
     return caption

 # ── Routes ────────────────────────────────────────────────────────────────────
+@app.get("/")
+def root():
+    return {
+        "message": "Image Captioning API (BLIP2 / CogVLM)",
+        "endpoints": ["/health", "/caption"],
+        "device": DEVICE,
+        "models": list(MODELS.keys())
+    }
+
 @app.get("/health")
 def health():
     return {"status": "ok", "device": DEVICE, "cuda": torch.cuda.is_available()}

+@app.get("/caption")
+def caption_info():
+    return {
+        "method": "POST",
+        "description": "Upload image and get caption",
+        "parameters": {
+            "file": "image file (required)",
+            "model": "blip2 or cogvlm (default: blip2)",
+            "prompt": "custom prompt (optional)",
+            "max_new_tokens": "max tokens to generate (default: 64)"
+        },
+        "auth": "Bearer token in Authorization header (if API_TOKEN is set)"
+    }
+
 @app.post("/caption")
 async def caption(
     file: UploadFile = File(...),
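With the new GET routes the API is browsable, while POST /caption still does the captioning. A minimal client sketch; the host/port and token are placeholders for whatever the deployment actually exposes, and the form fields follow the parameters listed by GET /caption above:

import requests

BASE = "http://localhost:7860"  # placeholder; use your deployment's URL
HEADERS = {"Authorization": "Bearer YOUR_TOKEN"}  # only needed if API_TOKEN is set

# The new informational routes:
print(requests.get(f"{BASE}/", timeout=30).json())
print(requests.get(f"{BASE}/caption", timeout=30).json())

# Actual captioning: multipart upload plus the documented form parameters.
with open("example.jpg", "rb") as img:
    resp = requests.post(
        f"{BASE}/caption",
        headers=HEADERS,
        files={"file": ("example.jpg", img, "image/jpeg")},
        data={"model": "blip2", "max_new_tokens": "64"},
        timeout=600,
    )
print(resp.json())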