Upload 3 files
- Dockerfile  +1 -2
- app.py  +30 -2
Dockerfile  CHANGED
@@ -5,10 +5,9 @@ ENV DEBIAN_FRONTEND=noninteractive \
     PIP_NO_CACHE_DIR=1 \
     HOME=/app \
     HF_HOME=/app/.cache/huggingface \
-    TRANSFORMERS_CACHE=/app/.cache/huggingface/transformers \
-    HUGGINGFACE_HUB_CACHE=/app/.cache/huggingface/hub \
     XDG_CACHE_HOME=/app/.cache

+
 RUN apt-get update && apt-get install -y --no-install-recommends \
     python3 python3-pip git && \
     rm -rf /var/lib/apt/lists/*
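Dropping TRANSFORMERS_CACHE and HUGGINGFACE_HUB_CACHE leaves HF_HOME as the single cache root; recent transformers and huggingface_hub releases derive their cache locations from it (TRANSFORMERS_CACHE is deprecated there), so the dedicated variables were redundant. A minimal sketch of the resulting layout, not part of the repo, assuming the usual "hub under HF_HOME" convention:

import os

# Assumption: with only HF_HOME set, model snapshots end up under "<HF_HOME>/hub",
# which is where the removed HUGGINGFACE_HUB_CACHE pointed anyway.
hf_home = os.environ.get("HF_HOME", "/app/.cache/huggingface")
expected_hub_cache = os.path.join(hf_home, "hub")
print(expected_hub_cache)  # /app/.cache/huggingface/hub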
app.py  CHANGED
@@ -33,7 +33,7 @@ def check_auth(auth_header: Optional[str]):
 # ── Model registry (lazy loading) ─────────────────────────────────────────────
 MODELS: Dict[str, Any] = {"blip2": None, "cogvlm": None}
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
-CACHE_DIR = os.environ.get("HF_HOME")
+CACHE_DIR = os.environ.get("HF_HOME", "/app/.cache/huggingface")


 def load_blip2():

@@ -62,7 +62,12 @@ def load_cogvlm():
     from transformers import AutoProcessor, AutoTokenizer, AutoModelForCausalLM
     name = os.environ.get("COGVLM_NAME", "THUDM/cogvlm2-llama3-captioner")
     processor = AutoProcessor.from_pretrained(name, trust_remote_code=True, cache_dir=CACHE_DIR)
-    tokenizer = AutoTokenizer.from_pretrained(
+    tokenizer = AutoTokenizer.from_pretrained(
+        name,
+        trust_remote_code=True,
+        use_fast=False,  # CogVLM may have issues with the fast tokenizer
+        cache_dir=CACHE_DIR
+    )
     model = AutoModelForCausalLM.from_pretrained(
         name,
         torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
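The substantive change in this hunk is use_fast=False: remote-code models such as CogVLM may not ship a working fast (Rust-backed) tokenizer, so forcing the slow implementation avoids the conversion path. A quick check of which implementation gets loaded, as a sketch (downloading the tokenizer is only needed to run it; is_fast is a standard Hugging Face tokenizer attribute):

from transformers import AutoTokenizer

# Sketch: confirm the slow (Python) tokenizer is selected when use_fast=False.
tok = AutoTokenizer.from_pretrained(
    "THUDM/cogvlm2-llama3-captioner",
    trust_remote_code=True,
    use_fast=False,
)
print(type(tok).__name__, tok.is_fast)  # expect is_fast == False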
@@ -84,10 +89,33 @@ def caption_cogvlm(image: Image.Image, prompt: Optional[str], max_new_tokens: int
     return caption

 # ── Routes ────────────────────────────────────────────────────────────────────
+@app.get("/")
+def root():
+    return {
+        "message": "Image Captioning API (BLIP2 / CogVLM)",
+        "endpoints": ["/health", "/caption"],
+        "device": DEVICE,
+        "models": list(MODELS.keys())
+    }
+
 @app.get("/health")
 def health():
     return {"status": "ok", "device": DEVICE, "cuda": torch.cuda.is_available()}

+@app.get("/caption")
+def caption_info():
+    return {
+        "method": "POST",
+        "description": "Upload image and get caption",
+        "parameters": {
+            "file": "image file (required)",
+            "model": "blip2 or cogvlm (default: blip2)",
+            "prompt": "custom prompt (optional)",
+            "max_new_tokens": "max tokens to generate (default: 64)"
+        },
+        "auth": "Bearer token in Authorization header (if API_TOKEN is set)"
+    }
+
 @app.post("/caption")
 async def caption(
     file: UploadFile = File(...),
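With the new GET routes the API is browsable, while POST /caption still does the captioning. A minimal client sketch; the host/port and token are placeholders for whatever the deployment actually exposes, and the form fields follow the parameters listed by GET /caption above:

import requests

BASE = "http://localhost:7860"  # placeholder; use your deployment's URL
HEADERS = {"Authorization": "Bearer YOUR_TOKEN"}  # only needed if API_TOKEN is set

# The new informational routes:
print(requests.get(f"{BASE}/", timeout=30).json())
print(requests.get(f"{BASE}/caption", timeout=30).json())

# Actual captioning: multipart upload plus the documented form parameters.
with open("example.jpg", "rb") as img:
    resp = requests.post(
        f"{BASE}/caption",
        headers=HEADERS,
        files={"file": ("example.jpg", img, "image/jpeg")},
        data={"model": "blip2", "max_new_tokens": "64"},
        timeout=600,
    )
print(resp.json())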