Semnykcz committed on
Commit
d750f4b
·
verified ·
1 Parent(s): cdf47de

Upload 3 files

Browse files
Files changed (2) hide show
  1. Dockerfile +1 -2
  2. app.py +30 -2
Dockerfile CHANGED
@@ -5,10 +5,9 @@ ENV DEBIAN_FRONTEND=noninteractive \
5
  PIP_NO_CACHE_DIR=1 \
6
  HOME=/app \
7
  HF_HOME=/app/.cache/huggingface \
8
- TRANSFORMERS_CACHE=/app/.cache/huggingface/transformers \
9
- HUGGINGFACE_HUB_CACHE=/app/.cache/huggingface/hub \
10
  XDG_CACHE_HOME=/app/.cache
11
 
 
12
  RUN apt-get update && apt-get install -y --no-install-recommends \
13
  python3 python3-pip git && \
14
  rm -rf /var/lib/apt/lists/*
 
5
  PIP_NO_CACHE_DIR=1 \
6
  HOME=/app \
7
  HF_HOME=/app/.cache/huggingface \
 
 
8
  XDG_CACHE_HOME=/app/.cache
9
 
10
+
11
  RUN apt-get update && apt-get install -y --no-install-recommends \
12
  python3 python3-pip git && \
13
  rm -rf /var/lib/apt/lists/*
app.py CHANGED
@@ -33,7 +33,7 @@ def check_auth(auth_header: Optional[str]):
33
  # ── Model registry (lazy loading) ──────────────────────────────────────────────
34
  MODELS: Dict[str, Any] = {"blip2": None, "cogvlm": None}
35
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
36
- CACHE_DIR = os.environ.get("HF_HOME") or os.environ.get("TRANSFORMERS_CACHE") or "/app/.cache/huggingface"
37
 
38
 
39
  def load_blip2():
@@ -62,7 +62,12 @@ def load_cogvlm():
62
  from transformers import AutoProcessor, AutoTokenizer, AutoModelForCausalLM
63
  name = os.environ.get("COGVLM_NAME", "THUDM/cogvlm2-llama3-captioner")
64
  processor = AutoProcessor.from_pretrained(name, trust_remote_code=True, cache_dir=CACHE_DIR)
65
- tokenizer = AutoTokenizer.from_pretrained(name, trust_remote_code=True, use_fast=False, cache_dir=CACHE_DIR)
 
 
 
 
 
66
  model = AutoModelForCausalLM.from_pretrained(
67
  name,
68
  torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
@@ -84,10 +89,33 @@ def caption_cogvlm(image: Image.Image, prompt: Optional[str], max_new_tokens: in
84
  return caption
85
 
86
  # ── Routes ────────────────────────────────────────────────────────────────────
 
 
 
 
 
 
 
 
 
87
  @app.get("/health")
88
  def health():
89
  return {"status": "ok", "device": DEVICE, "cuda": torch.cuda.is_available()}
90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  @app.post("/caption")
92
  async def caption(
93
  file: UploadFile = File(...),
 
33
  # ── Model registry (lazy loading) ──────────────────────────────────────────────
34
  MODELS: Dict[str, Any] = {"blip2": None, "cogvlm": None}
35
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
36
+ CACHE_DIR = os.environ.get("HF_HOME", "/app/.cache/huggingface")
37
 
38
 
39
  def load_blip2():
 
62
  from transformers import AutoProcessor, AutoTokenizer, AutoModelForCausalLM
63
  name = os.environ.get("COGVLM_NAME", "THUDM/cogvlm2-llama3-captioner")
64
  processor = AutoProcessor.from_pretrained(name, trust_remote_code=True, cache_dir=CACHE_DIR)
65
+ tokenizer = AutoTokenizer.from_pretrained(
66
+ name,
67
+ trust_remote_code=True,
68
+ use_fast=False, # CogVLM může mít problémy s fast tokenizerem
69
+ cache_dir=CACHE_DIR
70
+ )
71
  model = AutoModelForCausalLM.from_pretrained(
72
  name,
73
  torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
 
89
  return caption
90
 
91
  # ── Routes ────────────────────────────────────────────────────────────────────
92
+ @app.get("/")
93
+ def root():
94
+ return {
95
+ "message": "Image Captioning API (BLIP2 / CogVLM)",
96
+ "endpoints": ["/health", "/caption"],
97
+ "device": DEVICE,
98
+ "models": list(MODELS.keys())
99
+ }
100
+
101
  @app.get("/health")
102
  def health():
103
  return {"status": "ok", "device": DEVICE, "cuda": torch.cuda.is_available()}
104
 
105
+ @app.get("/caption")
106
+ def caption_info():
107
+ return {
108
+ "method": "POST",
109
+ "description": "Upload image and get caption",
110
+ "parameters": {
111
+ "file": "image file (required)",
112
+ "model": "blip2 or cogvlm (default: blip2)",
113
+ "prompt": "custom prompt (optional)",
114
+ "max_new_tokens": "max tokens to generate (default: 64)"
115
+ },
116
+ "auth": "Bearer token in Authorization header (if API_TOKEN is set)"
117
+ }
118
+
119
  @app.post("/caption")
120
  async def caption(
121
  file: UploadFile = File(...),