Spaces:
Sleeping
Sleeping
Update tagger.py
Browse files
tagger.py
CHANGED
|
@@ -1,11 +1,10 @@
|
|
| 1 |
from __future__ import annotations
|
| 2 |
|
| 3 |
"""
|
| 4 |
-
|
| 5 |
|
| 6 |
-
-
|
| 7 |
-
-
|
| 8 |
-
- Sidecar: writes ./data/<stem>.json with {"caption","tags","timestamp"}
|
| 9 |
"""
|
| 10 |
|
| 11 |
import os
|
|
@@ -13,42 +12,35 @@ import datetime as _dt
|
|
| 13 |
import json as _json
|
| 14 |
import pathlib as _pl
|
| 15 |
import re as _re
|
| 16 |
-
import
|
| 17 |
-
from typing import List
|
| 18 |
|
| 19 |
import torch
|
| 20 |
from PIL import Image
|
| 21 |
from transformers import BlipForConditionalGeneration, BlipProcessor
|
| 22 |
|
| 23 |
-
#
|
| 24 |
CAP_TAG_DIR = _pl.Path(os.environ.get("CAP_TAG_DIR", "./data")).resolve()
|
| 25 |
CAP_TAG_DIR.mkdir(parents=True, exist_ok=True)
|
| 26 |
|
| 27 |
-
# Device +
|
| 28 |
_device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 29 |
_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
|
| 30 |
_model = BlipForConditionalGeneration.from_pretrained(
|
| 31 |
"Salesforce/blip-image-captioning-base"
|
| 32 |
).to(_device)
|
| 33 |
|
| 34 |
-
#
|
| 35 |
_STOP = {
|
| 36 |
-
"a",
|
| 37 |
-
"of",
|
| 38 |
-
"is",
|
| 39 |
-
"that",
|
| 40 |
-
"up",
|
| 41 |
}
|
| 42 |
|
| 43 |
-
def
|
| 44 |
-
"""
|
| 45 |
-
Convert a caption string to up to k simple tags:
|
| 46 |
-
- lowercase alphanumeric/hyphen tokens
|
| 47 |
-
- remove short/stopword tokens
|
| 48 |
-
- keep first unique occurrences (order-preserving)
|
| 49 |
-
"""
|
| 50 |
tokens = _re.findall(r"[a-z0-9-]+", caption.lower())
|
| 51 |
-
|
| 52 |
for w in tokens:
|
| 53 |
if len(w) <= 2 or w in _STOP:
|
| 54 |
continue
|
|
@@ -64,11 +56,11 @@ def tag_pil_image(
|
|
| 64 |
stem: str,
|
| 65 |
*,
|
| 66 |
top_k: int = 5,
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
inputs = _processor(images=img, return_tensors="pt")
|
| 73 |
if _device == "cuda":
|
| 74 |
inputs = {k: v.to(_device) for k, v in inputs.items()}
|
|
@@ -76,23 +68,16 @@ def tag_pil_image(
|
|
| 76 |
ids = _model.generate(**inputs, max_length=30)
|
| 77 |
caption = _processor.decode(ids[0], skip_special_tokens=True)
|
| 78 |
|
| 79 |
-
tags
|
|
|
|
| 80 |
|
|
|
|
| 81 |
payload = {
|
| 82 |
"caption": caption,
|
| 83 |
"tags": tags,
|
| 84 |
"timestamp": _dt.datetime.now(_dt.timezone.utc).isoformat(),
|
| 85 |
}
|
| 86 |
-
(CAP_TAG_DIR / f"{
|
| 87 |
-
return tags
|
| 88 |
|
| 89 |
-
|
| 90 |
-
if len(_sys.argv) < 2:
|
| 91 |
-
_sys.exit("Usage: python tagger.py <image_path> [top_k]")
|
| 92 |
-
path = _pl.Path(_sys.argv[1])
|
| 93 |
-
if not path.exists():
|
| 94 |
-
_sys.exit(f"File not found: {path}")
|
| 95 |
-
k = int(_sys.argv[2]) if len(_sys.argv) > 2 else 5
|
| 96 |
-
with Image.open(path).convert("RGB") as im:
|
| 97 |
-
print("tags:", ", ".join(tag_pil_image(im, path.stem, top_k=k)))
|
| 98 |
|
|
|
|
| 1 |
from __future__ import annotations
|
| 2 |
|
| 3 |
"""
|
| 4 |
+
Caption with BLIP and derive simple tags (no POS/NLTK).
|
| 5 |
|
| 6 |
+
- Tags are first unique non-stopword tokens from the caption.
|
| 7 |
+
- Sidecar saved to ./data/<stem>.json
|
|
|
|
| 8 |
"""
|
| 9 |
|
| 10 |
import os
|
|
|
|
| 12 |
import json as _json
|
| 13 |
import pathlib as _pl
|
| 14 |
import re as _re
|
| 15 |
+
from typing import List, Tuple
|
|
|
|
| 16 |
|
| 17 |
import torch
|
| 18 |
from PIL import Image
|
| 19 |
from transformers import BlipForConditionalGeneration, BlipProcessor
|
| 20 |
|
# Directory for the JSON sidecar files.  Overridable through the CAP_TAG_DIR
# environment variable; the ./data default is a writable location on Spaces.
CAP_TAG_DIR = _pl.Path(os.environ.get("CAP_TAG_DIR", "./data")).resolve()
CAP_TAG_DIR.mkdir(parents=True, exist_ok=True)
|
| 24 |
|
# BLIP checkpoint used for both the processor and the captioning model;
# a single constant keeps the two identifiers from drifting apart.
_BLIP_MODEL_ID = "Salesforce/blip-image-captioning-base"

# Run on GPU when one is available, otherwise fall back to CPU.
_device = "cuda" if torch.cuda.is_available() else "cpu"

# Module-level singletons: downloaded/loaded once at import time and
# reused for every call (loading per call would dominate runtime).
_processor = BlipProcessor.from_pretrained(_BLIP_MODEL_ID)
_model = BlipForConditionalGeneration.from_pretrained(_BLIP_MODEL_ID).to(_device)
| 31 |
|
| 32 |
+
# very small stopword set to clean tags
|
| 33 |
_STOP = {
|
| 34 |
+
"a","an","the","and","or","but","if","then","so","to","from",
|
| 35 |
+
"of","in","on","at","by","for","with","without","into","out",
|
| 36 |
+
"is","are","was","were","be","being","been","it","its","this",
|
| 37 |
+
"that","these","those","as","over","under","near","above","below",
|
| 38 |
+
"up","down","left","right"
|
| 39 |
}
|
| 40 |
|
| 41 |
+
def _caption_to_tags(caption: str, k: int) -> List[str]:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
tokens = _re.findall(r"[a-z0-9-]+", caption.lower())
|
| 43 |
+
out, seen = [], set()
|
| 44 |
for w in tokens:
|
| 45 |
if len(w) <= 2 or w in _STOP:
|
| 46 |
continue
|
|
|
|
| 56 |
stem: str,
|
| 57 |
*,
|
| 58 |
top_k: int = 5,
|
| 59 |
+
) -> Tuple[str, List[str]]:
|
| 60 |
+
# sanitize stem for filesystem
|
| 61 |
+
safe_stem = _re.sub(r"[^A-Za-z0-9_.-]+", "_", stem) or "upload"
|
| 62 |
+
|
| 63 |
+
# caption
|
| 64 |
inputs = _processor(images=img, return_tensors="pt")
|
| 65 |
if _device == "cuda":
|
| 66 |
inputs = {k: v.to(_device) for k, v in inputs.items()}
|
|
|
|
| 68 |
ids = _model.generate(**inputs, max_length=30)
|
| 69 |
caption = _processor.decode(ids[0], skip_special_tokens=True)
|
| 70 |
|
| 71 |
+
# tags
|
| 72 |
+
tags = _caption_to_tags(caption, top_k)
|
| 73 |
|
| 74 |
+
# sidecar
|
| 75 |
payload = {
|
| 76 |
"caption": caption,
|
| 77 |
"tags": tags,
|
| 78 |
"timestamp": _dt.datetime.now(_dt.timezone.utc).isoformat(),
|
| 79 |
}
|
| 80 |
+
(CAP_TAG_DIR / f"{safe_stem}.json").write_text(_json.dumps(payload, indent=2))
|
|
|
|
| 81 |
|
| 82 |
+
return caption, tags
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
|