Zhen Ye committed on
Commit
58bb3a4
·
1 Parent(s): 7e7e04e

Add semantic COCO class matching via sentence-transformers and expand synonym table

Browse files
Files changed (2) hide show
  1. coco_classes.py +103 -3
  2. requirements.txt +1 -0
coco_classes.py CHANGED
@@ -1,8 +1,13 @@
1
  from __future__ import annotations
2
 
3
  import difflib
 
4
  import re
5
- from typing import Dict, Tuple
 
 
 
 
6
 
7
  COCO_CLASSES: Tuple[str, ...] = (
8
  "person",
@@ -105,6 +110,10 @@ _COCO_SYNONYMS: Dict[str, str] = {
105
  "woman": "person",
106
  "men": "person",
107
  "women": "person",
 
 
 
 
108
  "motorbike": "motorcycle",
109
  "motor bike": "motorcycle",
110
  "bike": "bicycle",
@@ -112,11 +121,28 @@ _COCO_SYNONYMS: Dict[str, str] = {
112
  "plane": "airplane",
113
  "jet": "airplane",
114
  "aeroplane": "airplane",
 
 
 
115
  "pickup": "truck",
116
  "pickup truck": "truck",
117
  "semi": "truck",
118
  "lorry": "truck",
119
  "tractor trailer": "truck",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
  "coach": "bus",
121
  "television": "tv",
122
  "tv monitor": "tv",
@@ -130,8 +156,80 @@ _COCO_SYNONYMS: Dict[str, str] = {
130
  _ALIAS_LOOKUP: Dict[str, str] = {_normalize(alias): canonical for alias, canonical in _COCO_SYNONYMS.items()}
131
 
132
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
  def canonicalize_coco_name(value: str | None) -> str | None:
134
- """Map an arbitrary string to the closest COCO class name if possible."""
 
 
 
 
 
 
 
 
 
135
 
136
  if not value:
137
  return None
@@ -160,4 +258,6 @@ def canonicalize_coco_name(value: str | None) -> str | None:
160
  close = difflib.get_close_matches(normalized, list(_CANONICAL_LOOKUP.keys()), n=1, cutoff=0.82)
161
  if close:
162
  return _CANONICAL_LOOKUP[close[0]]
163
- return None
 
 
 
1
  from __future__ import annotations
2
 
3
  import difflib
4
+ import logging
5
  import re
6
+ from typing import Dict, Optional, Tuple
7
+
8
+ import numpy as np
9
+
10
+ logger = logging.getLogger(__name__)
11
 
12
  COCO_CLASSES: Tuple[str, ...] = (
13
  "person",
 
110
  "woman": "person",
111
  "men": "person",
112
  "women": "person",
113
+ "pedestrian": "person",
114
+ "soldier": "person",
115
+ "infantry": "person",
116
+ "civilian": "person",
117
  "motorbike": "motorcycle",
118
  "motor bike": "motorcycle",
119
  "bike": "bicycle",
 
121
  "plane": "airplane",
122
  "jet": "airplane",
123
  "aeroplane": "airplane",
124
+ "drone": "airplane",
125
+ "uav": "airplane",
126
+ "helicopter": "airplane",
127
  "pickup": "truck",
128
  "pickup truck": "truck",
129
  "semi": "truck",
130
  "lorry": "truck",
131
  "tractor trailer": "truck",
132
+ "vehicle": "car",
133
+ "sedan": "car",
134
+ "suv": "car",
135
+ "van": "car",
136
+ "vessel": "boat",
137
+ "ship": "boat",
138
+ "warship": "boat",
139
+ "speedboat": "boat",
140
+ "cargo ship": "boat",
141
+ "fishing boat": "boat",
142
+ "yacht": "boat",
143
+ "kayak": "boat",
144
+ "canoe": "boat",
145
+ "watercraft": "boat",
146
  "coach": "bus",
147
  "television": "tv",
148
  "tv monitor": "tv",
 
156
  _ALIAS_LOOKUP: Dict[str, str] = {_normalize(alias): canonical for alias, canonical in _COCO_SYNONYMS.items()}
157
 
158
 
159
+ # ---------------------------------------------------------------------------
160
+ # Semantic similarity fallback (lazy-loaded)
161
+ # ---------------------------------------------------------------------------
162
+
163
+ _SEMANTIC_MODEL = None
164
+ _COCO_EMBEDDINGS: Optional[np.ndarray] = None
165
+ _SEMANTIC_THRESHOLD = 0.65 # Minimum cosine similarity to accept a match
166
+
167
+
168
def _get_semantic_model():
    """Return the cached ``(model, embeddings)`` pair, loading on first use.

    The first call tries to import sentence-transformers, instantiate the
    MiniLM model, and pre-encode one anchor phrase per COCO class. A failed
    load is cached via the ``False`` sentinel so subsequent calls do not
    retry the expensive import.
    """
    global _SEMANTIC_MODEL, _COCO_EMBEDDINGS

    # Fast path: a previous call already loaded the model or recorded failure.
    if _SEMANTIC_MODEL is None:
        try:
            from sentence_transformers import SentenceTransformer

            model = SentenceTransformer("all-MiniLM-L6-v2")
            # Prefix with "a photo of a" to anchor embeddings in visual/object space
            phrases = [f"a photo of a {cls}" for cls in COCO_CLASSES]
            embeddings = model.encode(phrases, normalize_embeddings=True)
        except Exception:
            logger.warning("sentence-transformers unavailable; semantic COCO mapping disabled", exc_info=True)
            _SEMANTIC_MODEL = False  # Sentinel: tried and failed
            _COCO_EMBEDDINGS = None
        else:
            _SEMANTIC_MODEL = model
            _COCO_EMBEDDINGS = embeddings
            logger.info("Loaded semantic similarity model for COCO class mapping")

    return _SEMANTIC_MODEL, _COCO_EMBEDDINGS
189
+
190
+
191
def _semantic_coco_match(value: str) -> Optional[str]:
    """Return the COCO class whose embedding is closest to *value*.

    The query is embedded with the same "a photo of a" prompt used for the
    class anchors; a match is accepted only when the cosine similarity
    reaches ``_SEMANTIC_THRESHOLD``. Returns ``None`` when no class clears
    the threshold or when the semantic model is unavailable.
    """
    model, class_embeddings = _get_semantic_model()
    if class_embeddings is None or model is False:
        return None

    encoded = model.encode(
        [f"a photo of a {value}"], normalize_embeddings=True
    )
    # Embeddings are L2-normalized, so the dot product is cosine similarity.
    scores = encoded @ class_embeddings.T  # shape (1, num_classes)
    winner = int(np.argmax(scores))
    score = float(scores[0, winner])
    candidate = COCO_CLASSES[winner]

    if score < _SEMANTIC_THRESHOLD:
        logger.debug(
            "Semantic COCO match failed: '%s' best='%s' (score=%.3f < %.2f)",
            value, candidate, score, _SEMANTIC_THRESHOLD,
        )
        return None

    logger.info(
        "Semantic COCO match: '%s' -> '%s' (score=%.3f)",
        value, candidate, score,
    )
    return candidate
220
+
221
+
222
  def canonicalize_coco_name(value: str | None) -> str | None:
223
+ """Map an arbitrary string to the closest COCO class name if possible.
224
+
225
+ Matching cascade:
226
+ 1. Exact normalized match
227
+ 2. Synonym lookup
228
+ 3. Substring match (alias then canonical)
229
+ 4. Token-level match
230
+ 5. Fuzzy string match (difflib)
231
+ 6. Semantic embedding similarity (sentence-transformers)
232
+ """
233
 
234
  if not value:
235
  return None
 
258
  close = difflib.get_close_matches(normalized, list(_CANONICAL_LOOKUP.keys()), n=1, cutoff=0.82)
259
  if close:
260
  return _CANONICAL_LOOKUP[close[0]]
261
+
262
+ # Last resort: semantic embedding similarity
263
+ return _semantic_coco_match(value)
requirements.txt CHANGED
@@ -9,3 +9,4 @@ huggingface-hub
9
  ultralytics
10
  python-dotenv
11
  einops
 
 
9
  ultralytics
10
  python-dotenv
11
  einops
12
+ sentence-transformers