Spaces:

Mohansai2004
/

Image_analysis

Running

App Files Files Community

Mohansai2004 committed on Aug 9, 2025

Commit

0290b84

verified ·

1 Parent(s): b6e5d1c

Update app/caption_model.py

Browse files

Files changed (1) hide show

app/caption_model.py +43 -1

app/caption_model.py CHANGED Viewed

@@ -11,7 +11,49 @@ def caption_image(image: Image.Image):
         raise ValueError("Input must be a valid PIL Image in RGB or grayscale format")
     # Run object detection
-    results = detector(image)
     # Track highest score per object
     objects_dict = {}

         raise ValueError("Input must be a valid PIL Image in RGB or grayscale format")
     # Run object detection
+from transformers import pipeline
+from PIL import Image
+# Checkpoint identifier handed to the object-detection pipeline below.
+MODEL_NAME = "hustvl/yolos-small"
+# Built once at module import so every caption_image() call reuses the same
+# pipeline object instead of constructing a new one per request.
+detector = pipeline("object-detection", model=MODEL_NAME)
+def caption_image(image: Image.Image, *, top_k: int = 20, threshold: float = 0.2):
+    """Detect objects in ``image`` and summarise them as a caption.
+
+    Args:
+        image: PIL image in ``'RGB'`` or ``'L'`` mode.
+        top_k: Maximum number of raw detections (highest score first) that
+            are considered before duplicates are merged by label.
+        threshold: Minimum score handed to the detector.
+
+    Returns:
+        A dict with ``"caption"`` (readable summary), ``"objects"`` (one
+        ``{"label", "score"}`` entry per distinct label, best score first,
+        scores rounded to two decimals) and ``"confidence"`` (the highest
+        score rounded to two decimals, or ``0.0`` when nothing was found).
+
+    Raises:
+        ValueError: If ``image`` is not a PIL image or its mode is neither
+            ``'RGB'`` nor ``'L'``.
+    """
+    if not isinstance(image, Image.Image) or image.mode not in ('RGB', 'L'):
+        raise ValueError("Input must be a valid PIL Image in RGB or grayscale format")
+    # NOTE(review): the object-detection pipeline documents ``threshold`` but
+    # not ``top_k``; an unsupported keyword appears to be dropped silently, so
+    # the cap is enforced here instead of being passed through — confirm
+    # against the installed transformers version.
+    detections = sorted(
+        detector(image, threshold=threshold),
+        key=lambda det: det['score'],
+        reverse=True,
+    )[:top_k]
+    # Detections are ordered best-first, so the first score seen for a label
+    # is its maximum, and dict insertion order keeps the labels ranked.
+    best_by_label = {}
+    for det in detections:
+        best_by_label.setdefault(det['label'], det['score'])
+    objects_list = [
+        {"label": label, "score": round(score, 2)}
+        for label, score in best_by_label.items()
+    ]
+    if objects_list:
+        caption = "Detected objects: " + ", ".join(
+            f"{obj['label']} ({obj['score']:.2f})" for obj in objects_list
+        )
+    else:
+        caption = "No objects detected."
+    max_confidence = max(best_by_label.values(), default=0.0)
+    return {
+        "caption": caption,
+        "objects": objects_list,
+        "confidence": round(max_confidence, 2),
+    }
     # Track highest score per object
     objects_dict = {}