Spaces:

Sbwg
/

test

Runtime error

Sbwg committed on Oct 15, 2025

Commit

4d15a8e

verified ·

1 Parent(s): 90c7d1b

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,25 +1,30 @@
 import gradio as gr
-from transformers import pipeline
-# Load a simple image classification model
-classifier = pipeline("image-classification", model="google/vit-large-patch16-224-in21k")
-def classify_image(image):
-    # Run prediction
-    results = classifier(image)
-    # Get top label and confidence
-    top_result = results[0]
-    label = top_result["label"]
-    score = round(top_result["score"] * 100, 2)
-    return f"{label} ({score}%)"
-# Simple Gradio UI
 demo = gr.Interface(
-    fn=classify_image,
     inputs=gr.Image(type="pil"),
     outputs="text",
-    title="🖼️ Simple Image Classifier",
-    description="what is this food"
 )
 if __name__ == "__main__":

 import gradio as gr
+from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer
+from PIL import Image
+# Load the stronger model
+model_id = "nlpconnect/vit-gpt2-image-captioning"
+model = VisionEncoderDecoderModel.from_pretrained(model_id)
+feature_extractor = ViTImageProcessor.from_pretrained(model_id)
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+def classify_better(image):
+    # preprocess
+    if image.mode != "RGB":
+        image = image.convert("RGB")
+    pixel_values = feature_extractor(images=image, return_tensors="pt").pixel_values
+    # Generate caption
+    output_ids = model.generate(pixel_values, max_length=20, num_beams=5)
+    caption = tokenizer.decode(output_ids[0], skip_special_tokens=True).strip()
+    return caption
 demo = gr.Interface(
+    fn=classify_better,
     inputs=gr.Image(type="pil"),
     outputs="text",
+    title="Better Image Captioning",
+    description="Upload an image and the model will try to describe it (better)."
 )
 if __name__ == "__main__":