Spaces:

ProfRom
/

TestSpace3

Sleeping

App Files Files Community

ProfRom committed on Dec 3, 2025

Commit

b28336f

verified ·

1 Parent(s): adbc5fd

Poudel - Sanity Check

Browse files

Files changed (1) hide show

app.py +19 -122

app.py CHANGED Viewed

@@ -1,127 +1,24 @@
 import gradio as gr
 from transformers import pipeline
-from PIL import ImageDraw, ImageFont
-import textwrap
-# --- LOAD MODELS ---
-print("Loading Models...")
-caption_pipeline = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
-classification_pipeline = pipeline("image-classification", model="google/vit-base-patch16-224")
-sentiment_pipeline = pipeline("sentiment-analysis")
-# --- DRAWING FUNCTION ---
-def add_caption_to_image(image, text):
-    draw = ImageDraw.Draw(image)
-    image_width, image_height = image.size
-    # 1. Setup Font
-    try:
-        font = ImageFont.truetype("DejaVuSans.ttf", 20)
-    except IOError:
-        font = ImageFont.load_default()
-    # 2. Wrap Text
-    avg_char_width = 12
-    chars_per_line = max(10, int((image_width - 40) / avg_char_width))
-    lines = textwrap.wrap(text, width=chars_per_line)
-    # 3. Calculate Box Size
-    line_height = 24
-    total_text_height = len(lines) * line_height
-    y_start = image_height - total_text_height - 20
-    max_line_width = 0
-    for line in lines:
-        bbox = draw.textbbox((0, 0), line, font=font)
-        w = bbox[2] - bbox[0]
-        if w > max_line_width: max_line_width = w
-    box_x = (image_width - max_line_width) / 2
-    # 4. Draw Box
-    padding = 10
-    draw.rectangle(
-        [
-            (box_x - padding, y_start - padding),
-            (box_x + max_line_width + padding, y_start + total_text_height + padding)
-        ],
-        fill=(0, 0, 0, 180)
-    )
-    # 5. Draw Text
-    current_y = y_start
-    for line in lines:
-        bbox = draw.textbbox((0, 0), line, font=font)
-        line_width = bbox[2] - bbox[0]
-        line_x = (image_width - line_width) / 2
-        draw.text((line_x, current_y), line, font=font, fill="white")
-        current_y += line_height
-    return image
-# --- ANALYSIS FUNCTION ---
-def multimodal_analysis(input_image):
-    if input_image is None: return None, "Upload image first", "N/A"
-    processed_image = input_image.copy()
-    # 1. Caption
-    try:
-        caption = caption_pipeline(input_image)[0]['generated_text']
-    except:
-        return processed_image, "Error", "Error"
-    # 2. Draw
-    final_img = add_caption_to_image(processed_image, caption)
-    # 3. Classify
-    try:
-        res = classification_pipeline(input_image)
-        cls_str = f"{res[0]['label']} ({res[0]['score']:.2f})"
-    except:
-        cls_str = "Error"
-    # 4. Sentiment
-    try:
-        sent = sentiment_pipeline(caption)[0]['label']
-    except:
-        sent = "Error"
-    return final_img, cls_str, sent
-# --- INTERFACE (Removed Theme to fix crash) ---
-with gr.Blocks() as demo:
-    gr.Markdown("# 🤖 Multimodal AI Analyst")
-    gr.Markdown("Select an example image below to see: **Image Captioning**, **Vision Classification**, and **NLP Sentiment Analysis** working together.")
-    with gr.Row():
-        with gr.Column():
-            image_input = gr.Image(type="pil", label="Input Image")
-            submit_btn = gr.Button("🔍 Analyze Image", variant="primary")
-        with gr.Column():
-            output_image = gr.Image(label="AI Caption Result")
-            with gr.Row():
-                output_class = gr.Textbox(label="Object Class")
-                output_sent = gr.Textbox(label="Caption Sentiment")
-    # EXACT FILES FROM YOUR LIST
-    examples = [
-        ["Ashe Catcum with Pikachu.png"],
-        ["Beautiful sunrise over ocean.png"],
-        ["Cat on a couch.png"],
-        ["Female Crying.png"],
-        ["Lions Football team huddle.png"],
-        ["michael jordan trophy.png"],
-        ["Puppies playing in grass.png"],
-        ["Red Ferrari.png"],
-        ["Siamese cat.png"],
-        ["Stormy dark sky lightning.png"]
-    ]
-    gr.Examples(examples=examples, inputs=image_input)
-    submit_btn.click(fn=multimodal_analysis, inputs=image_input, outputs=[output_image, output_class, output_sent])
-demo.launch()

 import gradio as gr
 from transformers import pipeline
+# Load image captioning pipeline
+captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
+def generate_caption(image):
+    if image is None:
+        return "Please upload an image."
+    result = captioner(image)
+    return result[0]['generated_text']
+demo = gr.Interface(
+    fn=generate_caption,
+    inputs=gr.Image(type="pil", label="Upload an image"),
+    outputs=gr.Textbox(label="Generated Caption"),
+    title="Image Captioning Demo",
+    description="Multimodal model: Vision → Language"
+)
+if __name__ == "__main__":
+    demo.launch()