Spaces:

Tulitula
/

Add-feedback

Sleeping

App Files Files Community

Tulitula committed on Jul 11, 2025

Commit

71dc617

verified ·

1 Parent(s): 21890e3

Update app.py

Browse files

Files changed (1) hide show

app.py +61 -59

app.py CHANGED Viewed

@@ -1,91 +1,93 @@
 import re
 import gradio as gr
 from PIL import Image
-from transformers import AutoProcessor, AutoModelForVision2Seq, pipeline
# 1) Load the BLIP-2 processor and model from one shared checkpoint name
_BLIP2_CHECKPOINT = "Salesforce/blip2-flan-t5-xl"
processor = AutoProcessor.from_pretrained(_BLIP2_CHECKPOINT)
model = AutoModelForVision2Seq.from_pretrained(_BLIP2_CHECKPOINT)

# 2) Build the multimodal pipeline; the sampling settings are grouped so they
#    can be read (and tuned) in one place
_GENERATION_KWARGS = {
    "max_new_tokens": 500,
    "do_sample": True,
    "temperature": 1.0,
    "top_k": 50,
    "top_p": 0.9,
}
pipe = pipeline(
    "image-text-to-text",
    model=model,
    feature_extractor=processor.image_processor,  # BLIP2Processor uses .image_processor
    tokenizer=processor.tokenizer,
    **_GENERATION_KWARGS,
)
def get_recommendations():
    """Return the fixed example-ad image URLs as a fresh list.

    The URLs are hard-coded; a new list object is created on every call, so
    callers may modify the result without affecting later calls.
    """
    example_urls = (
        "https://i.imgur.com/InC88PP.jpeg",
        "https://i.imgur.com/7BHfv4T.png",
        "https://i.imgur.com/wp3Wzc4.jpeg",
        "https://i.imgur.com/5e2xOA4.jpeg",
        "https://i.imgur.com/txjRk98.jpeg",
        "https://i.imgur.com/rQ4AYl0.jpeg",
        "https://i.imgur.com/bDzwD04.jpeg",
        "https://i.imgur.com/fLMngXI.jpeg",
        "https://i.imgur.com/nYEJzxt.png",
        "https://i.imgur.com/Xj92Cjv.jpeg",
    )
    return list(example_urls)
 def process(image: Image):
-    # A single prompt that asks BLIP-2+Flan-T5 to emit exactly three sections
-    prompt = (
-        "You are an expert ad critic. Given the image below, output exactly three sections:\n\n"
-        "Category: <one concise label>\n\n"
-        "Analysis: <exactly five sentences explaining what the ad communicates and its emotional impact>\n\n"
-        "Suggestions:\n"
-        "- <bullet 1>\n"
-        "- <bullet 2>\n"
-        "- <bullet 3>\n"
-        "- <bullet 4>\n"
-        "- <bullet 5>\n"
-    )
-    # Run the pipeline
-    out = pipe(image, prompt=prompt)[0]["generated_text"]
-    # Regex-extract each section
-    cat_match = re.search(r"Category:(.*?)Analysis:", out, re.S)
-    ana_match = re.search(r"Analysis:(.*?)Suggestions:", out, re.S)
-    sug_match = re.search(r"Suggestions:(.*)", out, re.S)
-    category    = cat_match.group(1).strip() if cat_match else ""
-    analysis    = ana_match.group(1).strip() if ana_match else ""
-    suggestions = sug_match.group(1).strip() if sug_match else ""
-    # Ensure exactly 5 bullets
-    bullets = [line for line in suggestions.splitlines() if line.startswith("-")]
     if len(bullets) < 5:
-        bullets += ["- (no suggestion)"] * (5 - len(bullets))
     suggestions = "\n".join(bullets[:5])
-    return category, analysis, suggestions, get_recommendations()
# --- Gradio UI ---
_INTRO_MARKDOWN = (
    "Upload an image ad to see: an **Ad Category**, a **five-sentence Analysis**, "
    "**five bullet-point Suggestions**, and **Example Ads**."
)

with gr.Blocks(theme=gr.themes.Default(primary_hue="blue")) as demo:
    gr.Markdown("## 📢 Smart Ad Analyzer")
    gr.Markdown(_INTRO_MARKDOWN)

    # One row: the upload widget, then a column with the three text results
    # and the button that triggers the analysis.
    with gr.Row():
        img = gr.Image(type="pil", label="Upload Ad Image")
        with gr.Column():
            cat_out = gr.Textbox(label="Ad Category", interactive=False)
            ana_out = gr.Textbox(label="Ad Analysis", lines=5, interactive=False)
            sug_out = gr.Textbox(label="Improvement Suggestions", lines=5, interactive=False)
            btn = gr.Button("Analyze Ad", size="sm", variant="primary")

    gallery = gr.Gallery(label="Recommended Example Ads", show_label=True)

    # A single process() call fills all four output components.
    btn.click(
        fn=process,
        inputs=[img],
        outputs=[cat_out, ana_out, sug_out, gallery],
    )

    gr.Markdown("Made by Simon Thalmay")

+# app.py
 import re
 import gradio as gr
 from PIL import Image
+from transformers import (
+    AutoProcessor,
+    AutoModelForVision2Seq,
+    pipeline,
 )
# 1 – BLIP-large for image captioning (processor and model share one checkpoint)
_CAPTION_CHECKPOINT = "Salesforce/blip-image-captioning-large"
processor = AutoProcessor.from_pretrained(_CAPTION_CHECKPOINT)
model = AutoModelForVision2Seq.from_pretrained(_CAPTION_CHECKPOINT)
def generate_caption(image: "Image.Image") -> str:
    """Generate a text caption for *image* with the module-level BLIP model.

    Args:
        image: The PIL image to caption. (``Image`` is the PIL module; the
            image class itself is ``Image.Image``.)

    Returns:
        The decoded caption text with special tokens removed.
    """
    inputs = processor(images=image, return_tensors="pt")
    outputs = model.generate(**inputs)
    # Only the first generated sequence is decoded.
    return processor.tokenizer.decode(outputs[0], skip_special_tokens=True)
# 2 – Flan-T5 pipelines
def make_pipe(model_name, max_tokens):
    """Build a sampling ``text2text-generation`` pipeline.

    Args:
        model_name: Checkpoint identifier, used for both model and tokenizer.
        max_tokens: Forwarded to the pipeline as ``max_new_tokens``.

    Returns:
        The object returned by ``pipeline(...)``.
    """
    generation_options = {
        "max_new_tokens": max_tokens,
        "do_sample": True,
        "temperature": 1.0,
    }
    text2text = pipeline(
        "text2text-generation",
        model=model_name,
        tokenizer=model_name,
        **generation_options,
    )
    return text2text
# The category pipeline has its own, shorter token budget (80).
cat_pipe = make_pipe("google/flan-t5-small", 80)
# Analysis and suggestions use identical settings (same checkpoint, 200 new
# tokens), so they share one pipeline instead of loading the model twice.
ana_pipe = make_pipe("google/flan-t5-small", 200)
sug_pipe = ana_pipe
# 3 – Recommendation gallery
def get_recs():
    """Return the fixed example-ad image URLs as a fresh list.

    The URLs are hard-coded; a new list object is created on every call.
    """
    example_urls = (
        "https://i.imgur.com/InC88PP.jpeg",
        "https://i.imgur.com/7BHfv4T.png",
        "https://i.imgur.com/wp3Wzc4.jpeg",
        "https://i.imgur.com/5e2xOA4.jpeg",
        "https://i.imgur.com/txjRk98.jpeg",
    )
    return list(example_urls)
# 4 – Full workflow
def process(image: "Image.Image | None"):
    """Caption an ad image, then derive a category, analysis and suggestions.

    Args:
        image: The uploaded ad as a PIL image, or ``None`` when nothing has
            been uploaded yet.

    Returns:
        A 4-tuple ``(category, analysis, suggestions, example_urls)``:
        the first line of the category output, at most five sentences of
        analysis, at most five ``-`` bullet lines, and the example-ad URLs.
        For a ``None`` image every text field is empty and the example list
        is empty.
    """
    # Clicking the button without an upload passes None; bail out instead of
    # letting the captioning model fail on it.
    if image is None:
        return "", "", "", []

    caption = generate_caption(image)

    # category: first line of the output; an empty generation gives "" rather
    # than an IndexError on the empty splitlines() result
    raw_cat = cat_pipe(f"Caption: {caption}\nLabel this ad in one phrase:")[0]["generated_text"]
    cat_lines = raw_cat.strip().splitlines()
    category = cat_lines[0].strip() if cat_lines else ""

    # analysis: split after sentence-ending punctuation and keep five sentences
    raw_ana = ana_pipe(
        f"Caption: {caption}\nWrite exactly five sentences explaining what this ad communicates and its emotional impact."
    )[0]["generated_text"]
    sentences = re.split(r'(?<=[.!?])\s+', raw_ana.strip())
    analysis = " ".join(sentences[:5])

    # suggestions
    raw_sug = sug_pipe(
        f"Caption: {caption}\nSuggest five distinct improvements as bullets, each starting with '- '."
    )[0]["generated_text"]
    lines = [ln.strip() for ln in raw_sug.splitlines() if ln.strip()]
    bullets = [ln for ln in lines if ln.startswith("-")]
    if len(bullets) < 5:
        # Too few real bullets: fall back to every non-empty line, adding the
        # "- " prefix only where it is missing so no line becomes "- - ...".
        bullets = [ln if ln.startswith("-") else f"- {ln}" for ln in lines]
    suggestions = "\n".join(bullets[:5])

    return category, analysis, suggestions, get_recs()
# 5 – Gradio UI
_INTRO_TEXT = (
    "Upload an image ad to get: a Category, five-sentence Analysis, "
    "five bullet-point Suggestions, and Example Ads."
)

with gr.Blocks(theme=gr.themes.Default(primary_hue="blue")) as demo:
    gr.Markdown("## 📢 Smart Ad Analyzer")
    gr.Markdown(_INTRO_TEXT)

    # One row: the upload widget, then a column with the three text results
    # and the button that triggers the analysis.
    with gr.Row():
        inp = gr.Image(type="pil", label="Upload Ad Image")
        with gr.Column():
            out_cat = gr.Textbox(label="Ad Category", interactive=False)
            out_ana = gr.Textbox(label="Ad Analysis", lines=5, interactive=False)
            out_sug = gr.Textbox(label="Improvement Suggestions", lines=5, interactive=False)
            btn = gr.Button("Analyze Ad", size="sm")

    gallery = gr.Gallery(label="Example Ads", show_label=True)

    # A single process() call fills all four output components.
    btn.click(
        process,
        inputs=[inp],
        outputs=[out_cat, out_ana, out_sug, gallery],
    )

    gr.Markdown("Made by Simon Thalmay")