Tulitula committed on
Commit
cb67003
·
verified ·
1 Parent(s): 169b299

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +132 -97
app.py CHANGED
@@ -2,59 +2,80 @@ import re
2
  import gradio as gr
3
  import torch
4
  from PIL import Image
5
- from transformers import pipeline, AutoProcessor, AutoModelForVision2Seq
6
 
7
  # Auto-detect CPU/GPU
8
  device = 0 if torch.cuda.is_available() else -1
9
 
10
- # 1) BLIP captioner
11
- processor = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
12
- model = AutoModelForVision2Seq.from_pretrained("Salesforce/blip-image-captioning-large")
13
- caption_pipe = pipeline(
14
- "image-to-text",
15
- model=model,
16
- processor=processor,
17
- device=device
18
- )
19
-
20
- # 2) Flan-T5 for text-to-text
21
- FLAN = "google/flan-t5-large"
22
- category_pipe = pipeline(
23
- "text2text-generation",
24
- model=FLAN,
25
- tokenizer=FLAN,
26
- device=device,
27
- max_new_tokens=32,
28
- do_sample=True,
29
- temperature=1.0,
30
- )
31
- analysis_pipe = pipeline(
32
- "text2text-generation",
33
- model=FLAN,
34
- tokenizer=FLAN,
35
- device=device,
36
- max_new_tokens=256,
37
- do_sample=True,
38
- temperature=1.0,
39
- )
40
- suggestion_pipe = pipeline(
41
- "text2text-generation",
42
- model=FLAN,
43
- tokenizer=FLAN,
44
- device=device,
45
- max_new_tokens=256,
46
- do_sample=True,
47
- temperature=1.0,
48
- )
49
- # Expander when BLIP caption is too short
50
- expansion_pipe = pipeline(
51
- "text2text-generation",
52
- model=FLAN,
53
- tokenizer=FLAN,
54
- device=device,
55
- max_new_tokens=128,
56
- do_sample=False,
57
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
 
59
  # Example gallery helper returns 10 example ad URLs
60
  def get_recommendations():
@@ -71,53 +92,67 @@ def get_recommendations():
71
  "https://i.imgur.com/Xj92Cjv.jpeg",
72
  ]
73
 
74
- # Main processing function
75
  def process(image: Image):
76
- # 1) BLIP caption
77
- caption = caption_pipe(image, max_new_tokens=64, do_sample=False)[0]['generated_text'].strip()
78
-
79
- # 1a) Expand caption if too short
80
- if len(caption.split()) < 3:
81
- desc = expansion_pipe(f"Expand into a detailed description: {caption}")[0]['generated_text'].strip()
82
- else:
83
- desc = caption
84
-
85
- # 2) Ad category
86
- cat_prompt = (
87
- f"Description: {desc}\n\n"
88
- "Provide a concise category label for this ad (e.g. 'Food', 'Fitness'):"
89
- )
90
- category = category_pipe(cat_prompt)[0]['generated_text'].splitlines()[0].strip()
 
 
 
 
 
 
 
91
 
92
- # 3) Five-sentence analysis
93
- ana_prompt = (
94
- f"Description: {desc}\n\n"
95
- "Write exactly five sentences explaining what this ad communicates and its emotional impact."
96
- )
97
- raw_ana = analysis_pipe(ana_prompt)[0]['generated_text'].strip()
98
- sentences = re.split(r'(?<=[.!?])\s+', raw_ana)
99
- analysis = " ".join(sentences[:5])
100
-
101
- # 4) Five bullet-point suggestions
102
- sug_prompt = (
103
- f"Description: {desc}\n\n"
104
- "Suggest five distinct improvements for this ad. Each must start with '- ' and be one sentence."
105
- )
106
- raw_sug = suggestion_pipe(sug_prompt)[0]['generated_text'].strip()
107
- bullets = [l for l in raw_sug.splitlines() if l.startswith('-')]
108
- if len(bullets) < 5:
109
- extra_lines = [l for l in raw_sug.splitlines() if l.strip()]
110
- for line in extra_lines:
111
- if len(bullets) >= 5:
112
- break
113
- bullets.append(line if line.startswith('-') else '- ' + line)
114
- suggestions = '\n'.join(bullets[:5])
115
-
116
- return caption, category, analysis, suggestions, get_recommendations()
117
-
118
- # Gradio UI
 
 
 
 
 
 
 
119
  def main():
120
- with gr.Blocks() as demo:
121
  gr.Markdown("## 📢 Smart Ad Analyzer")
122
  gr.Markdown(
123
  "Upload an image ad to get:\n"
@@ -137,7 +172,7 @@ def main():
137
  sug_out = gr.Textbox(label='Improvement Suggestions', lines=5, interactive=False)
138
  btn = gr.Button('Analyze Ad', size='sm', variant='primary')
139
 
140
- gallery = gr.Gallery(label='Example Ads')
141
 
142
  btn.click(
143
  fn=process,
@@ -147,8 +182,8 @@ def main():
147
 
148
  gr.Markdown('Made by Simon Thalmay')
149
 
150
- demo.launch()
151
 
152
  if __name__ == '__main__':
153
- main()
154
-
 
2
  import gradio as gr
3
  import torch
4
  from PIL import Image
5
+ from transformers import pipeline, AutoProcessor, AutoModelForVision2Seq, AutoTokenizer, AutoModelForSeq2SeqLM
6
 
7
  # Auto-detect CPU/GPU
8
  device = 0 if torch.cuda.is_available() else -1
9
 
10
+ # 1) BLIP captioner - Fixed tokenizer usage
11
+ try:
12
+ processor = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
13
+ model = AutoModelForVision2Seq.from_pretrained("Salesforce/blip-image-captioning-large")
14
+
15
+ caption_pipe = pipeline(
16
+ "image-to-text",
17
+ model=model,
18
+ feature_extractor=processor.feature_extractor,
19
+ tokenizer=processor.tokenizer,
20
+ device=device
21
+ )
22
+ print("✅ BLIP model loaded successfully")
23
+ except Exception as e:
24
+ print(f"❌ Error loading BLIP model: {e}")
25
+ raise
26
+
27
+ # 2) Flan-T5 for text-to-text - Fixed tokenizer initialization
28
+ FLAN_MODEL = "google/flan-t5-large"
29
+ try:
30
+ # Load tokenizer and model separately for better control
31
+ flan_tokenizer = AutoTokenizer.from_pretrained(FLAN_MODEL)
32
+ flan_model = AutoModelForSeq2SeqLM.from_pretrained(FLAN_MODEL)
33
+
34
+ # Create pipelines with explicit tokenizer
35
+ category_pipe = pipeline(
36
+ "text2text-generation",
37
+ model=flan_model,
38
+ tokenizer=flan_tokenizer,
39
+ device=device,
40
+ max_new_tokens=32,
41
+ do_sample=True,
42
+ temperature=1.0,
43
+ )
44
+
45
+ analysis_pipe = pipeline(
46
+ "text2text-generation",
47
+ model=flan_model,
48
+ tokenizer=flan_tokenizer,
49
+ device=device,
50
+ max_new_tokens=256,
51
+ do_sample=True,
52
+ temperature=1.0,
53
+ )
54
+
55
+ suggestion_pipe = pipeline(
56
+ "text2text-generation",
57
+ model=flan_model,
58
+ tokenizer=flan_tokenizer,
59
+ device=device,
60
+ max_new_tokens=256,
61
+ do_sample=True,
62
+ temperature=1.0,
63
+ )
64
+
65
+ # Expander when BLIP caption is too short
66
+ expansion_pipe = pipeline(
67
+ "text2text-generation",
68
+ model=flan_model,
69
+ tokenizer=flan_tokenizer,
70
+ device=device,
71
+ max_new_tokens=128,
72
+ do_sample=False,
73
+ )
74
+
75
+ print("✅ Flan-T5 model loaded successfully")
76
+ except Exception as e:
77
+ print(f"❌ Error loading Flan-T5 model: {e}")
78
+ raise
79
 
80
  # Example gallery helper returns 10 example ad URLs
81
  def get_recommendations():
 
92
  "https://i.imgur.com/Xj92Cjv.jpeg",
93
  ]
94
 
95
+ # Main processing function with error handling
96
  def process(image: Image):
97
+ try:
98
+ if image is None:
99
+ return "Please upload an image", "", "", "", get_recommendations()
100
+
101
+ # 1) BLIP caption
102
+ caption_result = caption_pipe(image, max_new_tokens=64, do_sample=False)
103
+ caption = caption_result[0]['generated_text'].strip()
104
+
105
+ # 1a) Expand caption if too short
106
+ if len(caption.split()) < 3:
107
+ desc_result = expansion_pipe(f"Expand into a detailed description: {caption}")
108
+ desc = desc_result[0]['generated_text'].strip()
109
+ else:
110
+ desc = caption
111
+
112
+ # 2) Ad category
113
+ cat_prompt = (
114
+ f"Description: {desc}\n\n"
115
+ "Provide a concise category label for this ad (e.g. 'Food', 'Fitness'):"
116
+ )
117
+ category_result = category_pipe(cat_prompt)
118
+ category = category_result[0]['generated_text'].splitlines()[0].strip()
119
 
120
+ # 3) Five-sentence analysis
121
+ ana_prompt = (
122
+ f"Description: {desc}\n\n"
123
+ "Write exactly five sentences explaining what this ad communicates and its emotional impact."
124
+ )
125
+ raw_ana_result = analysis_pipe(ana_prompt)
126
+ raw_ana = raw_ana_result[0]['generated_text'].strip()
127
+ sentences = re.split(r'(?<=[.!?])\s+', raw_ana)
128
+ analysis = " ".join(sentences[:5])
129
+
130
+ # 4) Five bullet-point suggestions
131
+ sug_prompt = (
132
+ f"Description: {desc}\n\n"
133
+ "Suggest five distinct improvements for this ad. Each must start with '- ' and be one sentence."
134
+ )
135
+ raw_sug_result = suggestion_pipe(sug_prompt)
136
+ raw_sug = raw_sug_result[0]['generated_text'].strip()
137
+ bullets = [l for l in raw_sug.splitlines() if l.startswith('-')]
138
+ if len(bullets) < 5:
139
+ extra = [l for l in raw_sug.splitlines() if l.strip()]
140
+ for line in extra:
141
+ if len(bullets) >= 5:
142
+ break
143
+ bullets.append(line if line.startswith('-') else '- ' + line)
144
+ suggestions = '\n'.join(bullets[:5])
145
+
146
+ return caption, category, analysis, suggestions, get_recommendations()
147
+
148
+ except Exception as e:
149
+ error_msg = f"Error processing image: {str(e)}"
150
+ print(error_msg)
151
+ return error_msg, "", "", "", get_recommendations()
152
+
153
+ # Gradio UI definition
154
  def main():
155
+ with gr.Blocks(title="Smart Ad Analyzer") as demo:
156
  gr.Markdown("## 📢 Smart Ad Analyzer")
157
  gr.Markdown(
158
  "Upload an image ad to get:\n"
 
172
  sug_out = gr.Textbox(label='Improvement Suggestions', lines=5, interactive=False)
173
  btn = gr.Button('Analyze Ad', size='sm', variant='primary')
174
 
175
+ gallery = gr.Gallery(label='Example Ads', value=get_recommendations())
176
 
177
  btn.click(
178
  fn=process,
 
182
 
183
  gr.Markdown('Made by Simon Thalmay')
184
 
185
+ return demo
186
 
187
  if __name__ == '__main__':
188
+ demo = main()
189
+ demo.launch()