Tulitula commited on
Commit
e285a5c
·
verified ·
1 Parent(s): 71dc617

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +109 -60
app.py CHANGED
@@ -1,93 +1,142 @@
1
- # app.py
2
-
3
  import re
4
  import gradio as gr
5
  from PIL import Image
6
  from transformers import (
7
- AutoProcessor,
8
- AutoModelForVision2Seq,
9
  pipeline,
10
  )
11
 
12
- # 1 BLIP-large for image captioning
13
- processor = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
14
- model = AutoModelForVision2Seq.from_pretrained("Salesforce/blip-image-captioning-large")
15
-
16
- def generate_caption(image: Image) -> str:
17
- inputs = processor(images=image, return_tensors="pt")
18
- outputs = model.generate(**inputs)
19
- return processor.tokenizer.decode(outputs[0], skip_special_tokens=True)
20
-
21
- # 2 – Flan-T5 pipelines
22
- def make_pipe(model_name, max_tokens):
23
- return pipeline(
24
- "text2text-generation",
25
- model=model_name,
26
- tokenizer=model_name,
27
- max_new_tokens=max_tokens,
28
- do_sample=True,
29
- temperature=1.0,
30
- )
31
 
32
- cat_pipe = make_pipe("google/flan-t5-small", 80)
33
- ana_pipe = make_pipe("google/flan-t5-small", 200)
34
- sug_pipe = make_pipe("google/flan-t5-small", 200)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
- # 3 – Recommendation gallery
37
- def get_recs():
38
  return [
39
  "https://i.imgur.com/InC88PP.jpeg",
40
  "https://i.imgur.com/7BHfv4T.png",
41
  "https://i.imgur.com/wp3Wzc4.jpeg",
42
  "https://i.imgur.com/5e2xOA4.jpeg",
43
  "https://i.imgur.com/txjRk98.jpeg",
 
 
 
 
 
44
  ]
45
 
46
- # 4 – Full workflow
47
  def process(image: Image):
48
- caption = generate_caption(image)
49
-
50
- # category
51
- raw_cat = cat_pipe(f"Caption: {caption}\nLabel this ad in one phrase:")[0]["generated_text"]
52
- category = raw_cat.strip().splitlines()[0]
53
-
54
- # analysis
55
- raw_ana = ana_pipe(
56
- f"Caption: {caption}\nWrite exactly five sentences explaining what this ad communicates and its emotional impact."
57
- )[0]["generated_text"]
58
- sentences = re.split(r'(?<=[.!?])\s+', raw_ana.strip())
 
 
 
 
 
 
 
 
 
59
  analysis = " ".join(sentences[:5])
 
60
 
61
- # suggestions
62
- raw_sug = sug_pipe(
63
- f"Caption: {caption}\nSuggest five distinct improvements as bullets, each starting with '- '."
64
- )[0]["generated_text"]
65
- bullets = [l for l in raw_sug.splitlines() if l.strip().startswith("-")]
66
- if len(bullets) < 5:
67
- lines = [l.strip() for l in raw_sug.splitlines() if l.strip()]
68
- bullets = [("- " + lines[i]) for i in range(min(5, len(lines)))]
69
- suggestions = "\n".join(bullets[:5])
70
-
71
- return category, analysis, suggestions, get_recs()
 
 
 
 
 
 
 
 
72
 
73
- # 5 – Gradio UI
74
  with gr.Blocks(theme=gr.themes.Default(primary_hue="blue")) as demo:
75
  gr.Markdown("## 📢 Smart Ad Analyzer")
76
  gr.Markdown(
77
- "Upload an image ad to get: a Category, five-sentence Analysis, "
78
- "five bullet-point Suggestions, and Example Ads."
79
  )
80
 
81
  with gr.Row():
82
- inp = gr.Image(type="pil", label="Upload Ad Image")
83
  with gr.Column():
84
- out_cat = gr.Textbox(label="Ad Category", interactive=False)
85
- out_ana = gr.Textbox(label="Ad Analysis", lines=5, interactive=False)
86
- out_sug = gr.Textbox(label="Improvement Suggestions", lines=5, interactive=False)
87
- btn = gr.Button("Analyze Ad", size="sm")
 
88
 
89
  gallery = gr.Gallery(label="Example Ads", show_label=True)
90
- btn.click(process, inputs=[inp], outputs=[out_cat, out_ana, out_sug, gallery])
 
 
 
 
 
91
 
92
  gr.Markdown("Made by Simon Thalmay")
93
 
 
1
+ import logging
 
2
  import re
3
  import gradio as gr
4
  from PIL import Image
5
  from transformers import (
6
+ BlipProcessor,
7
+ BlipForConditionalGeneration,
8
  pipeline,
9
  )
10
 
11
# Set up logging
logging.basicConfig(level=logging.INFO)

# 1) BLIP captioner (large model for richer captions)
caption_processor = BlipProcessor.from_pretrained(
    "Salesforce/blip-image-captioning-large",
    use_fast=False,
)
caption_model = BlipForConditionalGeneration.from_pretrained(
    "Salesforce/blip-image-captioning-large"
)

caption_pipe = pipeline(
    task="image-to-text",
    model=caption_model,
    processor=caption_processor,
    device=-1,        # CPU
    max_length=64,
    do_sample=False,  # deterministic captions
)

# 2) Flan-T5 pipelines for category, analysis, suggestions
FLAN_MODEL = "google/flan-t5-large"


def _make_flan_pipe(model, tokenizer, max_tokens):
    """Build a sampling text2text-generation pipeline.

    `model`/`tokenizer` may be a checkpoint name or already-loaded
    objects, which lets the three pipelines below share one set of
    flan-t5-large weights instead of loading the checkpoint three times.
    """
    return pipeline(
        "text2text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=max_tokens,
        do_sample=True,
        temperature=1.0,
    )


category_pipe = _make_flan_pipe(FLAN_MODEL, FLAN_MODEL, 32)
# Reuse the weights already loaded by category_pipe — the original code
# passed the checkpoint name to all three pipeline() calls, instantiating
# flan-t5-large three times and roughly tripling memory use.
analysis_pipe = _make_flan_pipe(category_pipe.model, category_pipe.tokenizer, 256)
suggestion_pipe = _make_flan_pipe(category_pipe.model, category_pipe.tokenizer, 256)
58
 
59
def get_recommendations():
    """Return the static list of example-ad image URLs shown in the gallery."""
    filenames = (
        "InC88PP.jpeg",
        "7BHfv4T.png",
        "wp3Wzc4.jpeg",
        "5e2xOA4.jpeg",
        "txjRk98.jpeg",
        "rQ4AYl0.jpeg",
        "bDzwD04.jpeg",
        "fLMngXI.jpeg",
        "nYEJzxt.png",
        "Xj92Cjv.jpeg",
    )
    return ["https://i.imgur.com/" + name for name in filenames]
72
 
 
73
def process(image: Image.Image):
    """Run the full ad-analysis workflow on one uploaded image.

    Returns a 5-tuple (caption, category, analysis, suggestions, gallery
    URLs) matching the Gradio outputs wired up in the UI.
    """
    # Step 1: Generate BLIP caption
    caption = caption_pipe(image)[0]["generated_text"].strip()
    logging.info("RAW CAPTION: %s", caption)

    # Step 2: Category label
    cat_prompt = (
        f"Caption: {caption}\n"
        "Provide a single concise category label for this ad (e.g. 'Food Ad', 'Fitness Promotion'):"
    )
    raw_cat = category_pipe(cat_prompt)[0]["generated_text"].strip()
    # Keep only the first line; guard against an empty generation, which
    # previously raised IndexError on splitlines()[0].
    category = raw_cat.splitlines()[0] if raw_cat else ""
    logging.info("RAW CATEGORY: %s", raw_cat)

    # Step 3: Five-sentence Analysis — split on sentence-ending punctuation
    # and keep at most five sentences.
    ana_prompt = (
        f"Caption: {caption}\n"
        "Write exactly five sentences explaining what this ad communicates and its emotional impact."
    )
    raw_ana = analysis_pipe(ana_prompt)[0]["generated_text"].strip()
    sentences = re.split(r'(?<=[.!?])\s+', raw_ana)
    analysis = " ".join(sentences[:5])
    logging.info("RAW ANALYSIS: %s", raw_ana)

    # Step 4: Five bullet-point Suggestions
    sug_prompt = (
        f"Caption: {caption}\n"
        "Suggest five distinct improvements for this ad. "
        "Each suggestion must start with '- ' and be one actionable sentence."
    )
    raw_sug = suggestion_pipe(sug_prompt)[0]["generated_text"].strip()
    bullets = [l.strip() for l in raw_sug.splitlines() if l.strip().startswith("-")]
    if len(bullets) < 5:
        # Pad with the remaining NON-bullet lines, normalised to a '- '
        # prefix. (The previous fallback re-appended lines that already
        # started with '-', duplicating them, and its empty prefix plus
        # lstrip("- ") stripped the bullet marker off those duplicates.)
        for extra in (l.strip() for l in raw_sug.splitlines()):
            if len(bullets) >= 5:
                break
            if extra and not extra.startswith("-"):
                bullets.append("- " + extra)
    suggestions = "\n".join(bullets[:5])
    logging.info("RAW SUGGESTIONS:\n%s", raw_sug)

    return caption, category, analysis, suggestions, get_recommendations()
116
 
 
117
# 5) Gradio UI — component creation order defines the on-page layout.
with gr.Blocks(theme=gr.themes.Default(primary_hue="blue")) as demo:
    gr.Markdown("## 📢 Smart Ad Analyzer")
    gr.Markdown(
        "Upload an image ad to get: a **BLIP Caption** (debug), a **Category**, a "
        "**five-sentence Analysis**, **five bullet-point Suggestions**, and **Example Ads**."
    )

    with gr.Row():
        image_input = gr.Image(type="pil", label="Upload Ad Image")
        with gr.Column():
            caption_box = gr.Textbox(label="🔍 BLIP Caption (debug)", interactive=False)
            category_box = gr.Textbox(label="Ad Category", interactive=False)
            analysis_box = gr.Textbox(label="Ad Analysis", lines=5, interactive=False)
            suggestion_box = gr.Textbox(label="Improvement Suggestions", lines=5, interactive=False)
            analyze_btn = gr.Button("Analyze Ad")

    example_gallery = gr.Gallery(label="Example Ads", show_label=True)

    # Wire the button: process() returns values in the same order as `outputs`.
    analyze_btn.click(
        fn=process,
        inputs=[image_input],
        outputs=[caption_box, category_box, analysis_box, suggestion_box, example_gallery],
    )

    gr.Markdown("Made by Simon Thalmay")
142