Tulitula committed on
Commit
93e5528
·
verified ·
1 Parent(s): d275ca5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -73
app.py CHANGED
@@ -2,40 +2,31 @@ import os
2
  import gradio as gr
3
  import torch
4
  from PIL import Image
5
- from transformers import pipeline, AutoProcessor, AutoModelForVision2Seq, AutoTokenizer, AutoModelForCausalLM
6
 
7
# --- SETUP TOKEN ---
# Hugging Face access token, read from the environment (or Secrets on Spaces).
HF_TOKEN = os.getenv("HF_TOKEN")

# --- DEVICE ---
# Device index handed to pipeline(): 0 when CUDA is available, otherwise -1.
DEVICE = -1
if torch.cuda.is_available():
    DEVICE = 0

# --- BLIP: Captioning ---
processor = AutoProcessor.from_pretrained(
    "Salesforce/blip-image-captioning-large",
    token=HF_TOKEN,
)
blip_model = AutoModelForVision2Seq.from_pretrained(
    "Salesforce/blip-image-captioning-large",
    token=HF_TOKEN,
)
# The pipeline reuses the tokenizer and image processor bundled in `processor`.
caption_pipe = pipeline(
    "image-to-text",
    model=blip_model,
    tokenizer=processor.tokenizer,
    image_processor=processor.image_processor,
    device=DEVICE,
    token=HF_TOKEN,
)

# --- GEMMA: Text Generation ---
# Swap this to your preferred Gemma model ID, e.g. "google/gemma-2b-it"
GEMMA_MODEL = "google/gemma-2b-it"

gemma_tokenizer = AutoTokenizer.from_pretrained(GEMMA_MODEL, token=HF_TOKEN)
gemma_model = AutoModelForCausalLM.from_pretrained(GEMMA_MODEL, token=HF_TOKEN)
# Greedy decoding (do_sample=False), capped at 384 newly generated tokens.
gemma_pipe = pipeline(
    "text-generation",
    model=gemma_model,
    tokenizer=gemma_tokenizer,
    device=DEVICE,
    max_new_tokens=384,
    do_sample=False,
    token=HF_TOKEN,
)
40
 
41
  def get_recommendations():
@@ -52,81 +43,61 @@ def get_recommendations():
52
  "https://i.imgur.com/Xj92Cjv.jpeg",
53
  ]
54
 
 
 
 
 
 
 
 
 
55
def process(image: Image.Image):
    """Analyze an uploaded ad image.

    The image is captioned with the BLIP pipeline, then Gemma is prompted
    three times with that caption: for a category label, for a five-sentence
    analysis, and for five improvement suggestions.

    Args:
        image: The uploaded PIL image, or None when nothing was uploaded.

    Returns:
        A tuple ``(category, analysis, suggestions, recommendations)``. The
        first three are strings (empty when ``image`` is None); the last is
        whatever ``get_recommendations()`` returns.
    """
    import re  # only needed here, for sentence splitting

    if image is None:
        return "", "", "", get_recommendations()

    def ask_gemma(prompt):
        """Return only the text Gemma generated for *prompt*."""
        # The text-generation pipeline returns prompt + completion unless
        # return_full_text=False; without it, every parse below would read
        # the echoed prompt instead of the model's answer.
        result = gemma_pipe(prompt, return_full_text=False)
        return result[0]["generated_text"].strip()

    # 1. BLIP: Caption
    caption_res = caption_pipe(image, max_new_tokens=64)
    description = caption_res[0]["generated_text"].strip()

    # 2. GEMMA: Category (first line of the answer; empty if nothing came back)
    prompt_cat = f"This is an ad image. Description: {description}\n\nProvide a concise category label for this ad (e.g. Food, Fitness, Technology):"
    cat_lines = ask_gemma(prompt_cat).splitlines()
    cat_out = cat_lines[0].strip() if cat_lines else ""

    # 3. GEMMA: Five-sentence analysis
    prompt_ana = (
        f"This is an ad image. Description: {description}\n\n"
        "Write exactly five sentences explaining what this ad communicates and its emotional impact."
    )
    ana_raw = ask_gemma(prompt_ana)
    # Keep only the first five sentences.
    sentences = re.split(r'(?<=[.!?])\s+', ana_raw)
    analysis = " ".join(sentences[:5])

    # 4. GEMMA: Five suggestions (bullets, unique)
    prompt_sug = (
        f"This is an ad image. Description: {description}\n\n"
        "Suggest five unique, practical improvements for this ad. Each must address a different aspect (message, visuals, call-to-action, targeting, layout, or design). "
        "Each suggestion must be one sentence and start with '- '. Do NOT repeat suggestions."
    )
    bullets = []
    seen = set()
    for line in ask_gemma(prompt_sug).splitlines():
        # Normalize every non-empty line to a single "- " bullet, so indented
        # or already-bulleted lines are not double-prefixed.
        text = line.strip().lstrip("-").strip()
        if not text:
            continue
        suggestion = "- " + text
        if suggestion not in seen:
            seen.add(suggestion)
            bullets.append(suggestion)
        if len(bullets) == 5:
            break

    # Defaults if not enough bullets
    defaults = [
        "- Make the main headline more eye-catching.",
        "- Add a clear and visible call-to-action button.",
        "- Use contrasting colors for better readability.",
        "- Highlight the unique selling point of the product.",
        "- Simplify the design to reduce clutter.",
    ]
    for default in defaults:
        if len(bullets) < 5 and default not in seen:
            bullets.append(default)
    suggestions = "\n".join(bullets[:5])

    return cat_out, analysis, suggestions, get_recommendations()
114
 
115
  def main():
116
- with gr.Blocks(title="Smart Ad Analyzer (BLIP+Gemma)") as demo:
117
  gr.Markdown("## 📢 Smart Ad Analyzer (BLIP + Gemma)")
118
  gr.Markdown(
119
  """
120
- Upload your ad image below and instantly get expert feedback.
121
  Category, analysis, improvement suggestions—and example ads for inspiration.
122
  """
123
  )
124
  with gr.Row():
125
  inp = gr.Image(type='pil', label='Upload Ad Image')
126
  with gr.Column():
127
- cat_out = gr.Textbox(label='🗂️ Ad Category', interactive=False)
128
- ana_out = gr.Textbox(label='📊 Ad Analysis', lines=5, interactive=False)
129
- sug_out = gr.Textbox(label='🛠️ Improvement Suggestions', lines=5, interactive=False)
130
  btn = gr.Button('Analyze Ad', variant='primary')
131
  gallery = gr.Gallery(label='Example Ads')
132
  btn.click(
 
2
  import gradio as gr
3
  import torch
4
  from PIL import Image
5
+ from transformers import pipeline, AutoProcessor, AutoModelForVision2Seq
6
 
7
# Hugging Face access token from the environment; None when it is not set.
HF_TOKEN = os.environ.get("HF_TOKEN")

# Auto-detect device: index 0 when CUDA is available, otherwise -1.
DEVICE = 0 if torch.cuda.is_available() else -1

# Load BLIP for captioning
processor = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
blip_model = AutoModelForVision2Seq.from_pretrained("Salesforce/blip-image-captioning-large")
caption_pipe = pipeline(
    "image-to-text",
    model=blip_model,
    tokenizer=processor.tokenizer,
    image_processor=processor.image_processor,
    device=DEVICE,
)

# Load Gemma for text generation (pick your Gemma checkpoint here)
gemma_pipe = pipeline(
    "text-generation",
    model="google/gemma-2b-it",  # Change this to any working Gemma instruct model!
    device=DEVICE,
    # Pass the token explicitly so a gated Gemma checkpoint can be downloaded;
    # HF_TOKEN is None when the variable is unset, which leaves the library's
    # default credential lookup in effect.
    token=HF_TOKEN,
)
31
 
32
  def get_recommendations():
 
43
  "https://i.imgur.com/Xj92Cjv.jpeg",
44
  ]
45
 
46
def clean_output(text, prompt=None):
    """Strip prompt echoes from generated text.

    Args:
        text: Raw text returned by the generation pipeline. ``None`` or an
            empty string yields ``""``.
        prompt: Optional prompt that produced ``text``. When given and
            ``text`` starts with it, the echoed prompt is removed exactly,
            before the marker-based cleanup runs.

    Returns:
        The text after the first ``"Description:"`` marker (if present),
        then after the first ``"Category:"`` marker (if present), with
        surrounding whitespace removed.
    """
    if not text:
        return ""

    if prompt:
        echoed = prompt.strip()
        candidate = text.lstrip()
        # An exact prefix match removes the whole echo, including the
        # description that follows the "Description:" marker in the prompt.
        if echoed and candidate.startswith(echoed):
            text = candidate[len(echoed):]

    # Marker fallback (Gemma sometimes echoes): keep what follows each label.
    for marker in ("Description:", "Category:"):
        if marker in text:
            text = text.split(marker, 1)[-1]
    return text.strip()
53
+
54
def process(image: Image.Image):
    """Analyze an uploaded ad image.

    The image is captioned with the BLIP pipeline, then Gemma is prompted
    with that caption for a category, a five-sentence analysis, and five
    improvement suggestions.

    Args:
        image: The uploaded PIL image, or None when nothing was uploaded.

    Returns:
        A tuple ``(category, analysis, suggestions, recommendations)``. The
        first three are strings (empty when ``image`` is None); the last is
        whatever ``get_recommendations()`` returns.
    """
    if image is None:
        return "", "", "", get_recommendations()

    def ask_gemma(prompt, max_new_tokens):
        """Return Gemma's cleaned completion for *prompt*."""
        # The text-generation pipeline returns prompt + completion by default.
        # Every prompt here ends with "Description: {desc}", so the marker
        # split in clean_output() would leave the caption glued to the front
        # of the answer; ask for the completion only instead.
        out = gemma_pipe(
            prompt,
            max_new_tokens=max_new_tokens,
            return_full_text=False,
        )
        return clean_output(out[0]["generated_text"])

    # 1. BLIP captioning
    caption_res = caption_pipe(image, max_new_tokens=64)
    desc = caption_res[0]["generated_text"].strip()

    # 2. Gemma: Category
    cat_prompt = f"Classify the following ad in one or two words. Description: {desc}"
    cat_out = ask_gemma(cat_prompt, 16)

    # 3. Gemma: Analysis (5 sentences)
    ana_prompt = (
        f"Describe in exactly five sentences what this ad communicates and its emotional impact. Description: {desc}"
    )
    ana_out = ask_gemma(ana_prompt, 120)

    # 4. Gemma: Suggestions (5 bullets)
    sug_prompt = (
        f"Suggest five practical improvements for this ad. Each suggestion must be unique, address a different aspect (message, visuals, call to action, targeting, or layout), start with '- ', and be one sentence. Description: {desc}"
    )
    sug_out = ask_gemma(sug_prompt, 120)
    # Keep only lines that start with '-'; fall back to the raw text when the
    # model produced no bullet lines at all.
    sug_lines = [
        line.strip()
        for line in sug_out.splitlines()
        if line.strip().startswith("-")
    ]
    suggestions = "\n".join(sug_lines[:5]) if sug_lines else sug_out

    return cat_out, ana_out, suggestions, get_recommendations()
85
 
86
  def main():
87
+ with gr.Blocks(title="Smart Ad Analyzer (BLIP + Gemma)") as demo:
88
  gr.Markdown("## 📢 Smart Ad Analyzer (BLIP + Gemma)")
89
  gr.Markdown(
90
  """
91
+ Upload your ad image below and instantly get expert feedback.
92
  Category, analysis, improvement suggestions—and example ads for inspiration.
93
  """
94
  )
95
  with gr.Row():
96
  inp = gr.Image(type='pil', label='Upload Ad Image')
97
  with gr.Column():
98
+ cat_out = gr.Textbox(label='Ad Category', interactive=False)
99
+ ana_out = gr.Textbox(label='Ad Analysis', lines=5, interactive=False)
100
+ sug_out = gr.Textbox(label='Improvement Suggestions', lines=5, interactive=False)
101
  btn = gr.Button('Analyze Ad', variant='primary')
102
  gallery = gr.Gallery(label='Example Ads')
103
  btn.click(