Jesiel Rombley committed on
Commit
4eecb9b
·
verified ·
1 Parent(s): 4b9be6d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -61
app.py CHANGED
@@ -1,20 +1,10 @@
1
  """
2
  BleuPilot – Amazon Listing Optimizer (MVP)
3
- -------------------------------------------------
4
  Self-contained Gradio app for Hugging Face Spaces.
5
  - Generates localized Amazon titles, 5 bullets, and a description (FR/EN/DE/ES/IT)
6
  - Simple keyword enforcement and SEO checks
7
  - Uses Hugging Face serverless Inference API via `huggingface_hub.InferenceClient`
8
-
9
- How to deploy on a Space (summary):
10
- 1) Create a new Space (SDK: Gradio, Private or Public).
11
- 2) Add two files: `app.py` (this file) and `requirements.txt` (see bottom comment).
12
- 3) In Space Settings → Secrets, add: `HF_API_TOKEN` (with Inference API access).
13
- 4) Commit & run. Optional: set `HF_TEXT_MODEL` space variable to switch models.
14
-
15
- Note: For best latency, start with a light instruct model available on serverless.
16
- Recommended default: "HuggingFaceH4/zephyr-7b-beta" (changeable via env var).
17
- You can later migrate hot paths to Inference Endpoints for predictable scale.
18
  """
19
 
20
  from __future__ import annotations
@@ -30,7 +20,7 @@ from huggingface_hub import InferenceClient
30
  # Config
31
  # -------------------------
32
  HF_TEXT_MODEL = os.getenv("HF_TEXT_MODEL", "HuggingFaceH4/zephyr-7b-beta")
33
- HF_API_TOKEN = os.getenv("HF_API_TOKEN", None)
34
 
35
  SUPPORTED_LANGS = {
36
  "French (FR)": "fr",
@@ -54,7 +44,6 @@ class ListingInput:
54
  target_lang_code: str
55
  seed_keywords: List[str]
56
 
57
-
58
  def clean_keywords(raw: str) -> List[str]:
59
  if not raw.strip():
60
  return []
@@ -62,7 +51,6 @@ def clean_keywords(raw: str) -> List[str]:
62
  items = [re.sub(r"\s+", " ", s).strip() for s in items]
63
  return [s for s in items if s]
64
 
65
-
66
  def ensure_keywords(text: str, keywords: List[str], lang_code: str) -> str:
67
  """Naive keyword enforcement: if a keyword is missing, append a short clause."""
68
  if not keywords:
@@ -70,7 +58,6 @@ def ensure_keywords(text: str, keywords: List[str], lang_code: str) -> str:
70
  missing = [kw for kw in keywords if re.search(rf"\b{re.escape(kw)}\b", text, flags=re.IGNORECASE) is None]
71
  if missing:
72
  extra = "; ".join(missing)
73
- # Append in a natural way per language
74
  suffix_map = {
75
  "fr": f" Mots-clés inclus : {extra}.",
76
  "en": f" Keywords included: {extra}.",
@@ -81,7 +68,6 @@ def ensure_keywords(text: str, keywords: List[str], lang_code: str) -> str:
81
  text += suffix_map.get(lang_code, f" Keywords: {extra}.")
82
  return text
83
 
84
-
85
  def seo_score(title: str, bullets: List[str], desc: str, keywords: List[str]) -> Dict[str, str]:
86
  score = {}
87
  title_len = len(title)
@@ -91,7 +77,6 @@ def seo_score(title: str, bullets: List[str], desc: str, keywords: List[str]) ->
91
  score["bullet_count"] = f"{len(bullets)} (target {BULLET_COUNT})"
92
  score["bullet_ok"] = "✅" if len(bullets) == BULLET_COUNT else "⚠️ Aim for 5 bullets"
93
 
94
- # Keyword coverage (simple substring check across all blocks)
95
  blob = "\n".join([title] + bullets + [desc]).lower()
96
  coverage = 0
97
  missing = []
@@ -106,15 +91,11 @@ def seo_score(title: str, bullets: List[str], desc: str, keywords: List[str]) ->
106
  else:
107
  score["keyword_coverage"] = "N/A"
108
  score["keywords_missing"] = ", ".join(missing) if missing else "None"
109
-
110
  return score
111
 
112
-
113
  def make_prompt(user: ListingInput) -> str:
114
- # Normalize features into list
115
  feats = [s.strip() for s in re.split(r"[\n•\-\u2022]", user.features) if s.strip()]
116
 
117
- # System-style instructions for instruct models
118
  system = (
119
  "You are an expert Amazon SEO copywriter for EU marketplaces. "
120
  "Rewrite the listing to maximize CTR and conversion while keeping it compliant. "
@@ -136,7 +117,8 @@ TARGET_LANGUAGE: {user.target_lang_code}
136
  SEED_KEYWORDS: {seed_kw}
137
 
138
  ORIGINAL_TITLE: {user.title}
139
- ORIGINAL_FEATURES:\n- """ + "\n- ".join(feats) + f"""
 
140
 
141
  ORIGINAL_DESCRIPTION:
142
  {user.description}
@@ -147,13 +129,10 @@ Return JSON with fields: title, bullets (array of 5), description.
147
  prompt = f"<|system|>\n{system}\n\nConstraints:\n{constraints}\n<|user|>\n{content}\n<|assistant|>"
148
  return prompt
149
 
150
-
151
  def generate_listing(user: ListingInput) -> Tuple[str, List[str], str, Dict[str, str]]:
152
  client = InferenceClient(model=HF_TEXT_MODEL, token=HF_API_TOKEN)
153
-
154
  prompt = make_prompt(user)
155
 
156
- # Text-generation params tuned for instruction models
157
  response = client.text_generation(
158
  prompt,
159
  max_new_tokens=700,
@@ -164,7 +143,6 @@ def generate_listing(user: ListingInput) -> Tuple[str, List[str], str, Dict[str,
164
  stream=False,
165
  )
166
 
167
- # Heuristic: extract JSON block
168
  json_match = re.search(r"\{[\s\S]*\}", response)
169
  title, bullets, desc = "", [], ""
170
 
@@ -178,15 +156,10 @@ def generate_listing(user: ListingInput) -> Tuple[str, List[str], str, Dict[str,
178
  except Exception:
179
  pass
180
 
181
- # Fallback: try to split text if JSON parsing failed
182
  if not title:
183
- # naive parsing
184
  lines = [l.strip() for l in response.splitlines() if l.strip()]
185
- # find title
186
  title = next((l.split(":",1)[1].strip() for l in lines if l.lower().startswith("title") and ":" in l), lines[0] if lines else "")
187
- # bullets
188
  bullets = [l.lstrip("-• ").strip() for l in lines if l.startswith(("-","•"))][:BULLET_COUNT]
189
- # description
190
  if not bullets:
191
  bullets = [l for l in lines[1:1+BULLET_COUNT]]
192
  desc_idx = next((i for i,l in enumerate(lines) if l.lower().startswith("description")), None)
@@ -195,19 +168,13 @@ def generate_listing(user: ListingInput) -> Tuple[str, List[str], str, Dict[str,
195
  else:
196
  desc = "\n".join(lines[BULLET_COUNT+1:])
197
 
198
- # Keyword enforcement
199
  title = ensure_keywords(title, user.seed_keywords, user.target_lang_code)
200
- desc = ensure_keywords(desc, user.seed_keywords, user.target_lang_code)
201
 
202
- # Pad/trim bullets to exactly 5
203
  bullets = (bullets + [""]*BULLET_COUNT)[:BULLET_COUNT]
204
-
205
- # SEO score
206
  score = seo_score(title, bullets, desc, user.seed_keywords)
207
-
208
  return title, bullets, desc, score
209
 
210
-
211
  # -------------------------
212
  # UI
213
  # -------------------------
@@ -220,17 +187,17 @@ Paste your current listing, choose a language, add seed keywords, and generate.
220
 
221
  with gr.Row():
222
  with gr.Column():
223
- inp_title = gr.Textbox(label="Original Title", placeholder="Enter current product title…", lines=2)
224
- inp_features = gr.Textbox(label="Features (one per line)", placeholder="Feature 1\nFeature 2\nFeature 3…", lines=8)
225
- inp_desc = gr.Textbox(label="Original Description", placeholder="Paste current description…", lines=8)
226
- lang = gr.Dropdown(list(SUPPORTED_LANGS.keys()), value="French (FR)", label="Target Language")
227
- kw = gr.Textbox(label="Seed Keywords (comma or line-separated)", placeholder="chien, sac à déjections, biodégradable…", lines=3)
228
- run_btn = gr.Button("Generate Optimized Listing 🚀", variant="primary")
229
 
230
  with gr.Column():
231
- out_title = gr.Textbox(label="Optimized Title", lines=2)
232
  out_bullets = gr.Dataframe(headers=[f"Bullet {i+1}" for i in range(BULLET_COUNT)], row_count=1, col_count=BULLET_COUNT, wrap=True)
233
- out_desc = gr.Textbox(label="Optimized Description", lines=10)
234
 
235
  with gr.Accordion("SEO Checks", open=False):
236
  score_title = gr.Markdown("")
@@ -244,9 +211,7 @@ Paste your current listing, choose a language, add seed keywords, and generate.
244
  seed_keywords=clean_keywords(kw_raw or ""),
245
  )
246
  new_title, bullets, new_desc, score = generate_listing(user)
247
- # Convert bullets to a single-row dataframe structure
248
  bullets_row = [bullets]
249
- # Render score as markdown
250
  score_md = (
251
  f"**Title length:** {score['title_length']} — {score['title_ok']}\n\n"
252
  f"**Bullet count:** {score['bullet_count']} — {score['bullet_ok']}\n\n"
@@ -257,24 +222,13 @@ Paste your current listing, choose a language, add seed keywords, and generate.
257
 
258
  run_btn.click(_on_click, [inp_title, inp_features, inp_desc, lang, kw], [out_title, out_bullets, out_desc, score_title])
259
 
260
- gr.Markdown(
261
- """
262
  ---
263
  ### Notes
264
  - For best results, supply 5–8 seed keywords you want included.
265
  - Keep titles under ~200 chars. Some categories enforce smaller caps.
266
  - This MVP uses serverless text-generation. For production scale, switch to **Inference Endpoints** (dedicated CPU/GPU) and add auth + rate limits.
267
- """
268
- )
269
 
270
  if __name__ == "__main__":
271
  demo.launch()
272
-
273
- # -------------------------
274
- # requirements.txt (create a separate file in your Space)
275
- # -------------------------
276
- # gradio>=4.31.0
277
- # huggingface_hub>=0.23.0
278
- #
279
- # Optionally pin a specific version of transformers if you later switch to local models
280
- # transformers>=4.41.0
 
1
  """
2
  BleuPilot – Amazon Listing Optimizer (MVP)
3
+ ------------------------------------------
4
  Self-contained Gradio app for Hugging Face Spaces.
5
  - Generates localized Amazon titles, 5 bullets, and a description (FR/EN/DE/ES/IT)
6
  - Simple keyword enforcement and SEO checks
7
  - Uses Hugging Face serverless Inference API via `huggingface_hub.InferenceClient`
 
 
 
 
 
 
 
 
 
 
8
  """
9
 
10
  from __future__ import annotations
 
20
  # Config
21
  # -------------------------
22
  HF_TEXT_MODEL = os.getenv("HF_TEXT_MODEL", "HuggingFaceH4/zephyr-7b-beta")
23
+ HF_API_TOKEN = os.getenv("HF_API_TOKEN", None)
24
 
25
  SUPPORTED_LANGS = {
26
  "French (FR)": "fr",
 
44
  target_lang_code: str
45
  seed_keywords: List[str]
46
 
 
47
  def clean_keywords(raw: str) -> List[str]:
48
  if not raw.strip():
49
  return []
 
51
  items = [re.sub(r"\s+", " ", s).strip() for s in items]
52
  return [s for s in items if s]
53
 
 
54
  def ensure_keywords(text: str, keywords: List[str], lang_code: str) -> str:
55
  """Naive keyword enforcement: if a keyword is missing, append a short clause."""
56
  if not keywords:
 
58
  missing = [kw for kw in keywords if re.search(rf"\b{re.escape(kw)}\b", text, flags=re.IGNORECASE) is None]
59
  if missing:
60
  extra = "; ".join(missing)
 
61
  suffix_map = {
62
  "fr": f" Mots-clés inclus : {extra}.",
63
  "en": f" Keywords included: {extra}.",
 
68
  text += suffix_map.get(lang_code, f" Keywords: {extra}.")
69
  return text
70
 
 
71
  def seo_score(title: str, bullets: List[str], desc: str, keywords: List[str]) -> Dict[str, str]:
72
  score = {}
73
  title_len = len(title)
 
77
  score["bullet_count"] = f"{len(bullets)} (target {BULLET_COUNT})"
78
  score["bullet_ok"] = "✅" if len(bullets) == BULLET_COUNT else "⚠️ Aim for 5 bullets"
79
 
 
80
  blob = "\n".join([title] + bullets + [desc]).lower()
81
  coverage = 0
82
  missing = []
 
91
  else:
92
  score["keyword_coverage"] = "N/A"
93
  score["keywords_missing"] = ", ".join(missing) if missing else "None"
 
94
  return score
95
 
 
96
  def make_prompt(user: ListingInput) -> str:
 
97
  feats = [s.strip() for s in re.split(r"[\n•\-\u2022]", user.features) if s.strip()]
98
 
 
99
  system = (
100
  "You are an expert Amazon SEO copywriter for EU marketplaces. "
101
  "Rewrite the listing to maximize CTR and conversion while keeping it compliant. "
 
117
  SEED_KEYWORDS: {seed_kw}
118
 
119
  ORIGINAL_TITLE: {user.title}
120
+ ORIGINAL_FEATURES:
121
+ - """ + "\n- ".join(feats) + f"""
122
 
123
  ORIGINAL_DESCRIPTION:
124
  {user.description}
 
129
  prompt = f"<|system|>\n{system}\n\nConstraints:\n{constraints}\n<|user|>\n{content}\n<|assistant|>"
130
  return prompt
131
 
 
132
  def generate_listing(user: ListingInput) -> Tuple[str, List[str], str, Dict[str, str]]:
133
  client = InferenceClient(model=HF_TEXT_MODEL, token=HF_API_TOKEN)
 
134
  prompt = make_prompt(user)
135
 
 
136
  response = client.text_generation(
137
  prompt,
138
  max_new_tokens=700,
 
143
  stream=False,
144
  )
145
 
 
146
  json_match = re.search(r"\{[\s\S]*\}", response)
147
  title, bullets, desc = "", [], ""
148
 
 
156
  except Exception:
157
  pass
158
 
 
159
  if not title:
 
160
  lines = [l.strip() for l in response.splitlines() if l.strip()]
 
161
  title = next((l.split(":",1)[1].strip() for l in lines if l.lower().startswith("title") and ":" in l), lines[0] if lines else "")
 
162
  bullets = [l.lstrip("-• ").strip() for l in lines if l.startswith(("-","•"))][:BULLET_COUNT]
 
163
  if not bullets:
164
  bullets = [l for l in lines[1:1+BULLET_COUNT]]
165
  desc_idx = next((i for i,l in enumerate(lines) if l.lower().startswith("description")), None)
 
168
  else:
169
  desc = "\n".join(lines[BULLET_COUNT+1:])
170
 
 
171
  title = ensure_keywords(title, user.seed_keywords, user.target_lang_code)
172
+ desc = ensure_keywords(desc, user.seed_keywords, user.target_lang_code)
173
 
 
174
  bullets = (bullets + [""]*BULLET_COUNT)[:BULLET_COUNT]
 
 
175
  score = seo_score(title, bullets, desc, user.seed_keywords)
 
176
  return title, bullets, desc, score
177
 
 
178
  # -------------------------
179
  # UI
180
  # -------------------------
 
187
 
188
  with gr.Row():
189
  with gr.Column():
190
+ inp_title = gr.Textbox(label="Original Title", placeholder="Enter current product title…", lines=2)
191
+ inp_features= gr.Textbox(label="Features (one per line)", placeholder="Feature 1\nFeature 2\nFeature 3…", lines=8)
192
+ inp_desc = gr.Textbox(label="Original Description", placeholder="Paste current description…", lines=8)
193
+ lang = gr.Dropdown(list(SUPPORTED_LANGS.keys()), value="French (FR)", label="Target Language")
194
+ kw = gr.Textbox(label="Seed Keywords (comma or line-separated)", placeholder="chien, sac à déjections, biodégradable…", lines=3)
195
+ run_btn = gr.Button("Generate Optimized Listing 🚀", variant="primary")
196
 
197
  with gr.Column():
198
+ out_title = gr.Textbox(label="Optimized Title", lines=2)
199
  out_bullets = gr.Dataframe(headers=[f"Bullet {i+1}" for i in range(BULLET_COUNT)], row_count=1, col_count=BULLET_COUNT, wrap=True)
200
+ out_desc = gr.Textbox(label="Optimized Description", lines=10)
201
 
202
  with gr.Accordion("SEO Checks", open=False):
203
  score_title = gr.Markdown("")
 
211
  seed_keywords=clean_keywords(kw_raw or ""),
212
  )
213
  new_title, bullets, new_desc, score = generate_listing(user)
 
214
  bullets_row = [bullets]
 
215
  score_md = (
216
  f"**Title length:** {score['title_length']} — {score['title_ok']}\n\n"
217
  f"**Bullet count:** {score['bullet_count']} — {score['bullet_ok']}\n\n"
 
222
 
223
  run_btn.click(_on_click, [inp_title, inp_features, inp_desc, lang, kw], [out_title, out_bullets, out_desc, score_title])
224
 
225
+ gr.Markdown("""
 
226
  ---
227
  ### Notes
228
  - For best results, supply 5–8 seed keywords you want included.
229
  - Keep titles under ~200 chars. Some categories enforce smaller caps.
230
  - This MVP uses serverless text-generation. For production scale, switch to **Inference Endpoints** (dedicated CPU/GPU) and add auth + rate limits.
231
+ """)
 
232
 
233
  if __name__ == "__main__":
234
  demo.launch()