"""
BleuPilot – Amazon Listing Optimizer (MVP)
------------------------------------------
Self-contained Gradio app for Hugging Face Spaces.
- Generates localized Amazon titles, 5 bullets, and a description (FR/EN/DE/ES/IT)
- Simple keyword enforcement and SEO checks
- Uses Hugging Face serverless Inference API via `huggingface_hub.InferenceClient`
"""
from __future__ import annotations
import os
import re
from dataclasses import dataclass
from typing import List, Dict, Tuple
import gradio as gr
from huggingface_hub import InferenceClient
# -------------------------
# Config
# -------------------------
# Model id used for text generation; the HF_TEXT_MODEL environment variable overrides the default.
HF_TEXT_MODEL = os.environ.get("HF_TEXT_MODEL", "HuggingFaceH4/zephyr-7b-beta")
# Token read from the HF_API_TOKEN environment variable; None when the variable is unset.
HF_API_TOKEN = os.environ.get("HF_API_TOKEN")

# Dropdown label -> language code. The UI lists the labels in this insertion order.
SUPPORTED_LANGS = {
    "French (FR)": "fr",
    "English (EN)": "en",
    "German (DE)": "de",
    "Spanish (ES)": "es",
    "Italian (IT)": "it",
}

# Amazon soft cap on title length (varies by category).
MAX_TITLE_CHARS = 200
# Number of bullet points requested from the model and shown in the UI.
BULLET_COUNT = 5
# -------------------------
# Helpers
# -------------------------
@dataclass
class ListingInput:
    """Raw listing data gathered from the form and handed to the prompt builder."""

    # Current product title as typed by the user.
    title: str
    # Free-form feature text; `make_prompt` splits it into individual items.
    features: str
    # Current product description.
    description: str
    # Language code of the requested output (one of the SUPPORTED_LANGS values).
    target_lang_code: str
    # Keywords the generated copy should contain.
    seed_keywords: List[str]
def clean_keywords(raw: str) -> List[str]:
    """Split a raw keyword string into a list of tidy keywords.

    Keywords may be separated by newlines, commas or semicolons. Runs of
    whitespace inside a keyword collapse to a single space, surrounding
    whitespace is removed, and empty entries are dropped. Order and
    duplicates are kept as given.
    """
    if not raw.strip():
        return []

    keywords: List[str] = []
    for chunk in re.split(r"[\n,;]", raw):
        tidy = re.sub(r"\s+", " ", chunk).strip()
        if tidy:
            keywords.append(tidy)
    return keywords
def _keyword_present(text: str, keyword: str) -> bool:
    """Return True when *keyword* occurs in *text*, ignoring case."""
    # A "\b" anchor can only match next to a word character. Wrapping a keyword
    # such as "C++" in "\b...\b" therefore never matches "C++ " and the keyword
    # would be reported missing although it is present. Anchor only the sides
    # of the keyword that actually start/end with a word character.
    head = r"\b" if re.match(r"\w", keyword) else ""
    tail = r"\b" if re.search(r"\w\Z", keyword) else ""
    pattern = f"{head}{re.escape(keyword)}{tail}"
    return re.search(pattern, text, flags=re.IGNORECASE) is not None


def ensure_keywords(text: str, keywords: List[str], lang_code: str) -> str:
    """Naive keyword enforcement: append a short clause listing missing keywords.

    Args:
        text: Copy to check.
        keywords: Keywords that should appear in ``text``; empty entries are ignored.
        lang_code: Language code selecting the localized clause. Unknown codes
            fall back to a generic English " Keywords: ..." clause.

    Returns:
        ``text`` unchanged when every keyword is already present
        (case-insensitive, whole-word on the sides where the keyword has a word
        character); otherwise ``text`` followed by one clause naming the
        missing keywords, joined with "; ".
    """
    if not keywords:
        return text

    missing = [kw for kw in keywords if kw and not _keyword_present(text, kw)]
    if not missing:
        return text

    extra = "; ".join(missing)
    suffix_map = {
        "fr": f" Mots-clés inclus : {extra}.",
        "en": f" Keywords included: {extra}.",
        "de": f" Enthaltene Schlüsselwörter: {extra}.",
        "es": f" Palabras clave incluidas: {extra}.",
        "it": f" Parole chiave incluse: {extra}.",
    }
    return text + suffix_map.get(lang_code, f" Keywords: {extra}.")
def seo_score(title: str, bullets: List[str], desc: str, keywords: List[str]) -> Dict[str, str]:
    """Build a small report of SEO checks for a generated listing.

    Args:
        title: Listing title.
        bullets: Bullet points; blank entries are treated as placeholders and
            are not counted.
        desc: Listing description.
        keywords: Keywords whose presence (case-insensitive substring match
            over title, bullets and description) is reported.

    Returns:
        A dict of display strings with the keys ``title_length``, ``title_ok``,
        ``bullet_count``, ``bullet_ok``, ``keyword_coverage`` and
        ``keywords_missing``.
    """
    score: Dict[str, str] = {}

    title_len = len(title)
    score["title_length"] = f"{title_len} / {MAX_TITLE_CHARS} chars"
    score["title_ok"] = "✅" if title_len <= MAX_TITLE_CHARS else "❌ Too long"

    # Count only bullets with real content: callers may pad the list with ""
    # up to BULLET_COUNT, and counting those placeholders would make the check
    # pass even when the model produced fewer bullets.
    filled = [b for b in bullets if b and b.strip()]
    score["bullet_count"] = f"{len(filled)} (target {BULLET_COUNT})"
    score["bullet_ok"] = "✅" if len(filled) == BULLET_COUNT else f"⚠️ Aim for {BULLET_COUNT} bullets"

    # One lowercase blob so each keyword is checked against the whole listing.
    blob = "\n".join([title, *bullets, desc]).lower()
    missing = [kw for kw in keywords if kw.lower() not in blob]
    coverage = len(keywords) - len(missing)

    if keywords:
        pct = int(round(100 * coverage / len(keywords)))
        score["keyword_coverage"] = f"{coverage}/{len(keywords)} ({pct}%)"
    else:
        score["keyword_coverage"] = "N/A"
    score["keywords_missing"] = ", ".join(missing) if missing else "None"
    return score
def make_prompt(user: ListingInput) -> str:
    """Build the chat-style prompt sent to the text-generation model.

    The prompt has a system section (role and constraints), a user section
    carrying the target language, seed keywords and the original listing, and
    ends with the assistant tag so the model continues from there.

    Args:
        user: Listing data and options collected from the form.

    Returns:
        The full prompt string using ``<|system|>`` / ``<|user|>`` /
        ``<|assistant|>`` tags.
    """
    # Features are separated by newlines or bullet characters. A hyphen is only
    # a list marker at the start of an item; splitting on every "-" would cut
    # hyphenated words such as "anti-slip" into two separate features.
    feats = []
    for piece in re.split(r"[\n\u2022]", user.features):
        item = piece.strip().lstrip("- \t").strip()
        if item:
            feats.append(item)

    system = (
        "You are an expert Amazon SEO copywriter for EU marketplaces. "
        "Rewrite the listing to maximize CTR and conversion while keeping it compliant. "
        "Output in the target language only. Follow constraints strictly."
    )
    constraints = (
        f"- Title: ≤ {MAX_TITLE_CHARS} characters.\n"
        f"- Provide exactly {BULLET_COUNT} concise bullets.\n"
        "- Use clear, benefit-driven language, include seed keywords naturally.\n"
        "- Keep measurements in metric; avoid claims that require certifications.\n"
        "- Respect EU localization (decimal commas, local idioms).\n"
    )

    seed_kw = ", ".join(user.seed_keywords) if user.seed_keywords else "(none)"
    # Mirror the seed-keyword placeholder so an empty feature list does not
    # leave a dangling "- " bullet in the prompt.
    feature_lines = "\n- ".join(feats) if feats else "(none)"
    content = (
        "\n"
        f"TARGET_LANGUAGE: {user.target_lang_code}\n"
        f"SEED_KEYWORDS: {seed_kw}\n"
        f"ORIGINAL_TITLE: {user.title}\n"
        "ORIGINAL_FEATURES:\n"
        f"- {feature_lines}\n"
        "ORIGINAL_DESCRIPTION:\n"
        f"{user.description}\n"
        f"Return JSON with fields: title, bullets (array of {BULLET_COUNT}), description.\n"
    )

    return f"<|system|>\n{system}\n\nConstraints:\n{constraints}\n<|user|>\n{content}\n<|assistant|>"
def _parse_json_listing(response: str) -> Tuple[str, List[str], str]:
    """Pull title, bullets and description from a JSON object embedded in *response*.

    Returns ``("", [], "")`` when no parsable JSON object is found. Fields with
    an unexpected type are treated as absent instead of raising.
    """
    import json  # local import: the module header does not import json

    empty: Tuple[str, List[str], str] = ("", [], "")
    match = re.search(r"\{[\s\S]*\}", response)
    if match is None:
        return empty
    try:
        data = json.loads(match.group(0))
    except (ValueError, RecursionError):
        # Model output is frequently not valid JSON; the caller falls back to
        # line-based parsing in that case.
        return empty
    if not isinstance(data, dict):
        return empty

    raw_title = data.get("title")
    raw_bullets = data.get("bullets")
    raw_desc = data.get("description")

    title = raw_title.strip() if isinstance(raw_title, str) else ""
    bullets = (
        [b.strip() for b in raw_bullets if isinstance(b, str)]
        if isinstance(raw_bullets, list)
        else []
    )
    desc = raw_desc.strip() if isinstance(raw_desc, str) else ""
    return title, bullets[:BULLET_COUNT], desc


def _parse_plaintext_listing(response: str) -> Tuple[str, List[str], str]:
    """Best-effort line-based parse used when the reply carries no usable JSON title."""
    lines = [ln.strip() for ln in response.splitlines() if ln.strip()]

    # Prefer a "Title: ..." line; otherwise take the first non-empty line.
    title = next(
        (ln.split(":", 1)[1].strip() for ln in lines if ln.lower().startswith("title") and ":" in ln),
        lines[0] if lines else "",
    )

    # Lines starting with "-" or a bullet character are bullets; if there are
    # none, assume the lines right after the first one are the bullets.
    bullets = [ln.lstrip("-• ").strip() for ln in lines if ln.startswith(("-", "•"))][:BULLET_COUNT]
    if not bullets:
        bullets = lines[1:1 + BULLET_COUNT]

    # Everything after a "Description..." line is the description; otherwise
    # whatever follows the assumed title + bullets lines.
    desc_idx = next((i for i, ln in enumerate(lines) if ln.lower().startswith("description")), None)
    if desc_idx is not None:
        desc = "\n".join(lines[desc_idx + 1:])
    else:
        desc = "\n".join(lines[BULLET_COUNT + 1:])
    return title, bullets, desc


def generate_listing(user: ListingInput) -> Tuple[str, List[str], str, Dict[str, str]]:
    """Generate an optimized listing with the configured text-generation model.

    Args:
        user: Listing data and options collected from the form.

    Returns:
        ``(title, bullets, description, score)`` where ``bullets`` always has
        exactly ``BULLET_COUNT`` entries (padded with "" when the model
        produced fewer) and ``score`` is the report from ``seo_score``.

    Errors raised by the inference client are not caught here.
    """
    client = InferenceClient(model=HF_TEXT_MODEL, token=HF_API_TOKEN)
    # NOTE(review): with stream=False and no details requested the client is
    # expected to return the generated text as a plain str - confirm.
    response = client.text_generation(
        make_prompt(user),
        max_new_tokens=700,
        temperature=0.6,
        top_p=0.9,
        repetition_penalty=1.1,
        return_full_text=False,
        stream=False,
    )

    title, bullets, desc = _parse_json_listing(response)
    if not title:
        # No usable JSON title: re-derive all three parts from the raw lines.
        title, bullets, desc = _parse_plaintext_listing(response)

    title = ensure_keywords(title, user.seed_keywords, user.target_lang_code)
    desc = ensure_keywords(desc, user.seed_keywords, user.target_lang_code)

    # Score the bullets the model actually produced. Padding first would make
    # the bullet-count check pass regardless of the model output.
    score = seo_score(title, [b for b in bullets if b], desc, user.seed_keywords)

    # Pad / trim to a fixed width for the bullets table in the UI.
    bullets = (bullets + [""] * BULLET_COUNT)[:BULLET_COUNT]
    return title, bullets, desc, score
# -------------------------
# UI
# -------------------------
with gr.Blocks(title="BleuPilot – Listing Optimizer", css=".mono{font-family:ui-monospace,Menlo,monospace}") as demo:
    # Page header.
    gr.Markdown(
        "\n"
        "# BleuPilot – Amazon Listing Optimizer (MVP)\n"
        "Paste your current listing, choose a language, add seed keywords, and generate.\n"
        "> Tip: Start with FR/DE/ES/IT to localize your EU listings quickly.\n"
    )

    with gr.Row():
        # Left column: inputs.
        with gr.Column():
            inp_title = gr.Textbox(label="Original Title", placeholder="Enter current product title…", lines=2)
            inp_features = gr.Textbox(
                label="Features (one per line)",
                placeholder="Feature 1\nFeature 2\nFeature 3…",
                lines=8,
            )
            inp_desc = gr.Textbox(label="Original Description", placeholder="Paste current description…", lines=8)
            lang = gr.Dropdown(list(SUPPORTED_LANGS), value="French (FR)", label="Target Language")
            kw = gr.Textbox(
                label="Seed Keywords (comma or line-separated)",
                placeholder="chien, sac à déjections, biodégradable…",
                lines=3,
            )
            run_btn = gr.Button("Generate Optimized Listing 🚀", variant="primary")

        # Right column: generated listing and SEO report.
        with gr.Column():
            out_title = gr.Textbox(label="Optimized Title", lines=2)
            out_bullets = gr.Dataframe(
                headers=[f"Bullet {n}" for n in range(1, BULLET_COUNT + 1)],
                row_count=1,
                col_count=BULLET_COUNT,
                wrap=True,
            )
            out_desc = gr.Textbox(label="Optimized Description", lines=10)
            with gr.Accordion("SEO Checks", open=False):
                score_title = gr.Markdown("")

    def _on_click(title, feats, desc, lang_label, kw_raw):
        """Button callback: build the request, generate the listing, format the outputs.

        Missing form values are treated as empty strings, and an unknown
        language label falls back to "fr". Returns the values for the title
        box, the bullets table (a single row), the description box and the
        SEO report markdown, in that order.
        """
        listing = ListingInput(
            title=title or "",
            features=feats or "",
            description=desc or "",
            target_lang_code=SUPPORTED_LANGS.get(lang_label, "fr"),
            seed_keywords=clean_keywords(kw_raw or ""),
        )
        new_title, bullets, new_desc, score = generate_listing(listing)

        report_lines = [
            f"**Title length:** {score['title_length']} — {score['title_ok']}",
            f"**Bullet count:** {score['bullet_count']} — {score['bullet_ok']}",
            f"**Keyword coverage:** {score['keyword_coverage']}",
            f"**Missing keywords:** {score['keywords_missing']}",
        ]
        # The bullets table expects a list of rows; all bullets go into one row.
        return new_title, [bullets], new_desc, "\n\n".join(report_lines)

    run_btn.click(
        fn=_on_click,
        inputs=[inp_title, inp_features, inp_desc, lang, kw],
        outputs=[out_title, out_bullets, out_desc, score_title],
    )

    # Footer notes.
    gr.Markdown(
        "\n"
        "---\n"
        "### Notes\n"
        "- For best results, supply 5–8 seed keywords you want included.\n"
        "- Keep titles under ~200 chars. Some categories enforce smaller caps.\n"
        "- This MVP uses serverless text-generation. For production scale, switch to **Inference Endpoints** (dedicated CPU/GPU) and add auth + rate limits.\n"
    )

if __name__ == "__main__":
    demo.launch()