# Jesiel Rombley
# Update app.py
# 4eecb9b verified
"""
BleuPilot – Amazon Listing Optimizer (MVP)
------------------------------------------
Self-contained Gradio app for Hugging Face Spaces.
- Generates localized Amazon titles, 5 bullets, and a description (FR/EN/DE/ES/IT)
- Simple keyword enforcement and SEO checks
- Uses Hugging Face serverless Inference API via `huggingface_hub.InferenceClient`
"""
from __future__ import annotations
import os
import re
from dataclasses import dataclass
from typing import List, Dict, Tuple
import gradio as gr
from huggingface_hub import InferenceClient
# -------------------------
# Config
# -------------------------
# Model id and API token come from the environment so the Space can be
# reconfigured without touching the code. The token is None when unset.
HF_TEXT_MODEL = os.environ.get("HF_TEXT_MODEL", "HuggingFaceH4/zephyr-7b-beta")
HF_API_TOKEN = os.environ.get("HF_API_TOKEN")

# UI label -> language code. Insertion order is the order shown in the dropdown.
SUPPORTED_LANGS = {
    f"{name} ({code.upper()})": code
    for name, code in (
        ("French", "fr"),
        ("English", "en"),
        ("German", "de"),
        ("Spanish", "es"),
        ("Italian", "it"),
    )
}

# Amazon soft cap (varies by category)
MAX_TITLE_CHARS = 200
# Number of bullet points requested from the model and shown in the UI.
BULLET_COUNT = 5
# -------------------------
# Helpers
# -------------------------
@dataclass
class ListingInput:
    """User-supplied listing data handed to the prompt builder and generator."""

    # Current product title as entered in the UI (may be empty).
    title: str
    # Free-text feature list, one per line.
    # NOTE(review): commas are not used as separators by make_prompt, so
    # comma-separated input stays a single feature.
    features: str
    # Current product description as entered in the UI (may be empty).
    description: str
    # Language code for the output; the UI passes a value of SUPPORTED_LANGS.
    target_lang_code: str
    # Keywords the generated copy should contain; the UI passes the
    # result of clean_keywords().
    seed_keywords: List[str]
def clean_keywords(raw: str) -> List[str]:
    """Split a free-text keyword field into a clean, de-duplicated list.

    Keywords may be separated by newlines, commas, or semicolons. Runs of
    whitespace inside a keyword collapse to a single space, and surrounding
    whitespace is removed.

    Args:
        raw: Text typed by the user. ``None`` or blank text yields ``[]``.

    Returns:
        Keywords in first-seen order. Duplicates are dropped using a
        case-insensitive comparison, and the first spelling is kept.
    """
    if not raw or not raw.strip():
        return []

    seen = set()
    keywords: List[str] = []
    for chunk in re.split(r"[\n,;]", raw):
        keyword = " ".join(chunk.split())
        if not keyword:
            continue
        # A repeated keyword would otherwise be appended twice by the keyword
        # enforcement step and counted twice in the coverage score.
        key = keyword.casefold()
        if key in seen:
            continue
        seen.add(key)
        keywords.append(keyword)
    return keywords
def ensure_keywords(text: str, keywords: List[str], lang_code: str) -> str:
    """Naive keyword enforcement: append a short clause listing missing keywords.

    A keyword counts as present when it occurs in ``text`` (case-insensitive)
    and is not embedded in a longer word.

    Args:
        text: Copy to check (title or description).
        keywords: Keywords that must appear; empty entries are ignored.
        lang_code: Language code selecting the localized clause. Unknown
            codes fall back to a generic English clause.

    Returns:
        ``text`` unchanged when nothing is missing, otherwise ``text``
        followed by a localized clause with the missing keywords joined
        by ``"; "``.
    """
    if not keywords:
        return text

    def _is_present(keyword: str) -> bool:
        # Lookarounds instead of \b: \b needs a word character on one side,
        # so keywords such as "C++" or "(XL)" could never match with it.
        pattern = rf"(?<!\w){re.escape(keyword)}(?!\w)"
        return re.search(pattern, text, flags=re.IGNORECASE) is not None

    missing = [kw for kw in keywords if kw and not _is_present(kw)]
    if not missing:
        return text

    templates = {
        "fr": " Mots-clés inclus : {}.",
        "en": " Keywords included: {}.",
        "de": " Enthaltene Schlüsselwörter: {}.",
        "es": " Palabras clave incluidas: {}.",
        "it": " Parole chiave incluse: {}.",
    }
    suffix = templates.get(lang_code, " Keywords: {}.").format("; ".join(missing))
    # Avoid a leading space when there is no text to append to.
    return text + suffix if text else suffix.lstrip()
def seo_score(title: str, bullets: List[str], desc: str, keywords: List[str]) -> Dict[str, str]:
    """Run simple SEO checks on a generated listing.

    Args:
        title: Generated title.
        bullets: Generated bullets. Blank entries (such as padding added to
            fill the UI table) are ignored.
        desc: Generated description.
        keywords: Seed keywords expected somewhere in the listing.

    Returns:
        A dict of display strings with the keys ``title_length``,
        ``title_ok``, ``bullet_count``, ``bullet_ok``, ``keyword_coverage``
        and ``keywords_missing``.
    """
    title_len = len(title)
    # Count only real bullets; counting blank padding made this check
    # pass even when the model returned fewer bullets than requested.
    filled = [b for b in bullets if b and b.strip()]

    score = {
        "title_length": f"{title_len} / {MAX_TITLE_CHARS} chars",
        "title_ok": "✅" if title_len <= MAX_TITLE_CHARS else "❌ Too long",
        "bullet_count": f"{len(filled)} (target {BULLET_COUNT})",
        "bullet_ok": "✅" if len(filled) == BULLET_COUNT else f"⚠️ Aim for {BULLET_COUNT} bullets",
    }

    # casefold() rather than lower() so that e.g. German "ß" matches "ss".
    blob = "\n".join([title, *filled, desc]).casefold()
    missing = [kw for kw in keywords if kw.casefold() not in blob]

    if keywords:
        covered = len(keywords) - len(missing)
        pct = round(100 * covered / len(keywords))
        score["keyword_coverage"] = f"{covered}/{len(keywords)} ({pct}%)"
    else:
        score["keyword_coverage"] = "N/A"
    score["keywords_missing"] = ", ".join(missing) if missing else "None"
    return score
def make_prompt(user: ListingInput) -> str:
    """Build the chat-style prompt sent to the text-generation model.

    The prompt has a system section (role and constraints) and a user
    section carrying the original listing, followed by an open assistant
    tag for the model to complete.

    Args:
        user: Listing data and generation settings.

    Returns:
        The full prompt string.
    """
    # One feature per line or per "•" item. A hyphen or dash is only treated
    # as a list marker at the start of an item, so hyphenated words such as
    # "leak-proof" stay intact and a leading minus sign ("-20 °C") is kept.
    feats = []
    for chunk in re.split(r"[\n\u2022]", user.features):
        feat = re.sub(r"^[-\u2013\u2014]\s*(?=\D)", "", chunk.strip()).strip()
        if feat.strip("-\u2013\u2014 "):
            feats.append(feat)

    system = (
        "You are an expert Amazon SEO copywriter for EU marketplaces. "
        "Rewrite the listing to maximize CTR and conversion while keeping it compliant. "
        "Output in the target language only. Follow constraints strictly."
    )
    constraints = (
        f"- Title: ≤ {MAX_TITLE_CHARS} characters.\n"
        f"- Provide exactly {BULLET_COUNT} concise bullets.\n"
        "- Use clear, benefit-driven language, include seed keywords naturally.\n"
        "- Keep measurements in metric; avoid claims that require certifications.\n"
        "- Respect EU localization (decimal commas, local idioms).\n"
    )
    seed_kw = ", ".join(user.seed_keywords) if user.seed_keywords else "(none)"
    # Never emit a dangling "- " when no features were supplied.
    feature_lines = [f"- {feat}" for feat in feats] or ["- (none)"]

    content = "\n".join(
        [
            "",
            f"TARGET_LANGUAGE: {user.target_lang_code}",
            f"SEED_KEYWORDS: {seed_kw}",
            f"ORIGINAL_TITLE: {user.title}",
            "ORIGINAL_FEATURES:",
            *feature_lines,
            "ORIGINAL_DESCRIPTION:",
            user.description,
            f"Return JSON with fields: title, bullets (array of {BULLET_COUNT}), description.",
            "",
        ]
    )
    return f"<|system|>\n{system}\n\nConstraints:\n{constraints}\n<|user|>\n{content}\n<|assistant|>"
def _listing_from_json(response: str) -> Tuple[str, List[str], str]:
    """Extract (title, bullets, description) from the first JSON object in *response*."""
    import json  # local import: json is not imported at file level

    decoder = json.JSONDecoder()
    start = response.find("{")
    # Try each "{" in turn. raw_decode stops at the end of the object, so
    # trailing chatter (even chatter containing braces) does not break parsing.
    while start != -1:
        try:
            data, _ = decoder.raw_decode(response, start)
        except json.JSONDecodeError:
            data = None
        if isinstance(data, dict):
            break
        start = response.find("{", start + 1)
    else:
        return "", [], ""

    def _text(value) -> str:
        return value.strip() if isinstance(value, str) else ""

    raw_bullets = data.get("bullets")
    if isinstance(raw_bullets, str):
        raw_bullets = [raw_bullets]
    elif not isinstance(raw_bullets, list):
        raw_bullets = []
    bullets = [b for b in (_text(item) for item in raw_bullets) if b][:BULLET_COUNT]
    return _text(data.get("title")), bullets, _text(data.get("description"))


def _listing_from_plaintext(response: str) -> Tuple[str, List[str], str]:
    """Best-effort line-based parse used when the model did not return usable JSON."""
    lines = [ln.strip() for ln in response.splitlines() if ln.strip()]

    # Prefer an explicit "Title: ..." line, else the first non-empty line.
    title = next(
        (ln.split(":", 1)[1].strip() for ln in lines if ln.lower().startswith("title") and ":" in ln),
        lines[0] if lines else "",
    )

    bullets = [ln.lstrip("-• ").strip() for ln in lines if ln.startswith(("-", "•"))][:BULLET_COUNT]
    if not bullets:
        bullets = lines[1:1 + BULLET_COUNT]

    desc_idx = next((i for i, ln in enumerate(lines) if ln.lower().startswith("description")), None)
    if desc_idx is not None:
        # Keep text that follows "Description:" on the same line.
        _, sep, inline = lines[desc_idx].partition(":")
        inline = inline.strip() if sep else ""
        desc_lines = ([inline] if inline else []) + lines[desc_idx + 1:]
    else:
        desc_lines = lines[BULLET_COUNT + 1:]
    return title, bullets, "\n".join(desc_lines)


def generate_listing(user: ListingInput) -> Tuple[str, List[str], str, Dict[str, str]]:
    """Generate an optimized listing with the configured Hugging Face model.

    The model is asked for JSON. If no JSON object with a title can be
    extracted, the response is parsed line by line instead. Missing seed
    keywords are then appended to the title and the description.

    Args:
        user: Listing data and generation settings.

    Returns:
        ``(title, bullets, description, score)``. ``bullets`` always has
        exactly ``BULLET_COUNT`` entries, padded with empty strings, and
        ``score`` is the dict produced by ``seo_score``.

    Raises:
        Any exception raised by the inference call propagates to the caller.
    """
    client = InferenceClient(model=HF_TEXT_MODEL, token=HF_API_TOKEN)
    response = client.text_generation(
        make_prompt(user),
        max_new_tokens=700,
        temperature=0.6,
        top_p=0.9,
        repetition_penalty=1.1,
        return_full_text=False,
        stream=False,
    )

    title, bullets, desc = _listing_from_json(response)
    if not title:
        title, bullets, desc = _listing_from_plaintext(response)

    title = ensure_keywords(title, user.seed_keywords, user.target_lang_code)
    desc = ensure_keywords(desc, user.seed_keywords, user.target_lang_code)

    # Score the bullets the model actually produced; pad only afterwards so
    # the UI table always receives a full row.
    bullets = [b for b in bullets if b][:BULLET_COUNT]
    score = seo_score(title, bullets, desc, user.seed_keywords)
    padded = bullets + [""] * (BULLET_COUNT - len(bullets))
    return title, padded, desc, score
# -------------------------
# UI
# -------------------------
# Two-column layout: listing inputs on the left, generated output and SEO
# checks on the right. All event wiring happens inside the Blocks context.
with gr.Blocks(title="BleuPilot – Listing Optimizer", css=".mono{font-family:ui-monospace,Menlo,monospace}") as demo:
    gr.Markdown(
        "\n"
        "# BleuPilot – Amazon Listing Optimizer (MVP)\n"
        "Paste your current listing, choose a language, add seed keywords, and generate.\n"
        "> Tip: Start with FR/DE/ES/IT to localize your EU listings quickly.\n"
    )
    with gr.Row():
        with gr.Column():
            inp_title = gr.Textbox(label="Original Title", placeholder="Enter current product title…", lines=2)
            inp_features = gr.Textbox(label="Features (one per line)", placeholder="Feature 1\nFeature 2\nFeature 3…", lines=8)
            inp_desc = gr.Textbox(label="Original Description", placeholder="Paste current description…", lines=8)
            lang = gr.Dropdown(list(SUPPORTED_LANGS.keys()), value="French (FR)", label="Target Language")
            kw = gr.Textbox(label="Seed Keywords (comma or line-separated)", placeholder="chien, sac à déjections, biodégradable…", lines=3)
            run_btn = gr.Button("Generate Optimized Listing 🚀", variant="primary")
        with gr.Column():
            out_title = gr.Textbox(label="Optimized Title", lines=2)
            out_bullets = gr.Dataframe(
                headers=[f"Bullet {i+1}" for i in range(BULLET_COUNT)],
                row_count=1,
                col_count=BULLET_COUNT,
                wrap=True,
            )
            out_desc = gr.Textbox(label="Optimized Description", lines=10)
            with gr.Accordion("SEO Checks", open=False):
                score_title = gr.Markdown("")

    def _on_click(title, feats, desc, lang_label, kw_raw):
        """Handle the Generate button: build the input, call the model, format the outputs."""
        title, feats, desc = title or "", feats or "", desc or ""
        # Skip the remote call when there is nothing to optimize.
        if not (title.strip() or feats.strip() or desc.strip()):
            raise gr.Error("Please provide a title, features, or a description to optimize.")

        user = ListingInput(
            title=title,
            features=feats,
            description=desc,
            target_lang_code=SUPPORTED_LANGS.get(lang_label, "fr"),
            seed_keywords=clean_keywords(kw_raw or ""),
        )
        try:
            new_title, bullets, new_desc, score = generate_listing(user)
        except Exception as exc:
            # UI boundary: surface backend failures (network, auth, model
            # loading) as a readable message instead of an opaque error.
            raise gr.Error(f"Generation failed: {exc}") from exc

        # The Dataframe expects a list of rows; the bullets form a single row.
        bullets_row = [bullets]
        score_md = (
            f"**Title length:** {score['title_length']} {score['title_ok']}\n\n"
            f"**Bullet count:** {score['bullet_count']} {score['bullet_ok']}\n\n"
            f"**Keyword coverage:** {score['keyword_coverage']}\n\n"
            f"**Missing keywords:** {score['keywords_missing']}"
        )
        return new_title, bullets_row, new_desc, score_md

    run_btn.click(_on_click, [inp_title, inp_features, inp_desc, lang, kw], [out_title, out_bullets, out_desc, score_title])
    gr.Markdown(
        "\n"
        "---\n"
        "### Notes\n"
        "- For best results, supply 5–8 seed keywords you want included.\n"
        "- Keep titles under ~200 chars. Some categories enforce smaller caps.\n"
        "- This MVP uses serverless text-generation. For production scale, switch to **Inference Endpoints** (dedicated CPU/GPU) and add auth + rate limits.\n"
    )

if __name__ == "__main__":
    demo.launch()