""" BleuPilot – Amazon Listing Optimizer (MVP) ------------------------------------------ Self-contained Gradio app for Hugging Face Spaces. - Generates localized Amazon titles, 5 bullets, and a description (FR/EN/DE/ES/IT) - Simple keyword enforcement and SEO checks - Uses Hugging Face serverless Inference API via `huggingface_hub.InferenceClient` """ from __future__ import annotations import os import re from dataclasses import dataclass from typing import List, Dict, Tuple import gradio as gr from huggingface_hub import InferenceClient # ------------------------- # Config # ------------------------- HF_TEXT_MODEL = os.getenv("HF_TEXT_MODEL", "HuggingFaceH4/zephyr-7b-beta") HF_API_TOKEN = os.getenv("HF_API_TOKEN", None) SUPPORTED_LANGS = { "French (FR)": "fr", "English (EN)": "en", "German (DE)": "de", "Spanish (ES)": "es", "Italian (IT)": "it", } MAX_TITLE_CHARS = 200 # Amazon soft cap (varies by category) BULLET_COUNT = 5 # ------------------------- # Helpers # ------------------------- @dataclass class ListingInput: title: str features: str # one per line or comma-separated description: str target_lang_code: str seed_keywords: List[str] def clean_keywords(raw: str) -> List[str]: if not raw.strip(): return [] items = re.split(r"[\n,;]", raw) items = [re.sub(r"\s+", " ", s).strip() for s in items] return [s for s in items if s] def ensure_keywords(text: str, keywords: List[str], lang_code: str) -> str: """Naive keyword enforcement: if a keyword is missing, append a short clause.""" if not keywords: return text missing = [kw for kw in keywords if re.search(rf"\b{re.escape(kw)}\b", text, flags=re.IGNORECASE) is None] if missing: extra = "; ".join(missing) suffix_map = { "fr": f" Mots-clés inclus : {extra}.", "en": f" Keywords included: {extra}.", "de": f" Enthaltene Schlüsselwörter: {extra}.", "es": f" Palabras clave incluidas: {extra}.", "it": f" Parole chiave incluse: {extra}.", } text += suffix_map.get(lang_code, f" Keywords: {extra}.") return text def seo_score(title: str, bullets: List[str], desc: str, keywords: List[str]) -> Dict[str, str]: score = {} title_len = len(title) score["title_length"] = f"{title_len} / {MAX_TITLE_CHARS} chars" score["title_ok"] = "✅" if title_len <= MAX_TITLE_CHARS else "❌ Too long" score["bullet_count"] = f"{len(bullets)} (target {BULLET_COUNT})" score["bullet_ok"] = "✅" if len(bullets) == BULLET_COUNT else "⚠️ Aim for 5 bullets" blob = "\n".join([title] + bullets + [desc]).lower() coverage = 0 missing = [] for kw in keywords: if kw.lower() in blob: coverage += 1 else: missing.append(kw) if keywords: pct = int(round(100 * coverage / max(1, len(keywords)))) score["keyword_coverage"] = f"{coverage}/{len(keywords)} ({pct}%)" else: score["keyword_coverage"] = "N/A" score["keywords_missing"] = ", ".join(missing) if missing else "None" return score def make_prompt(user: ListingInput) -> str: feats = [s.strip() for s in re.split(r"[\n•\-\u2022]", user.features) if s.strip()] system = ( "You are an expert Amazon SEO copywriter for EU marketplaces. " "Rewrite the listing to maximize CTR and conversion while keeping it compliant. " "Output in the target language only. Follow constraints strictly." ) constraints = ( f"- Title: ≤ {MAX_TITLE_CHARS} characters.\n" f"- Provide exactly {BULLET_COUNT} concise bullets.\n" "- Use clear, benefit-driven language, include seed keywords naturally.\n" "- Keep measurements in metric; avoid claims that require certifications.\n" "- Respect EU localization (decimal commas, local idioms).\n" ) seed_kw = ", ".join(user.seed_keywords) if user.seed_keywords else "(none)" content = f""" TARGET_LANGUAGE: {user.target_lang_code} SEED_KEYWORDS: {seed_kw} ORIGINAL_TITLE: {user.title} ORIGINAL_FEATURES: - """ + "\n- ".join(feats) + f""" ORIGINAL_DESCRIPTION: {user.description} Return JSON with fields: title, bullets (array of 5), description. """ prompt = f"<|system|>\n{system}\n\nConstraints:\n{constraints}\n<|user|>\n{content}\n<|assistant|>" return prompt def generate_listing(user: ListingInput) -> Tuple[str, List[str], str, Dict[str, str]]: client = InferenceClient(model=HF_TEXT_MODEL, token=HF_API_TOKEN) prompt = make_prompt(user) response = client.text_generation( prompt, max_new_tokens=700, temperature=0.6, top_p=0.9, repetition_penalty=1.1, return_full_text=False, stream=False, ) json_match = re.search(r"\{[\s\S]*\}", response) title, bullets, desc = "", [], "" if json_match: import json try: data = json.loads(json_match.group(0)) title = data.get("title", "").strip() bullets = [b.strip() for b in data.get("bullets", [])][:BULLET_COUNT] desc = data.get("description", "").strip() except Exception: pass if not title: lines = [l.strip() for l in response.splitlines() if l.strip()] title = next((l.split(":",1)[1].strip() for l in lines if l.lower().startswith("title") and ":" in l), lines[0] if lines else "") bullets = [l.lstrip("-• ").strip() for l in lines if l.startswith(("-","•"))][:BULLET_COUNT] if not bullets: bullets = [l for l in lines[1:1+BULLET_COUNT]] desc_idx = next((i for i,l in enumerate(lines) if l.lower().startswith("description")), None) if desc_idx is not None: desc = "\n".join(lines[desc_idx+1:]) else: desc = "\n".join(lines[BULLET_COUNT+1:]) title = ensure_keywords(title, user.seed_keywords, user.target_lang_code) desc = ensure_keywords(desc, user.seed_keywords, user.target_lang_code) bullets = (bullets + [""]*BULLET_COUNT)[:BULLET_COUNT] score = seo_score(title, bullets, desc, user.seed_keywords) return title, bullets, desc, score # ------------------------- # UI # ------------------------- with gr.Blocks(title="BleuPilot – Listing Optimizer", css=".mono{font-family:ui-monospace,Menlo,monospace}") as demo: gr.Markdown(""" # BleuPilot – Amazon Listing Optimizer (MVP) Paste your current listing, choose a language, add seed keywords, and generate. > Tip: Start with FR/DE/ES/IT to localize your EU listings quickly. """) with gr.Row(): with gr.Column(): inp_title = gr.Textbox(label="Original Title", placeholder="Enter current product title…", lines=2) inp_features= gr.Textbox(label="Features (one per line)", placeholder="Feature 1\nFeature 2\nFeature 3…", lines=8) inp_desc = gr.Textbox(label="Original Description", placeholder="Paste current description…", lines=8) lang = gr.Dropdown(list(SUPPORTED_LANGS.keys()), value="French (FR)", label="Target Language") kw = gr.Textbox(label="Seed Keywords (comma or line-separated)", placeholder="chien, sac à déjections, biodégradable…", lines=3) run_btn = gr.Button("Generate Optimized Listing 🚀", variant="primary") with gr.Column(): out_title = gr.Textbox(label="Optimized Title", lines=2) out_bullets = gr.Dataframe(headers=[f"Bullet {i+1}" for i in range(BULLET_COUNT)], row_count=1, col_count=BULLET_COUNT, wrap=True) out_desc = gr.Textbox(label="Optimized Description", lines=10) with gr.Accordion("SEO Checks", open=False): score_title = gr.Markdown("") def _on_click(title, feats, desc, lang_label, kw_raw): user = ListingInput( title=title or "", features=feats or "", description=desc or "", target_lang_code=SUPPORTED_LANGS.get(lang_label, "fr"), seed_keywords=clean_keywords(kw_raw or ""), ) new_title, bullets, new_desc, score = generate_listing(user) bullets_row = [bullets] score_md = ( f"**Title length:** {score['title_length']} — {score['title_ok']}\n\n" f"**Bullet count:** {score['bullet_count']} — {score['bullet_ok']}\n\n" f"**Keyword coverage:** {score['keyword_coverage']}\n\n" f"**Missing keywords:** {score['keywords_missing']}" ) return new_title, bullets_row, new_desc, score_md run_btn.click(_on_click, [inp_title, inp_features, inp_desc, lang, kw], [out_title, out_bullets, out_desc, score_title]) gr.Markdown(""" --- ### Notes - For best results, supply 5–8 seed keywords you want included. - Keep titles under ~200 chars. Some categories enforce smaller caps. - This MVP uses serverless text-generation. For production scale, switch to **Inference Endpoints** (dedicated CPU/GPU) and add auth + rate limits. """) if __name__ == "__main__": demo.launch()