# Source: Hugging Face Spaces — Jesiel-AI / bleupilot-listing-optimizer (status: Sleeping)
# File size: 9,161 Bytes

"""
BleuPilot – Amazon Listing Optimizer (MVP)
------------------------------------------
Self-contained Gradio app for Hugging Face Spaces.
- Generates localized Amazon titles, 5 bullets, and a description (FR/EN/DE/ES/IT)
- Simple keyword enforcement and SEO checks
- Uses Hugging Face serverless Inference API via `huggingface_hub.InferenceClient`
"""

from __future__ import annotations
import os
import re
from dataclasses import dataclass
from typing import List, Dict, Tuple

import gradio as gr
from huggingface_hub import InferenceClient

# -------------------------
# Config
# -------------------------
# Model id handed to InferenceClient; override with the HF_TEXT_MODEL env var.
HF_TEXT_MODEL = os.getenv("HF_TEXT_MODEL", "HuggingFaceH4/zephyr-7b-beta")
# API token handed to InferenceClient; None when HF_API_TOKEN is not set.
HF_API_TOKEN  = os.getenv("HF_API_TOKEN", None)

# Dropdown label -> language code. The code is what gets written into the
# prompt (TARGET_LANGUAGE) and selects the keyword suffix in ensure_keywords.
SUPPORTED_LANGS = {
    "French (FR)": "fr",
    "English (EN)": "en",
    "German (DE)": "de",
    "Spanish (ES)": "es",
    "Italian (IT)": "it",
}

MAX_TITLE_CHARS = 200  # Amazon soft cap (varies by category)
# Number of bullets requested in the prompt and shown as columns in the UI.
BULLET_COUNT = 5

# -------------------------
# Helpers
# -------------------------
@dataclass
class ListingInput:
    """Listing text and generation settings for a single optimization request."""
    title: str  # original product title
    features: str  # raw feature text, one feature per line (split into items by make_prompt)
    description: str  # original product description
    target_lang_code: str  # language code, e.g. "fr" (a value of SUPPORTED_LANGS)
    seed_keywords: List[str]  # keywords that should appear in the generated copy

def clean_keywords(raw: str) -> List[str]:
    """Turn a free-form keyword string into a list of tidy keywords.

    Entries may be separated by newlines, commas, or semicolons. Inside each
    entry, runs of whitespace collapse to one space and surrounding whitespace
    is trimmed. Entries that end up empty are discarded, so blank input
    yields an empty list.
    """
    if raw.strip() == "":
        return []
    # Normalize lazily, then keep only the entries that still have content.
    normalized = (
        re.sub(r"\s+", " ", piece).strip()
        for piece in re.split(r"[\n,;]", raw)
    )
    return [keyword for keyword in normalized if keyword]

def ensure_keywords(text: str, keywords: List[str], lang_code: str) -> str:
    """Append any seed keywords that do not already appear in ``text``.

    Matching is case-insensitive and whole-keyword: a keyword embedded in a
    longer word does not count as present. Missing keywords are joined with
    "; " and appended as one short localized clause.

    Args:
        text: Generated copy to check.
        keywords: Seed keywords that should be present; empty means no-op.
        lang_code: Language code selecting the clause wording ("fr", "en",
            "de", "es", "it"); any other value uses a generic English clause.

    Returns:
        ``text`` unchanged when nothing is missing, otherwise ``text`` with
        the keyword clause appended.
    """
    if not keywords:
        return text

    def _is_present(keyword: str) -> bool:
        # Look-arounds rather than \b: \b requires a word character on one
        # side, so a keyword that starts or ends with punctuation ("C++",
        # "(XL)") could never match and was always treated as missing.
        pattern = rf"(?<!\w){re.escape(keyword)}(?!\w)"
        return re.search(pattern, text, flags=re.IGNORECASE) is not None

    missing = [kw for kw in keywords if not _is_present(kw)]
    if not missing:
        return text

    extra = "; ".join(missing)
    suffix_map = {
        "fr": f" Mots-clés inclus : {extra}.",
        "en": f" Keywords included: {extra}.",
        "de": f" Enthaltene Schlüsselwörter: {extra}.",
        "es": f" Palabras clave incluidas: {extra}.",
        "it": f" Parole chiave incluse: {extra}.",
    }
    return text + suffix_map.get(lang_code, f" Keywords: {extra}.")

def seo_score(title: str, bullets: List[str], desc: str, keywords: List[str]) -> Dict[str, str]:
    """Run simple SEO checks on a listing and return display-ready strings.

    Args:
        title: Listing title.
        bullets: Bullet texts; blank entries are ignored when counting.
        desc: Listing description.
        keywords: Seed keywords to look for (case-insensitive substring match
            across title, bullets and description).

    Returns:
        Dict with the keys ``title_length``, ``title_ok``, ``bullet_count``,
        ``bullet_ok``, ``keyword_coverage`` and ``keywords_missing``.
    """
    score = {}

    title_len = len(title)
    score["title_length"] = f"{title_len} / {MAX_TITLE_CHARS} chars"
    score["title_ok"] = "✅" if title_len <= MAX_TITLE_CHARS else "❌ Too long"

    # Count only bullets that carry text. Callers may pad the list with empty
    # strings to fill the UI grid, and placeholders must not satisfy the check.
    filled = [b for b in bullets if b and b.strip()]
    score["bullet_count"] = f"{len(filled)} (target {BULLET_COUNT})"
    score["bullet_ok"] = "✅" if len(filled) == BULLET_COUNT else f"⚠️ Aim for {BULLET_COUNT} bullets"

    blob = "\n".join([title, *bullets, desc]).lower()
    missing = [kw for kw in keywords if kw.lower() not in blob]
    if keywords:
        coverage = len(keywords) - len(missing)
        pct = int(round(100 * coverage / len(keywords)))
        score["keyword_coverage"] = f"{coverage}/{len(keywords)} ({pct}%)"
    else:
        score["keyword_coverage"] = "N/A"
    score["keywords_missing"] = ", ".join(missing) if missing else "None"
    return score

def make_prompt(user: ListingInput) -> str:
    """Build the text-generation prompt for one listing.

    The prompt has a system section (role and constraints) and a user section
    (target language, seed keywords, original title, features, description),
    wrapped in ``<|system|>`` / ``<|user|>`` / ``<|assistant|>`` markers, and
    asks the model to answer with a JSON object.

    Args:
        user: Listing data and settings to embed in the prompt.

    Returns:
        The complete prompt string.
    """
    # One feature per line or per "•". Hyphens are NOT separators: splitting
    # on them would cut compound words such as "water-resistant" in two.
    feats = []
    for piece in re.split(r"[\n•]", user.features):
        # Drop a leading "-" / "*" list marker, but keep a minus sign that
        # belongs to a number (e.g. "-20°C").
        item = re.sub(r"^[-*]\s*(?!\d)", "", piece.strip()).strip()
        if item:
            feats.append(item)

    system = (
        "You are an expert Amazon SEO copywriter for EU marketplaces. "
        "Rewrite the listing to maximize CTR and conversion while keeping it compliant. "
        "Output in the target language only. Follow constraints strictly."
    )

    constraints = (
        f"- Title: ≤ {MAX_TITLE_CHARS} characters.\n"
        f"- Provide exactly {BULLET_COUNT} concise bullets.\n"
        "- Use clear, benefit-driven language, include seed keywords naturally.\n"
        "- Keep measurements in metric; avoid claims that require certifications.\n"
        "- Respect EU localization (decimal commas, local idioms).\n"
    )

    seed_kw = ", ".join(user.seed_keywords) if user.seed_keywords else "(none)"
    # Same "(none)" placeholder as the keywords, instead of a dangling "- ".
    feature_block = "\n".join(f"- {feat}" for feat in feats) if feats else "(none)"

    content = (
        "\n"
        f"TARGET_LANGUAGE: {user.target_lang_code}\n"
        f"SEED_KEYWORDS: {seed_kw}\n"
        "\n"
        f"ORIGINAL_TITLE: {user.title}\n"
        "ORIGINAL_FEATURES:\n"
        f"{feature_block}\n"
        "\n"
        "ORIGINAL_DESCRIPTION:\n"
        f"{user.description}\n"
        "\n"
        f"Return JSON with fields: title, bullets (array of {BULLET_COUNT}), description.\n"
    )

    return f"<|system|>\n{system}\n\nConstraints:\n{constraints}\n<|user|>\n{content}\n<|assistant|>"

def _parse_listing_response(response: str) -> Tuple[str, List[str], str]:
    """Extract ``(title, bullets, description)`` from raw model output.

    The JSON object requested by the prompt is tried first. When that yields
    no title, the text is parsed line by line instead. Bullets are returned
    unpadded and capped at ``BULLET_COUNT``.
    """
    import json  # kept local: json is not among the module-level imports

    title, bullets, desc = "", [], ""

    json_match = re.search(r"\{[\s\S]*\}", response)
    if json_match:
        try:
            data = json.loads(json_match.group(0))
        except ValueError:
            # Invalid JSON (json.JSONDecodeError is a ValueError): fall
            # through to the line-based parsing below.
            data = None
        if isinstance(data, dict):
            raw_title = data.get("title")
            raw_bullets = data.get("bullets")
            raw_desc = data.get("description")
            # Type-check each field on its own so one malformed field does
            # not discard the fields that were fine.
            if isinstance(raw_title, str):
                title = raw_title.strip()
            if isinstance(raw_bullets, list):
                bullets = [b.strip() for b in raw_bullets if isinstance(b, str)][:BULLET_COUNT]
            if isinstance(raw_desc, str):
                desc = raw_desc.strip()

    if not title:
        lines = [ln.strip() for ln in response.splitlines() if ln.strip()]
        title = next(
            (ln.split(":", 1)[1].strip() for ln in lines if ln.lower().startswith("title") and ":" in ln),
            lines[0] if lines else "",
        )
        bullets = [ln.lstrip("-• ").strip() for ln in lines if ln.startswith(("-", "•"))][:BULLET_COUNT]
        if not bullets:
            bullets = lines[1:1 + BULLET_COUNT]
        desc_idx = next((i for i, ln in enumerate(lines) if ln.lower().startswith("description")), None)
        if desc_idx is not None:
            # Keep text that follows "Description:" on the same line.
            inline = lines[desc_idx].partition(":")[2].strip()
            desc = "\n".join(([inline] if inline else []) + lines[desc_idx + 1:])
        else:
            desc = "\n".join(lines[BULLET_COUNT + 1:])

    return title, bullets, desc


def generate_listing(user: ListingInput) -> Tuple[str, List[str], str, Dict[str, str]]:
    """Generate an optimized listing through the Hugging Face Inference API.

    Builds the prompt, requests a completion, parses title / bullets /
    description out of it, appends missing seed keywords to the title and
    description, and computes the SEO checks.

    Args:
        user: Listing data and settings for this request.

    Returns:
        ``(title, bullets, description, score)`` where ``bullets`` always has
        exactly ``BULLET_COUNT`` entries (padded with empty strings) and
        ``score`` is the dict produced by ``seo_score``.

    Errors raised by the inference call are not caught here.
    """
    client = InferenceClient(model=HF_TEXT_MODEL, token=HF_API_TOKEN)
    response = client.text_generation(
        make_prompt(user),
        max_new_tokens=700,
        temperature=0.6,
        top_p=0.9,
        repetition_penalty=1.1,
        return_full_text=False,
        stream=False,
    )

    title, bullets, desc = _parse_listing_response(response)

    title = ensure_keywords(title, user.seed_keywords, user.target_lang_code)
    desc = ensure_keywords(desc, user.seed_keywords, user.target_lang_code)

    # Score what the model actually produced. Padding first would make the
    # bullet-count check pass even when bullets are missing.
    produced = [b for b in bullets if b]
    score = seo_score(title, produced, desc, user.seed_keywords)

    # The UI grid expects exactly BULLET_COUNT cells.
    bullets = (produced + [""] * BULLET_COUNT)[:BULLET_COUNT]
    return title, bullets, desc, score

# -------------------------
# UI
# -------------------------
# Build the Gradio UI. Components are created inside nested layout contexts
# (Blocks > Row > Column), so creation order determines on-screen order.
with gr.Blocks(title="BleuPilot – Listing Optimizer", css=".mono{font-family:ui-monospace,Menlo,monospace}") as demo:
    gr.Markdown("""
# BleuPilot – Amazon Listing Optimizer (MVP)
Paste your current listing, choose a language, add seed keywords, and generate.
> Tip: Start with FR/DE/ES/IT to localize your EU listings quickly.
""")

    with gr.Row():
        # Left column: user inputs.
        with gr.Column():
            inp_title   = gr.Textbox(label="Original Title", placeholder="Enter current product title…", lines=2)
            inp_features= gr.Textbox(label="Features (one per line)", placeholder="Feature 1\nFeature 2\nFeature 3…", lines=8)
            inp_desc    = gr.Textbox(label="Original Description", placeholder="Paste current description…", lines=8)
            lang        = gr.Dropdown(list(SUPPORTED_LANGS.keys()), value="French (FR)", label="Target Language")
            kw          = gr.Textbox(label="Seed Keywords (comma or line-separated)", placeholder="chien, sac à déjections, biodégradable…", lines=3)
            run_btn     = gr.Button("Generate Optimized Listing 🚀", variant="primary")

        # Right column: generated outputs.
        with gr.Column():
            out_title   = gr.Textbox(label="Optimized Title", lines=2)
            # One row with one column per bullet.
            out_bullets = gr.Dataframe(headers=[f"Bullet {i+1}" for i in range(BULLET_COUNT)], row_count=1, col_count=BULLET_COUNT, wrap=True)
            out_desc    = gr.Textbox(label="Optimized Description", lines=10)

            with gr.Accordion("SEO Checks", open=False):
                score_title = gr.Markdown("")

    def _on_click(title, feats, desc, lang_label, kw_raw):
        """Handle the Generate button: run generation and format the outputs.

        Arguments arrive in the order of the ``inputs`` list passed to
        ``run_btn.click``; the four return values map to its ``outputs`` list
        (title, bullet row, description, SEO markdown).
        """
        # Falsy field values are replaced with "" so the helpers always get
        # strings; an unknown language label falls back to French.
        user = ListingInput(
            title=title or "",
            features=feats or "",
            description=desc or "",
            target_lang_code=SUPPORTED_LANGS.get(lang_label, "fr"),
            seed_keywords=clean_keywords(kw_raw or ""),
        )
        new_title, bullets, new_desc, score = generate_listing(user)
        # The Dataframe is configured with a single row, so wrap the bullets
        # in an outer list (a list of rows).
        bullets_row = [bullets]
        score_md = (
            f"**Title length:** {score['title_length']} — {score['title_ok']}\n\n"
            f"**Bullet count:** {score['bullet_count']} — {score['bullet_ok']}\n\n"
            f"**Keyword coverage:** {score['keyword_coverage']}\n\n"
            f"**Missing keywords:** {score['keywords_missing']}"
        )
        return new_title, bullets_row, new_desc, score_md

    # Wire the button: five inputs in, four outputs back.
    run_btn.click(_on_click, [inp_title, inp_features, inp_desc, lang, kw], [out_title, out_bullets, out_desc, score_title])

    gr.Markdown("""
---
### Notes
- For best results, supply 5–8 seed keywords you want included.
- Keep titles under ~200 chars. Some categories enforce smaller caps.
- This MVP uses serverless text-generation. For production scale, switch to **Inference Endpoints** (dedicated CPU/GPU) and add auth + rate limits.
""")

# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()