File size: 9,161 Bytes
4b9be6d
 
4eecb9b
4b9be6d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4eecb9b
4b9be6d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4eecb9b
 
4b9be6d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4eecb9b
4b9be6d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4eecb9b
 
 
 
 
 
4b9be6d
 
4eecb9b
4b9be6d
4eecb9b
4b9be6d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4eecb9b
4b9be6d
 
 
 
 
4eecb9b
4b9be6d
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
"""
BleuPilot – Amazon Listing Optimizer (MVP)
------------------------------------------
Self-contained Gradio app for Hugging Face Spaces.
- Generates localized Amazon titles, 5 bullets, and a description (FR/EN/DE/ES/IT)
- Simple keyword enforcement and SEO checks
- Uses Hugging Face serverless Inference API via `huggingface_hub.InferenceClient`
"""

from __future__ import annotations
import os
import re
from dataclasses import dataclass
from typing import List, Dict, Tuple

import gradio as gr
from huggingface_hub import InferenceClient

# -------------------------
# Config
# -------------------------
# Model id handed to InferenceClient; override with the HF_TEXT_MODEL env var.
HF_TEXT_MODEL = os.getenv("HF_TEXT_MODEL", "HuggingFaceH4/zephyr-7b-beta")
# Token handed to InferenceClient; read from the HF_API_TOKEN env var, None when unset.
HF_API_TOKEN  = os.getenv("HF_API_TOKEN", None)

# Maps the labels offered in the UI language dropdown to the language code
# that is written into the prompt and used to pick the keyword suffix.
SUPPORTED_LANGS = {
    "French (FR)": "fr",
    "English (EN)": "en",
    "German (DE)": "de",
    "Spanish (ES)": "es",
    "Italian (IT)": "it",
}

MAX_TITLE_CHARS = 200  # Amazon soft cap (varies by category)
# Number of bullets requested in the prompt and shown as columns in the output table.
BULLET_COUNT = 5

# -------------------------
# Helpers
# -------------------------
@dataclass
class ListingInput:
    """Listing text and generation settings collected from the UI form."""
    title: str  # current product title, inserted into the prompt as ORIGINAL_TITLE
    features: str  # free-text feature list; split into individual entries by make_prompt
    description: str  # current product description, inserted into the prompt verbatim
    target_lang_code: str  # language code such as "fr" (a value of SUPPORTED_LANGS)
    seed_keywords: List[str]  # keywords that should appear in the generated copy

def clean_keywords(raw: str) -> List[str]:
    """Split a raw keyword string into a list of tidy keywords.

    Keywords are separated by newlines, commas, or semicolons. Runs of
    whitespace inside a keyword collapse to a single space, surrounding
    whitespace is trimmed, and empty entries are dropped. A blank input
    yields an empty list.
    """
    if raw.strip() == "":
        return []

    cleaned: List[str] = []
    for piece in re.split(r"[\n,;]", raw):
        normalized = re.sub(r"\s+", " ", piece).strip()
        if normalized:
            cleaned.append(normalized)
    return cleaned

def ensure_keywords(text: str, keywords: List[str], lang_code: str) -> str:
    """Naive keyword enforcement: append a short clause listing missing keywords.

    A keyword counts as present when it occurs in *text* (case-insensitively)
    as a standalone token, i.e. not embedded inside a longer word.

    Args:
        text: Generated copy to check.
        keywords: Seed keywords that should appear in the copy.
        lang_code: Language code selecting the localized suffix ("fr", "en",
            "de", "es", "it"); any other value falls back to a generic
            English suffix.

    Returns:
        *text* unchanged when every keyword is present (or none were given),
        otherwise *text* followed by a localized "keywords included" clause.
    """
    if not keywords:
        return text

    # Lookarounds instead of \b: \b needs a word character on one side, so it
    # never matches around keywords that begin or end with punctuation
    # ("c++", "100%"), which made them look missing even when present.
    missing = [
        kw
        for kw in keywords
        if re.search(rf"(?<!\w){re.escape(kw)}(?!\w)", text, flags=re.IGNORECASE) is None
    ]
    if not missing:
        return text

    extra = "; ".join(missing)
    suffix_map = {
        "fr": f" Mots-clés inclus : {extra}.",
        "en": f" Keywords included: {extra}.",
        "de": f" Enthaltene Schlüsselwörter: {extra}.",
        "es": f" Palabras clave incluidas: {extra}.",
        "it": f" Parole chiave incluse: {extra}.",
    }
    return text + suffix_map.get(lang_code, f" Keywords: {extra}.")

def seo_score(title: str, bullets: List[str], desc: str, keywords: List[str]) -> Dict[str, str]:
    """Run simple SEO checks on a generated listing.

    Args:
        title: Generated title.
        bullets: Generated bullets; blank entries are ignored when counting.
        desc: Generated description.
        keywords: Seed keywords whose presence is checked (case-insensitive
            substring match against title, bullets and description).

    Returns:
        A dict of display strings with the keys ``title_length``,
        ``title_ok``, ``bullet_count``, ``bullet_ok``, ``keyword_coverage``
        and ``keywords_missing``.
    """
    # The bullet list may arrive padded with empty strings to a fixed width;
    # counting those placeholders would make the bullet check always pass.
    filled_bullets = [b for b in bullets if b and b.strip()]

    title_len = len(title)
    score = {
        "title_length": f"{title_len} / {MAX_TITLE_CHARS} chars",
        "title_ok": "✅" if title_len <= MAX_TITLE_CHARS else "❌ Too long",
        "bullet_count": f"{len(filled_bullets)} (target {BULLET_COUNT})",
        "bullet_ok": (
            "✅" if len(filled_bullets) == BULLET_COUNT else f"⚠️ Aim for {BULLET_COUNT} bullets"
        ),
    }

    blob = "\n".join([title, *bullets, desc]).lower()
    missing = [kw for kw in keywords if kw.lower() not in blob]
    if keywords:
        covered = len(keywords) - len(missing)
        pct = round(100 * covered / len(keywords))
        score["keyword_coverage"] = f"{covered}/{len(keywords)} ({pct}%)"
    else:
        score["keyword_coverage"] = "N/A"
    score["keywords_missing"] = ", ".join(missing) if missing else "None"
    return score

def make_prompt(user: ListingInput) -> str:
    """Build the chat-formatted prompt for the text-generation model.

    Args:
        user: Listing text, target language code and seed keywords.

    Returns:
        A single prompt string with system, user and assistant sections that
        asks the model for a JSON object with ``title``, ``bullets`` and
        ``description`` fields.
    """
    # Split on line breaks and bullet glyphs only. Hyphens are not treated as
    # separators: doing so cuts hyphenated words such as "BPA-free" into two
    # bogus features. A leading "-" / "*" / dash list marker is stripped from
    # each entry instead.
    feats = []
    for chunk in re.split(r"[\n\u2022]", user.features):
        item = re.sub(r"^\s*[-*\u2013\u2014]+\s*", "", chunk).strip()
        if item:
            feats.append(item)
    # Avoid emitting a dangling "- " line when no features were supplied.
    feature_block = "\n".join(f"- {item}" for item in feats) if feats else "(none)"

    system = (
        "You are an expert Amazon SEO copywriter for EU marketplaces. "
        "Rewrite the listing to maximize CTR and conversion while keeping it compliant. "
        "Output in the target language only. Follow constraints strictly."
    )

    constraints = (
        f"- Title: ≤ {MAX_TITLE_CHARS} characters.\n"
        f"- Provide exactly {BULLET_COUNT} concise bullets.\n"
        "- Use clear, benefit-driven language, include seed keywords naturally.\n"
        "- Keep measurements in metric; avoid claims that require certifications.\n"
        "- Respect EU localization (decimal commas, local idioms).\n"
    )

    seed_kw = ", ".join(user.seed_keywords) if user.seed_keywords else "(none)"

    # The requested array size follows BULLET_COUNT so it cannot drift from
    # the "exactly N bullets" constraint above.
    content = f"""
TARGET_LANGUAGE: {user.target_lang_code}
SEED_KEYWORDS: {seed_kw}

ORIGINAL_TITLE: {user.title}
ORIGINAL_FEATURES:
{feature_block}

ORIGINAL_DESCRIPTION:
{user.description}

Return JSON with fields: title, bullets (array of {BULLET_COUNT}), description.
"""

    return f"<|system|>\n{system}\n\nConstraints:\n{constraints}\n<|user|>\n{content}\n<|assistant|>"

def _coerce_text(value) -> str:
    """Return *value* as a stripped string, treating ``None`` as empty."""
    return "" if value is None else str(value).strip()


def _parse_json_listing(response: str) -> Tuple[str, List[str], str]:
    """Extract (title, bullets, description) from a JSON object inside *response*.

    Every "{" is tried as a possible start of a JSON object, so prose or stray
    braces before or after the object do not prevent parsing. Returns
    ``("", [], "")`` when no object with a non-empty title is found.
    """
    import json

    decoder = json.JSONDecoder()
    start = response.find("{")
    while start != -1:
        try:
            data, _ = decoder.raw_decode(response, start)
        except json.JSONDecodeError:
            data = None
        if isinstance(data, dict):
            title = _coerce_text(data.get("title"))
            if title:
                raw_bullets = data.get("bullets") or []
                if isinstance(raw_bullets, str):
                    # Tolerate a model that returns bullets as one multi-line string.
                    raw_bullets = raw_bullets.splitlines()
                elif not isinstance(raw_bullets, list):
                    raw_bullets = []
                bullets = [_coerce_text(b) for b in raw_bullets][:BULLET_COUNT]
                return title, bullets, _coerce_text(data.get("description"))
        start = response.find("{", start + 1)
    return "", [], ""


def _parse_plaintext_listing(response: str) -> Tuple[str, List[str], str]:
    """Best-effort extraction of (title, bullets, description) from free text."""
    lines = [ln.strip() for ln in response.splitlines() if ln.strip()]

    # Prefer an explicit "Title: ..." line; otherwise use the first line.
    title = next(
        (ln.split(":", 1)[1].strip() for ln in lines if ln.lower().startswith("title") and ":" in ln),
        lines[0] if lines else "",
    )

    bullets = [ln.lstrip("-• ").strip() for ln in lines if ln.startswith(("-", "•"))][:BULLET_COUNT]
    if not bullets:
        # No list markers: assume the lines right after the title are bullets.
        bullets = lines[1:1 + BULLET_COUNT]

    desc_idx = next((i for i, ln in enumerate(lines) if ln.lower().startswith("description")), None)
    if desc_idx is None:
        desc_lines = lines[BULLET_COUNT + 1:]
    else:
        # Keep text that follows "Description:" on the same line as well.
        inline = lines[desc_idx].partition(":")[2].strip()
        desc_lines = ([inline] if inline else []) + lines[desc_idx + 1:]
    return title, bullets, "\n".join(desc_lines)


def generate_listing(user: ListingInput) -> Tuple[str, List[str], str, Dict[str, str]]:
    """Generate an optimized listing for *user* via the configured model.

    The model reply is parsed as JSON when possible, with a line-based
    fallback otherwise. Seed keywords missing from the title or description
    are appended by ``ensure_keywords``.

    Args:
        user: Listing text, target language code and seed keywords.

    Returns:
        ``(title, bullets, description, score)`` where ``bullets`` always has
        exactly ``BULLET_COUNT`` entries (padded with empty strings) and
        ``score`` is the dict produced by ``seo_score``.

    Raises:
        Any exception raised by the inference client propagates to the caller.
    """
    client = InferenceClient(model=HF_TEXT_MODEL, token=HF_API_TOKEN)
    response = client.text_generation(
        make_prompt(user),
        max_new_tokens=700,
        temperature=0.6,
        top_p=0.9,
        repetition_penalty=1.1,
        return_full_text=False,
        stream=False,
    )

    title, bullets, desc = _parse_json_listing(response)
    if not title:
        title, bullets, desc = _parse_plaintext_listing(response)

    title = ensure_keywords(title, user.seed_keywords, user.target_lang_code)
    desc = ensure_keywords(desc, user.seed_keywords, user.target_lang_code)

    # Pad/trim to a fixed width so the result always fills the output table row.
    bullets = (bullets + [""] * BULLET_COUNT)[:BULLET_COUNT]
    score = seo_score(title, bullets, desc, user.seed_keywords)
    return title, bullets, desc, score

# -------------------------
# UI
# -------------------------
# Layout: inputs in the left column, generated outputs and SEO checks in the right.
with gr.Blocks(title="BleuPilot – Listing Optimizer", css=".mono{font-family:ui-monospace,Menlo,monospace}") as demo:
    gr.Markdown("""
# BleuPilot – Amazon Listing Optimizer (MVP)
Paste your current listing, choose a language, add seed keywords, and generate.
> Tip: Start with FR/DE/ES/IT to localize your EU listings quickly.
""")

    with gr.Row():
        with gr.Column():
            inp_title   = gr.Textbox(label="Original Title", placeholder="Enter current product title…", lines=2)
            inp_features= gr.Textbox(label="Features (one per line)", placeholder="Feature 1\nFeature 2\nFeature 3…", lines=8)
            inp_desc    = gr.Textbox(label="Original Description", placeholder="Paste current description…", lines=8)
            lang        = gr.Dropdown(list(SUPPORTED_LANGS.keys()), value="French (FR)", label="Target Language")
            kw          = gr.Textbox(label="Seed Keywords (comma or line-separated)", placeholder="chien, sac à déjections, biodégradable…", lines=3)
            run_btn     = gr.Button("Generate Optimized Listing 🚀", variant="primary")

        with gr.Column():
            out_title   = gr.Textbox(label="Optimized Title", lines=2)
            out_bullets = gr.Dataframe(headers=[f"Bullet {i+1}" for i in range(BULLET_COUNT)], row_count=1, col_count=BULLET_COUNT, wrap=True)
            out_desc    = gr.Textbox(label="Optimized Description", lines=10)

            with gr.Accordion("SEO Checks", open=False):
                score_title = gr.Markdown("")

    def _on_click(title, feats, desc, lang_label, kw_raw):
        """Handle the Generate button: build the input, call the model, format outputs.

        Returns the values for the title box, the one-row bullets table, the
        description box and the SEO-check markdown, in that order.
        """
        # Empty form fields are normalized to "" before building the input.
        user = ListingInput(
            title=title or "",
            features=feats or "",
            description=desc or "",
            target_lang_code=SUPPORTED_LANGS.get(lang_label, "fr"),
            seed_keywords=clean_keywords(kw_raw or ""),
        )
        try:
            new_title, bullets, new_desc, score = generate_listing(user)
        except Exception as exc:
            # UI boundary: show a readable message in the app instead of an
            # opaque failure when the inference backend errors out.
            raise gr.Error(f"Listing generation failed: {exc}") from exc

        bullets_row = [bullets]  # the Dataframe expects a list of rows
        score_md = (
            f"**Title length:** {score['title_length']} {score['title_ok']}\n\n"
            f"**Bullet count:** {score['bullet_count']} {score['bullet_ok']}\n\n"
            f"**Keyword coverage:** {score['keyword_coverage']}\n\n"
            f"**Missing keywords:** {score['keywords_missing']}"
        )
        return new_title, bullets_row, new_desc, score_md

    run_btn.click(_on_click, [inp_title, inp_features, inp_desc, lang, kw], [out_title, out_bullets, out_desc, score_title])

    gr.Markdown("""
---
### Notes
- For best results, supply 5–8 seed keywords you want included.
- Keep titles under ~200 chars. Some categories enforce smaller caps.
- This MVP uses serverless text-generation. For production scale, switch to **Inference Endpoints** (dedicated CPU/GPU) and add auth + rate limits.
""")

# Launch the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()