Spaces:

Jesiel-AI
/

bleupilot-listing-optimizer

Sleeping

Jesiel Rombley

Update app.py

4eecb9b verified 8 months ago

9.16 kB

	"""
	BleuPilot – Amazon Listing Optimizer (MVP)
	------------------------------------------
	Self-contained Gradio app for Hugging Face Spaces.
	- Generates localized Amazon titles, 5 bullets, and a description (FR/EN/DE/ES/IT)
	- Simple keyword enforcement and SEO checks
	- Uses Hugging Face serverless Inference API via `huggingface_hub.InferenceClient`
	"""

	from __future__ import annotations
	import os
	import re
	from dataclasses import dataclass
	from typing import List, Dict, Tuple

	import gradio as gr
	from huggingface_hub import InferenceClient

	# -------------------------
	# Config
	# -------------------------
	HF_TEXT_MODEL = os.getenv("HF_TEXT_MODEL", "HuggingFaceH4/zephyr-7b-beta")
	HF_API_TOKEN = os.getenv("HF_API_TOKEN", None)

	SUPPORTED_LANGS = {
	"French (FR)": "fr",
	"English (EN)": "en",
	"German (DE)": "de",
	"Spanish (ES)": "es",
	"Italian (IT)": "it",
	}

	MAX_TITLE_CHARS = 200 # Amazon soft cap (varies by category)
	BULLET_COUNT = 5

	# -------------------------
	# Helpers
	# -------------------------
	@dataclass
	class ListingInput:
	title: str
	features: str # one per line or comma-separated
	description: str
	target_lang_code: str
	seed_keywords: List[str]

	def clean_keywords(raw: str) -> List[str]:
	if not raw.strip():
	return []
	items = re.split(r"[\n,;]", raw)
	items = [re.sub(r"\s+", " ", s).strip() for s in items]
	return [s for s in items if s]

	def ensure_keywords(text: str, keywords: List[str], lang_code: str) -> str:
	"""Naive keyword enforcement: if a keyword is missing, append a short clause."""
	if not keywords:
	return text
	missing = [kw for kw in keywords if re.search(rf"\b{re.escape(kw)}\b", text, flags=re.IGNORECASE) is None]
	if missing:
	extra = "; ".join(missing)
	suffix_map = {
	"fr": f" Mots-clés inclus : {extra}.",
	"en": f" Keywords included: {extra}.",
	"de": f" Enthaltene Schlüsselwörter: {extra}.",
	"es": f" Palabras clave incluidas: {extra}.",
	"it": f" Parole chiave incluse: {extra}.",
	}
	text += suffix_map.get(lang_code, f" Keywords: {extra}.")
	return text

	def seo_score(title: str, bullets: List[str], desc: str, keywords: List[str]) -> Dict[str, str]:
	score = {}
	title_len = len(title)
	score["title_length"] = f"{title_len} / {MAX_TITLE_CHARS} chars"
	score["title_ok"] = "✅" if title_len <= MAX_TITLE_CHARS else "❌ Too long"

	score["bullet_count"] = f"{len(bullets)} (target {BULLET_COUNT})"
	score["bullet_ok"] = "✅" if len(bullets) == BULLET_COUNT else "⚠️ Aim for 5 bullets"

	blob = "\n".join([title] + bullets + [desc]).lower()
	coverage = 0
	missing = []
	for kw in keywords:
	if kw.lower() in blob:
	coverage += 1
	else:
	missing.append(kw)
	if keywords:
	pct = int(round(100 * coverage / max(1, len(keywords))))
	score["keyword_coverage"] = f"{coverage}/{len(keywords)} ({pct}%)"
	else:
	score["keyword_coverage"] = "N/A"
	score["keywords_missing"] = ", ".join(missing) if missing else "None"
	return score

	def make_prompt(user: ListingInput) -> str:
	feats = [s.strip() for s in re.split(r"[\n•\-\u2022]", user.features) if s.strip()]

	system = (
	"You are an expert Amazon SEO copywriter for EU marketplaces. "
	"Rewrite the listing to maximize CTR and conversion while keeping it compliant. "
	"Output in the target language only. Follow constraints strictly."
	)

	constraints = (
	f"- Title: ≤ {MAX_TITLE_CHARS} characters.\n"
	f"- Provide exactly {BULLET_COUNT} concise bullets.\n"
	"- Use clear, benefit-driven language, include seed keywords naturally.\n"
	"- Keep measurements in metric; avoid claims that require certifications.\n"
	"- Respect EU localization (decimal commas, local idioms).\n"
	)

	seed_kw = ", ".join(user.seed_keywords) if user.seed_keywords else "(none)"

	content = f"""
	TARGET_LANGUAGE: {user.target_lang_code}
	SEED_KEYWORDS: {seed_kw}

	ORIGINAL_TITLE: {user.title}
	ORIGINAL_FEATURES:
	- """ + "\n- ".join(feats) + f"""

	ORIGINAL_DESCRIPTION:
	{user.description}

	Return JSON with fields: title, bullets (array of 5), description.
	"""

	prompt = f"<\|system\|>\n{system}\n\nConstraints:\n{constraints}\n<\|user\|>\n{content}\n<\|assistant\|>"
	return prompt

	def generate_listing(user: ListingInput) -> Tuple[str, List[str], str, Dict[str, str]]:
	client = InferenceClient(model=HF_TEXT_MODEL, token=HF_API_TOKEN)
	prompt = make_prompt(user)

	response = client.text_generation(
	prompt,
	max_new_tokens=700,
	temperature=0.6,
	top_p=0.9,
	repetition_penalty=1.1,
	return_full_text=False,
	stream=False,
	)

	json_match = re.search(r"\{[\s\S]*\}", response)
	title, bullets, desc = "", [], ""

	if json_match:
	import json
	try:
	data = json.loads(json_match.group(0))
	title = data.get("title", "").strip()
	bullets = [b.strip() for b in data.get("bullets", [])][:BULLET_COUNT]
	desc = data.get("description", "").strip()
	except Exception:
	pass

	if not title:
	lines = [l.strip() for l in response.splitlines() if l.strip()]
	title = next((l.split(":",1)[1].strip() for l in lines if l.lower().startswith("title") and ":" in l), lines[0] if lines else "")
	bullets = [l.lstrip("-• ").strip() for l in lines if l.startswith(("-","•"))][:BULLET_COUNT]
	if not bullets:
	bullets = [l for l in lines[1:1+BULLET_COUNT]]
	desc_idx = next((i for i,l in enumerate(lines) if l.lower().startswith("description")), None)
	if desc_idx is not None:
	desc = "\n".join(lines[desc_idx+1:])
	else:
	desc = "\n".join(lines[BULLET_COUNT+1:])

	title = ensure_keywords(title, user.seed_keywords, user.target_lang_code)
	desc = ensure_keywords(desc, user.seed_keywords, user.target_lang_code)

	bullets = (bullets + [""]*BULLET_COUNT)[:BULLET_COUNT]
	score = seo_score(title, bullets, desc, user.seed_keywords)
	return title, bullets, desc, score

	# -------------------------
	# UI
	# -------------------------
	with gr.Blocks(title="BleuPilot – Listing Optimizer", css=".mono{font-family:ui-monospace,Menlo,monospace}") as demo:
	gr.Markdown("""
	# BleuPilot – Amazon Listing Optimizer (MVP)
	Paste your current listing, choose a language, add seed keywords, and generate.
	> Tip: Start with FR/DE/ES/IT to localize your EU listings quickly.
	""")

	with gr.Row():
	with gr.Column():
	inp_title = gr.Textbox(label="Original Title", placeholder="Enter current product title…", lines=2)
	inp_features= gr.Textbox(label="Features (one per line)", placeholder="Feature 1\nFeature 2\nFeature 3…", lines=8)
	inp_desc = gr.Textbox(label="Original Description", placeholder="Paste current description…", lines=8)
	lang = gr.Dropdown(list(SUPPORTED_LANGS.keys()), value="French (FR)", label="Target Language")
	kw = gr.Textbox(label="Seed Keywords (comma or line-separated)", placeholder="chien, sac à déjections, biodégradable…", lines=3)
	run_btn = gr.Button("Generate Optimized Listing 🚀", variant="primary")

	with gr.Column():
	out_title = gr.Textbox(label="Optimized Title", lines=2)
	out_bullets = gr.Dataframe(headers=[f"Bullet {i+1}" for i in range(BULLET_COUNT)], row_count=1, col_count=BULLET_COUNT, wrap=True)
	out_desc = gr.Textbox(label="Optimized Description", lines=10)

	with gr.Accordion("SEO Checks", open=False):
	score_title = gr.Markdown("")

	def _on_click(title, feats, desc, lang_label, kw_raw):
	user = ListingInput(
	title=title or "",
	features=feats or "",
	description=desc or "",
	target_lang_code=SUPPORTED_LANGS.get(lang_label, "fr"),
	seed_keywords=clean_keywords(kw_raw or ""),
	)
	new_title, bullets, new_desc, score = generate_listing(user)
	bullets_row = [bullets]
	score_md = (
	f"Title length: {score['title_length']} — {score['title_ok']}\n\n"
	f"Bullet count: {score['bullet_count']} — {score['bullet_ok']}\n\n"
	f"Keyword coverage: {score['keyword_coverage']}\n\n"
	f"Missing keywords: {score['keywords_missing']}"
	)
	return new_title, bullets_row, new_desc, score_md

	run_btn.click(_on_click, [inp_title, inp_features, inp_desc, lang, kw], [out_title, out_bullets, out_desc, score_title])

	gr.Markdown("""
	---
	### Notes
	- For best results, supply 5–8 seed keywords you want included.
	- Keep titles under ~200 chars. Some categories enforce smaller caps.
	- This MVP uses serverless text-generation. For production scale, switch to Inference Endpoints (dedicated CPU/GPU) and add auth + rate limits.
	""")

	if __name__ == "__main__":
	demo.launch()