Spaces:

Jesiel-AI
/

bleupilot-listing-optimizer

Sleeping

App Files Community

Jesiel Rombley committed on Sep 14, 2025

Commit

4eecb9b

verified ·

1 Parent(s): 4b9be6d

Update app.py

Browse files

Files changed (1) hide show

app.py +15 -61

app.py CHANGED Viewed

@@ -1,20 +1,10 @@
 """
 BleuPilot – Amazon Listing Optimizer (MVP)
--------------------------------------------------
 Self-contained Gradio app for Hugging Face Spaces.
 - Generates localized Amazon titles, 5 bullets, and a description (FR/EN/DE/ES/IT)
 - Simple keyword enforcement and SEO checks
 - Uses Hugging Face serverless Inference API via `huggingface_hub.InferenceClient`
-How to deploy on a Space (summary):
-1) Create a new Space (SDK: Gradio, Private or Public).
-2) Add two files: `app.py` (this file) and `requirements.txt` (see bottom comment).
-3) In Space Settings → Secrets, add: `HF_API_TOKEN` (with Inference API access).
-4) Commit & run. Optional: set `HF_TEXT_MODEL` space variable to switch models.
-Note: For best latency, start with a light instruct model available on serverless.
-Recommended default: "HuggingFaceH4/zephyr-7b-beta" (changeable via env var).
-You can later migrate hot paths to Inference Endpoints for predictable scale.
 """
 from __future__ import annotations
@@ -30,7 +20,7 @@ from huggingface_hub import InferenceClient
 # Config
 # -------------------------
 HF_TEXT_MODEL = os.getenv("HF_TEXT_MODEL", "HuggingFaceH4/zephyr-7b-beta")
-HF_API_TOKEN = os.getenv("HF_API_TOKEN", None)
 SUPPORTED_LANGS = {
     "French (FR)": "fr",
@@ -54,7 +44,6 @@ class ListingInput:
     target_lang_code: str
     seed_keywords: List[str]
 def clean_keywords(raw: str) -> List[str]:
     if not raw.strip():
         return []
@@ -62,7 +51,6 @@ def clean_keywords(raw: str) -> List[str]:
     items = [re.sub(r"\s+", " ", s).strip() for s in items]
     return [s for s in items if s]
 def ensure_keywords(text: str, keywords: List[str], lang_code: str) -> str:
     """Naive keyword enforcement: if a keyword is missing, append a short clause."""
     if not keywords:
@@ -70,7 +58,6 @@ def ensure_keywords(text: str, keywords: List[str], lang_code: str) -> str:
     missing = [kw for kw in keywords if re.search(rf"\b{re.escape(kw)}\b", text, flags=re.IGNORECASE) is None]
     if missing:
         extra = "; ".join(missing)
-        # Append in a natural way per language
         suffix_map = {
             "fr": f" Mots-clés inclus : {extra}.",
             "en": f" Keywords included: {extra}.",
@@ -81,7 +68,6 @@ def ensure_keywords(text: str, keywords: List[str], lang_code: str) -> str:
         text += suffix_map.get(lang_code, f" Keywords: {extra}.")
     return text
 def seo_score(title: str, bullets: List[str], desc: str, keywords: List[str]) -> Dict[str, str]:
     score = {}
     title_len = len(title)
@@ -91,7 +77,6 @@ def seo_score(title: str, bullets: List[str], desc: str, keywords: List[str]) ->
     score["bullet_count"] = f"{len(bullets)} (target {BULLET_COUNT})"
     score["bullet_ok"] = "✅" if len(bullets) == BULLET_COUNT else "⚠️ Aim for 5 bullets"
-    # Keyword coverage (simple substring check across all blocks)
     blob = "\n".join([title] + bullets + [desc]).lower()
     coverage = 0
     missing = []
@@ -106,15 +91,11 @@ def seo_score(title: str, bullets: List[str], desc: str, keywords: List[str]) ->
     else:
         score["keyword_coverage"] = "N/A"
     score["keywords_missing"] = ", ".join(missing) if missing else "None"
     return score
 def make_prompt(user: ListingInput) -> str:
-    # Normalize features into list
     feats = [s.strip() for s in re.split(r"[\n•\-\u2022]", user.features) if s.strip()]
-    # System-style instructions for instruct models
     system = (
         "You are an expert Amazon SEO copywriter for EU marketplaces. "
         "Rewrite the listing to maximize CTR and conversion while keeping it compliant. "
@@ -136,7 +117,8 @@ TARGET_LANGUAGE: {user.target_lang_code}
 SEED_KEYWORDS: {seed_kw}
 ORIGINAL_TITLE: {user.title}
-ORIGINAL_FEATURES:\n- """ + "\n- ".join(feats) + f"""
 ORIGINAL_DESCRIPTION:
 {user.description}
@@ -147,13 +129,10 @@ Return JSON with fields: title, bullets (array of 5), description.
     prompt = f"<|system|>\n{system}\n\nConstraints:\n{constraints}\n<|user|>\n{content}\n<|assistant|>"
     return prompt
 def generate_listing(user: ListingInput) -> Tuple[str, List[str], str, Dict[str, str]]:
     client = InferenceClient(model=HF_TEXT_MODEL, token=HF_API_TOKEN)
     prompt = make_prompt(user)
-    # Text-generation params tuned for instruction models
     response = client.text_generation(
         prompt,
         max_new_tokens=700,
@@ -164,7 +143,6 @@ def generate_listing(user: ListingInput) -> Tuple[str, List[str], str, Dict[str,
         stream=False,
     )
-    # Heuristic: extract JSON block
     json_match = re.search(r"\{[\s\S]*\}", response)
     title, bullets, desc = "", [], ""
@@ -178,15 +156,10 @@ def generate_listing(user: ListingInput) -> Tuple[str, List[str], str, Dict[str,
         except Exception:
             pass
-    # Fallback: try to split text if JSON parsing failed
     if not title:
-        # naive parsing
         lines = [l.strip() for l in response.splitlines() if l.strip()]
-        # find title
         title = next((l.split(":",1)[1].strip() for l in lines if l.lower().startswith("title") and ":" in l), lines[0] if lines else "")
-        # bullets
         bullets = [l.lstrip("-• ").strip() for l in lines if l.startswith(("-","•"))][:BULLET_COUNT]
-        # description
         if not bullets:
             bullets = [l for l in lines[1:1+BULLET_COUNT]]
         desc_idx = next((i for i,l in enumerate(lines) if l.lower().startswith("description")), None)
@@ -195,19 +168,13 @@ def generate_listing(user: ListingInput) -> Tuple[str, List[str], str, Dict[str,
         else:
             desc = "\n".join(lines[BULLET_COUNT+1:])
-    # Keyword enforcement
     title = ensure_keywords(title, user.seed_keywords, user.target_lang_code)
-    desc = ensure_keywords(desc, user.seed_keywords, user.target_lang_code)
-    # Pad/trim bullets to exactly 5
     bullets = (bullets + [""]*BULLET_COUNT)[:BULLET_COUNT]
-    # SEO score
     score = seo_score(title, bullets, desc, user.seed_keywords)
     return title, bullets, desc, score
 # -------------------------
 # UI
 # -------------------------
@@ -220,17 +187,17 @@ Paste your current listing, choose a language, add seed keywords, and generate.
     with gr.Row():
         with gr.Column():
-            inp_title = gr.Textbox(label="Original Title", placeholder="Enter current product title…", lines=2)
-            inp_features = gr.Textbox(label="Features (one per line)", placeholder="Feature 1\nFeature 2\nFeature 3…", lines=8)
-            inp_desc = gr.Textbox(label="Original Description", placeholder="Paste current description…", lines=8)
-            lang = gr.Dropdown(list(SUPPORTED_LANGS.keys()), value="French (FR)", label="Target Language")
-            kw = gr.Textbox(label="Seed Keywords (comma or line-separated)", placeholder="chien, sac à déjections, biodégradable…", lines=3)
-            run_btn = gr.Button("Generate Optimized Listing 🚀", variant="primary")
         with gr.Column():
-            out_title = gr.Textbox(label="Optimized Title", lines=2)
             out_bullets = gr.Dataframe(headers=[f"Bullet {i+1}" for i in range(BULLET_COUNT)], row_count=1, col_count=BULLET_COUNT, wrap=True)
-            out_desc = gr.Textbox(label="Optimized Description", lines=10)
             with gr.Accordion("SEO Checks", open=False):
                 score_title = gr.Markdown("")
@@ -244,9 +211,7 @@ Paste your current listing, choose a language, add seed keywords, and generate.
             seed_keywords=clean_keywords(kw_raw or ""),
         )
         new_title, bullets, new_desc, score = generate_listing(user)
-        # Convert bullets to a single-row dataframe structure
         bullets_row = [bullets]
-        # Render score as markdown
         score_md = (
             f"**Title length:** {score['title_length']} — {score['title_ok']}\n\n"
             f"**Bullet count:** {score['bullet_count']} — {score['bullet_ok']}\n\n"
@@ -257,24 +222,13 @@ Paste your current listing, choose a language, add seed keywords, and generate.
     run_btn.click(_on_click, [inp_title, inp_features, inp_desc, lang, kw], [out_title, out_bullets, out_desc, score_title])
-    gr.Markdown(
-        """
 ---
 ### Notes
 - For best results, supply 5–8 seed keywords you want included.
 - Keep titles under ~200 chars. Some categories enforce smaller caps.
 - This MVP uses serverless text-generation. For production scale, switch to **Inference Endpoints** (dedicated CPU/GPU) and add auth + rate limits.
-        """
-    )
 if __name__ == "__main__":
     demo.launch()
-# -------------------------
-# requirements.txt (create a separate file in your Space)
-# -------------------------
-# gradio>=4.31.0
-# huggingface_hub>=0.23.0
-#
-# Optionally pin a specific version of transformers if you later switch to local models
-# transformers>=4.41.0

 """
 BleuPilot – Amazon Listing Optimizer (MVP)
+------------------------------------------
 Self-contained Gradio app for Hugging Face Spaces.
 - Generates localized Amazon titles, 5 bullets, and a description (FR/EN/DE/ES/IT)
 - Simple keyword enforcement and SEO checks
 - Uses Hugging Face serverless Inference API via `huggingface_hub.InferenceClient`
 """
 from __future__ import annotations
 # Config
 # -------------------------
 HF_TEXT_MODEL = os.getenv("HF_TEXT_MODEL", "HuggingFaceH4/zephyr-7b-beta")
+HF_API_TOKEN  = os.getenv("HF_API_TOKEN", None)
 SUPPORTED_LANGS = {
     "French (FR)": "fr",
     target_lang_code: str
     seed_keywords: List[str]
 def clean_keywords(raw: str) -> List[str]:
     if not raw.strip():
         return []
     items = [re.sub(r"\s+", " ", s).strip() for s in items]
     return [s for s in items if s]
 def ensure_keywords(text: str, keywords: List[str], lang_code: str) -> str:
     """Naive keyword enforcement: if a keyword is missing, append a short clause."""
     if not keywords:
     missing = [kw for kw in keywords if re.search(rf"\b{re.escape(kw)}\b", text, flags=re.IGNORECASE) is None]
     if missing:
         extra = "; ".join(missing)
         suffix_map = {
             "fr": f" Mots-clés inclus : {extra}.",
             "en": f" Keywords included: {extra}.",
         text += suffix_map.get(lang_code, f" Keywords: {extra}.")
     return text
 def seo_score(title: str, bullets: List[str], desc: str, keywords: List[str]) -> Dict[str, str]:
     score = {}
     title_len = len(title)
     score["bullet_count"] = f"{len(bullets)} (target {BULLET_COUNT})"
     score["bullet_ok"] = "✅" if len(bullets) == BULLET_COUNT else "⚠️ Aim for 5 bullets"
     blob = "\n".join([title] + bullets + [desc]).lower()
     coverage = 0
     missing = []
     else:
         score["keyword_coverage"] = "N/A"
     score["keywords_missing"] = ", ".join(missing) if missing else "None"
     return score
 def make_prompt(user: ListingInput) -> str:
     feats = [s.strip() for s in re.split(r"[\n•\-\u2022]", user.features) if s.strip()]
     system = (
         "You are an expert Amazon SEO copywriter for EU marketplaces. "
         "Rewrite the listing to maximize CTR and conversion while keeping it compliant. "
 SEED_KEYWORDS: {seed_kw}
 ORIGINAL_TITLE: {user.title}
+ORIGINAL_FEATURES:
+- """ + "\n- ".join(feats) + f"""
 ORIGINAL_DESCRIPTION:
 {user.description}
     prompt = f"<|system|>\n{system}\n\nConstraints:\n{constraints}\n<|user|>\n{content}\n<|assistant|>"
     return prompt
 def generate_listing(user: ListingInput) -> Tuple[str, List[str], str, Dict[str, str]]:
     client = InferenceClient(model=HF_TEXT_MODEL, token=HF_API_TOKEN)
     prompt = make_prompt(user)
     response = client.text_generation(
         prompt,
         max_new_tokens=700,
         stream=False,
     )
     json_match = re.search(r"\{[\s\S]*\}", response)
     title, bullets, desc = "", [], ""
         except Exception:
             pass
     if not title:
         lines = [l.strip() for l in response.splitlines() if l.strip()]
         title = next((l.split(":",1)[1].strip() for l in lines if l.lower().startswith("title") and ":" in l), lines[0] if lines else "")
         bullets = [l.lstrip("-• ").strip() for l in lines if l.startswith(("-","•"))][:BULLET_COUNT]
         if not bullets:
             bullets = [l for l in lines[1:1+BULLET_COUNT]]
         desc_idx = next((i for i,l in enumerate(lines) if l.lower().startswith("description")), None)
         else:
             desc = "\n".join(lines[BULLET_COUNT+1:])
     title = ensure_keywords(title, user.seed_keywords, user.target_lang_code)
+    desc  = ensure_keywords(desc,  user.seed_keywords, user.target_lang_code)
     bullets = (bullets + [""]*BULLET_COUNT)[:BULLET_COUNT]
     score = seo_score(title, bullets, desc, user.seed_keywords)
     return title, bullets, desc, score
 # -------------------------
 # UI
 # -------------------------
     with gr.Row():
         with gr.Column():
+            inp_title   = gr.Textbox(label="Original Title", placeholder="Enter current product title…", lines=2)
+            inp_features= gr.Textbox(label="Features (one per line)", placeholder="Feature 1\nFeature 2\nFeature 3…", lines=8)
+            inp_desc    = gr.Textbox(label="Original Description", placeholder="Paste current description…", lines=8)
+            lang        = gr.Dropdown(list(SUPPORTED_LANGS.keys()), value="French (FR)", label="Target Language")
+            kw          = gr.Textbox(label="Seed Keywords (comma or line-separated)", placeholder="chien, sac à déjections, biodégradable…", lines=3)
+            run_btn     = gr.Button("Generate Optimized Listing 🚀", variant="primary")
         with gr.Column():
+            out_title   = gr.Textbox(label="Optimized Title", lines=2)
             out_bullets = gr.Dataframe(headers=[f"Bullet {i+1}" for i in range(BULLET_COUNT)], row_count=1, col_count=BULLET_COUNT, wrap=True)
+            out_desc    = gr.Textbox(label="Optimized Description", lines=10)
             with gr.Accordion("SEO Checks", open=False):
                 score_title = gr.Markdown("")
             seed_keywords=clean_keywords(kw_raw or ""),
         )
         new_title, bullets, new_desc, score = generate_listing(user)
         bullets_row = [bullets]
         score_md = (
             f"**Title length:** {score['title_length']} — {score['title_ok']}\n\n"
             f"**Bullet count:** {score['bullet_count']} — {score['bullet_ok']}\n\n"
     run_btn.click(_on_click, [inp_title, inp_features, inp_desc, lang, kw], [out_title, out_bullets, out_desc, score_title])
+    gr.Markdown("""
 ---
 ### Notes
 - For best results, supply 5–8 seed keywords you want included.
 - Keep titles under ~200 chars. Some categories enforce smaller caps.
 - This MVP uses serverless text-generation. For production scale, switch to **Inference Endpoints** (dedicated CPU/GPU) and add auth + rate limits.
+""")
 if __name__ == "__main__":
     demo.launch()