Commit 3242f2a (parent: 74f1fe2) by rsm-roguchi: "pokemon center"

Files changed:
- app.py (+4, -4)
- server/general_blog.py (+184, -164)
- ui/general_blog.py (+1, -2)
app.py CHANGED

@@ -5,7 +5,7 @@ import os
 
 from ui import (
     blog,
-
+    general_blog,
     meta,
     twitter,
     price_matching
@@ -13,7 +13,7 @@ from ui import (
 
 from server import (
     blog as blog_srv,
-
+    general_blog as general_blog_srv,
     meta as meta_srv,
     twitter as twitter_srv,
     price_matching as price_matching_srv
@@ -23,7 +23,7 @@ from server import (
 ui = ui.page_fluid(
     ui.page_navbar(
         blog.ui,
-
+        general_blog.ui,
         meta.ui,
         twitter.ui,
         price_matching.ui,
@@ -36,7 +36,7 @@ ui = ui.page_fluid(
 
 def server(input, output, session):
     blog_srv.server(input, output, session)
-
+    general_blog_srv.server(input, output, session)
     meta_srv.server(input, output, session)
     twitter_srv.server(input, output, session)
     price_matching_srv.server(input, output, session)
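The wiring here is mechanical: general_blog is imported, its panel is added to the navbar, and its server function is registered, mirroring the existing blog, meta, twitter, and price_matching modules. For context, a minimal sketch of the ui/server pair each module is assumed to expose, modeled on the "ui = ui.nav_panel(" pattern visible in ui/general_blog.py further down; the panel title is illustrative.

    # ui/general_blog.py side: a nav panel object named `ui`
    # (the RHS is evaluated before the rebind, so `ui.*` still
    # refers to the shiny module inside the call)
    from shiny import ui

    ui = ui.nav_panel(
        "General Blog",                            # illustrative title
        ui.input_text("blog_url", "Article URL"),
        ui.output_ui("blog_result_gen"),
    )

    # server/general_blog.py side: a `server(input, output, session)` callable
    def server(input, output, session):
        ...  # this module's outputs and effects are registered here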
server/general_blog.py CHANGED

@@ -2,8 +2,8 @@ import os, sys, re, ast, time, requests
 from bs4 import BeautifulSoup
 from pytrends.request import TrendReq
 from shiny import ui, reactive, render
+from playwright.async_api import async_playwright
 from dotenv import load_dotenv
-import tweepy
 
 # === LLM Connect ===
 sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "code")))
@@ -16,53 +16,29 @@ SHOPIFY_TOKEN = os.getenv("SHOPIFY_TOKEN")
 SHOPIFY_API_VERSION = "2024-04"
 BLOG_ID = "73667707064"
 
-API_KEY = os.getenv("TWITTER_ACC_API_KEY")
-API_SECRET = os.getenv("TWITTER_ACC_API_SECRET")
-ACCESS_TOKEN = os.getenv("TWITTER_ACC_ACCESS_TOKEN")
-ACCESS_TOKEN_SECRET = os.getenv("TWITTER_ACC_ACCESS_TOKEN_SECRET")
-
-client = tweepy.Client(
-    consumer_key=API_KEY,
-    consumer_secret=API_SECRET,
-    access_token=ACCESS_TOKEN,
-    access_token_secret=ACCESS_TOKEN_SECRET
-)
-
-generated_tweet = reactive.Value("")
-
-def generate_tweet_from_topic(topic: str) -> str:
-    prompt = (
-        f"You are a social media manager for a hobby e-commerce company called 'Ultima Supply'.\n"
-        f"Write a detailed, engaging Twitter post (min 200 characters max 280 characters) about this new blog post: '{topic}'.\n"
-        f"Include emojis and/or 3-5 SEO relevant hashtags. Use casual, fun language."
-        f"Also include a link to 'ultimasupply.com/blogs/news' to notify users of the new post."
-    )
-
-    return get_response(
-        input=prompt,
-        template=lambda x: x.strip(),
-        llm='gemini',
-        md=False,
-        temperature=0.9,
-        max_tokens=500
-    )
-
 # === Static scraper for pokemon.com ===
 def scrape_section_content_from_url(url: str) -> str:
     try:
-        resp = requests.get(url, timeout=10, headers={
+        resp = requests.get(url, timeout=10, headers={
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
+            "Accept-Language": "en-US,en;q=0.9",
+            "Referer": "https://www.google.com/"
+        })
         if not resp.ok:
             print(f"[ERROR] Request failed: {resp.status_code}")
             return ""
 
         soup = BeautifulSoup(resp.text, "html.parser")
-        sections = soup.find_all("section", class_="bodytext", itemprop="articleBody")
-
-
+
+        # Match all divs that contain the full class string
+        content_blocks = soup.find_all("div")
+
+        if not content_blocks:
+            print("[WARN] No content blocks matched.")
             return ""
 
-        texts = [
+        texts = [div.get_text(separator=" ", strip=True) for div in content_blocks]
+        print(f"[INFO] Extracted {len(texts)} content blocks.")
         return "\n\n".join(texts)
 
     except Exception as e:
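Two things changed in the scraper: the request now sends browser-like headers (User-Agent, Accept-Language, Referer), which makes simple bot filtering less likely to return a 403, and extraction switched from pokemon.com's section.bodytext markup to soup.find_all("div"). Note that the committed comment mentions matching a class string, but the call as written matches every div on the page, including nested wrappers, so the joined text will usually contain repeats. A sketch of a narrower selection, assuming the target page keeps some stable wrapper class (the class name below is hypothetical):

    # hypothetical: scope the search to a known wrapper class instead of all divs,
    # so nested containers do not contribute the same text twice
    blocks = soup.find_all("div", class_="article-body")
    texts = [b.get_text(separator=" ", strip=True) for b in blocks]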
@@ -70,33 +46,56 @@ def scrape_section_content_from_url(url: str) -> str:
         return ""
 
 
+
 # === Keyword generation + scraping ===
-def get_keywords_and_content(url: str, top_n=5, llm_n=25):
+async def get_keywords_and_content(url: str, top_n=5, llm_n=25):
     scraped_text = scrape_section_content_from_url(url)
     if not scraped_text:
         print("[ERROR] No scraped content. Cannot proceed.")
         return [], ""
 
+    # === Step 1: Extract condensed topic keywords ===
     try:
         condensed_prompt = (
-            "
-            "
-            "
-            "
+            "Extract exactly 5 to 7 Google search phrases from the content below that reflect real user search intent. "
+            "Each phrase should describe a specific product, use case, or collector topic — not generic brands or categories.\n\n"
+            "⚠️ Rules:\n"
+            "- Each phrase must be 2 to 5 words\n"
+            "- All phrases must be lowercase and ASCII-only\n"
+            "- Do NOT include apostrophes, single quotes, or quotation marks — rewrite or skip any phrases that contain them\n"
+            "- Do NOT include single words or overly broad terms like 'pokemon'\n"
+            "- Do NOT return line breaks, bullet points, or list formatting\n\n"
+            "✅ Output format:\n"
+            "Return a single comma-separated string of keyword phrases, with no brackets, no quotes, and no explanation.\n"
+            "Example output:\n"
+            "vintage charizard value, graded card pricing, rare booster packs, psa 10 umbreon, tcg price trends\n\n"
             f"Content:\n{scraped_text}"
         )
-
-
-
-
-
-
-
-
+
+        condensed_topic_raw = get_response(
+            input=condensed_prompt,
+            template=lambda x: x.strip(),
+            llm="gemini",
+            md=False,
+            temperature=0.6,
+            max_tokens=100,
+            model_name="gemini-2.0-flash-lite"
+        )
+        print(condensed_topic_raw)
+
+        # Parse comma-separated string
+        condensed_topic = [kw.strip() for kw in condensed_topic_raw.split(",") if kw.strip()]
+
+        if not condensed_topic:
+            condensed_topic = ["trading cards"]
+
+        print(f"[INFO] Condensed topic keywords: {condensed_topic}")
     except Exception as e:
-        print(f"[WARN]
+        print(f"[WARN] Could not infer topics: {e}")
         condensed_topic = ["trading cards"]
 
+    # === Step 2: Pull suggestions from PyTrends ===
     all_suggestions = set()
     try:
         pytrends = TrendReq(hl="en-US", tz=360, timeout=10)
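Step 2 expands each condensed phrase through Google Trends autocomplete. Run in isolation, the call looks like the sketch below; pytrends' suggestions() returns a list of dicts with "title", "type", and "mid" keys, and the seed phrase is illustrative:

    from pytrends.request import TrendReq

    pytrends = TrendReq(hl="en-US", tz=360, timeout=10)
    # each suggestion is a dict like {"mid": ..., "title": ..., "type": ...}
    for s in pytrends.suggestions("graded card pricing"):  # illustrative seed
        print(s["title"], "/", s["type"])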
@@ -106,21 +105,31 @@ def get_keywords_and_content(url: str, top_n=5, llm_n=25):
         if suggestions:
             titles = [s["title"] for s in suggestions]
             all_suggestions.update(titles)
+            print(f"[INFO] Suggestions for '{topic}': {titles[:3]}")
     except Exception as e:
-        print(f"[WARN] PyTrends failed: {e}")
+        print(f"[WARN] PyTrends suggestions failed: {e}")
 
+    all_suggestions = list(all_suggestions)
+
+    # === Step 3: Let Gemini filter suggestions for relevance ===
     filtered_keywords = []
     if all_suggestions:
         filter_prompt = (
-            f"
-            f"
-            "Return only
-        )
-
-            template=lambda x: x.strip(),
-            llm="gemini",
-            temperature=0.3,
-            md=False)
+            f"The following article was scraped:\n\n{scraped_text[:1500]}\n\n"
+            f"Here is a list of keyword suggestions:\n{all_suggestions}\n\n"
+            "Return only the keywords that are clearly relevant to the article topic. "
+            "Return a valid Python list of strings only. No explanation, bullets, or formatting."
+        )
+
+        raw_filtered = get_response(
+            input=filter_prompt,
+            template=lambda x: x.strip(),
+            llm="gemini",
+            md=False,
+            temperature=0.3,
+            max_tokens=200
+        )
+
         match = re.search(r"\[.*?\]", raw_filtered)
         if match:
             try:
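The body of the try block after the regex match is elided in both panes of the diff. Given the ast import at the top of the file, it most likely parses the bracketed reply with ast.literal_eval; a sketch under that assumption:

    # assumption: the elided try-body parses the model's "['kw one', 'kw two']"
    # style reply into a Python list
    match = re.search(r"\[.*?\]", raw_filtered)
    if match:
        try:
            filtered_keywords = ast.literal_eval(match.group(0))
        except (ValueError, SyntaxError):
            filtered_keywords = []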
@@ -128,71 +137,67 @@
         except:
             filtered_keywords = []
 
+    # === Step 4: Fallback to Gemini keyword generation if needed ===
     if not filtered_keywords:
         fallback_prompt = (
-            f"Generate {llm_n} niche SEO
-            "
+            f"You are an SEO expert. Generate {llm_n} niche-relevant SEO keywords "
+            f"based on this content:\n\n{scraped_text}\n\n"
+            "Return a comma-separated list of lowercase 2–5 word search phrases. No formatting."
         )
-        fallback_keywords_raw = get_response(fallback_prompt,
-            template=lambda x: x.strip(),
-            llm="gemini",
-            md=False,
-            temperature=0.7)
+        fallback_keywords_raw = get_response(
+            input=fallback_prompt,
+            template=lambda x: x.strip(),
+            llm="gemini",
+            md=False,
+            temperature=0.7,
+            max_tokens=400
+        )
         filtered_keywords = [kw.strip() for kw in fallback_keywords_raw.split(",") if kw.strip()]
+        print(f"[INFO] Fallback keywords used: {filtered_keywords[:top_n]}")
 
-
+    # === Step 5: Enforce minimum of 30 keywords ===
+    combined_keywords = list(dict.fromkeys(filtered_keywords))  # remove duplicates
     if len(combined_keywords) < 30:
         needed = 30 - len(combined_keywords)
+        print(f"[INFO] Need {needed} more keywords to reach 30. Using Gemini to pad.")
+
         pad_prompt = (
-            f"
-            f"
-            "
+            f"The following article content is missing SEO keyword coverage:\n\n"
+            f"{scraped_text}\n\n"
+            f"Generate exactly {needed} additional SEO keyword phrases.\n"
+            "Each keyword must:\n"
+            "- be 2 to 5 words long\n"
+            "- be lowercase only\n"
+            "- use ASCII characters only (no symbols or accents)\n"
+            "- be clearly relevant to the article\n"
+            "- avoid generic terms like 'pokemon'\n\n"
+            "Return only the keywords as a single comma-separated string, with no extra formatting or explanation.\n"
+            "Example output:\n"
+            "keyword one, keyword two, keyword three"
         )
-
-
-
-
-
-
-
+
+        pad_raw = get_response(
+            input=pad_prompt,
+            template=lambda x: x.strip(),
+            llm="gemini",
+            md=False,
+            temperature=0.7,
+            max_tokens=200
+        )
+
+        pad_keywords = []
+        print(pad_raw)
+
+        try:
+            pad_keywords = [kw.strip() for kw in pad_raw.split(",") if kw.strip()]
+        except Exception as e:
+            print(f"[WARN] Keyword parsing failed: {e}")
+            pad_keywords = []
+
         combined_keywords = list(dict.fromkeys(combined_keywords + pad_keywords))
-
-
-
-
-
-# === Blog generation ===
-def generate_blog_post(scraped_text: str, keywords: list[str]) -> tuple[str, str]:
-    keyword_str = ", ".join(keywords)
-
-    title_prompt = (
-        f"Based on the following article:\n\n{scraped_text[:2000]}\n\n"
-        f"Return a short, descriptive blog post title (max 70 characters). Just the title."
-    )
-    title = get_response(title_prompt,
-        template=lambda x: x.strip().replace('"', ''),
-        llm="gemini",
-        temperature=0.5,
-        md=False
-    )
-
-    blog_prompt = (
-        f"You are a content writer for a collectibles brand called 'Ultima Supply'.\n"
-        f"Adapt the following scraped content into a detailed, SEO-optimized HTML blog post.\n\n"
-        f"Scraped content:\n{scraped_text}\n\n"
-        f"Inject the following keywords naturally:\n{keyword_str}\n\n"
-        f"Use proper HTML: <h1> for title, <h2> for headers, <p> for text.\n"
-        f"Do NOT include markdown, images, code blocks, or backlinks to other websites.\n"
-        f"End with a call-to-action:\n<p>Visit <a href='https://ultima-supply.myshopify.com'>Ultima Supply</a> to explore more collectibles.</p>"
-    )
-    blog_html = get_response(blog_prompt,
-        template=lambda x: x.strip(),
-        llm="gemini",
-        temperature=0.9,
-        md=False)
-    blog_html = re.sub(r"```[a-zA-Z]*\n?", "", blog_html).replace("```", "").strip()
-
-    return title, blog_html
+        print(f"[INFO] Padded {len(pad_keywords)} keywords:", pad_keywords)
+
+    return combined_keywords[:30], scraped_text
 
 
 # === Shopify publisher ===
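After this hunk, the pipeline reads: scrape, condense to topic phrases, expand via PyTrends, filter with Gemini, fall back to pure LLM generation if filtering yields nothing, pad up to 30 keywords, and return (keywords, scraped_text). Nothing in the shown hunks actually awaits; the async signature appears to anticipate the newly imported Playwright API. Because the function is now a coroutine, callers outside Shiny need an event loop; a usage sketch with an illustrative URL:

    import asyncio

    async def demo():
        keywords, text = await get_keywords_and_content(
            "https://www.pokemon.com/us/pokemon-news/some-article"  # illustrative
        )
        print(len(keywords), "keywords; first five:", keywords[:5])

    asyncio.run(demo())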
@@ -208,93 +213,108 @@ def publish_blog_post(title: str, html_body: str, blog_id: str = BLOG_ID):
             "body_html": html_body
         }
     }
-    response = requests.post(url, json=data, headers=headers)
-    return (True, response.json()) if response.status_code == 201 else (False, response.text)
-
-def post_tweet(text: str) -> str:
-    try:
-        user = client.get_me()
-        print(f"[✅] Authenticated as: {user.data['username']} (ID: {user.data['id']})")
-    except Exception as e:
-        print(f"[❌] Failed to authenticate: {e}")
-
-    try:
-        print(text)
-        response = client.create_tweet(text=text)
-        return f"✅ Tweet posted (ID: {response.data['id']})"
-    except Exception as e:
-        return f"❌ Failed to post tweet: {e}"
+
+    response = requests.post(url, json=data, headers=headers)
+    if response.status_code == 201:
+        return True, response.json()
+    else:
+        return False, response.text
 
-# === Shiny Server ===
 # === Shiny Server ===
 def server(input, output, session):
     related_keywords = reactive.Value([])
     generated_blog = reactive.Value(("", ""))  # (title, html_content)
-    twitter_status = reactive.Value("")
 
     @output
     @render.ui
     @reactive.event(input.blog_generate_btn)
-    def blog_result_gen():
+    async def blog_result_gen():
        url = input.blog_url()
        if not url:
            return ui.HTML("<p><strong>⚠️ Please enter a URL.</strong></p>")
 
-        keywords, scraped = get_keywords_and_content(url)
-        if not scraped:
-            return ui.HTML("<p><strong>❌ Failed to scrape or extract content.</strong></p>")
-
+        keywords, scraped = await get_keywords_and_content(url)
         related_keywords.set(keywords)
-
-
-
-
+        keyword_str = ", ".join(keywords)
+
+        # Title generation from scraped text
+        infer_topic_prompt = (
+            f"Based on the following article content:\n\n{scraped[:2000]}\n\n"
+            f"Return a short, descriptive blog post title (max 70 characters)."
+            f"Return ONLY the TITLE"
+        )
+        seo_title = get_response(
+            input=infer_topic_prompt,
+            template=lambda x: x.strip().replace('"', ''),
+            llm="gemini",
+            md=False,
+            temperature=0.5,
+            max_tokens=20
+        )
+
+        # Blog generation with injected SEO
+        prompt = (
+            f"You are a content writer for a collectibles brand called 'Ultima Supply'.\n"
+            f"Given the following scraped content:\n\n{scraped}\n\n"
+            f"Rewrite this in an engaging, original, and heavily detailed SEO-optimized blog post.\n"
+            f"Naturally and organically integrate the following SEO keywords throughout the content:\n{keyword_str}\n\n"
+            f"⚠️ STRICT FORMATTING RULES (must be followed exactly):\n"
+            f"- Use <h1> for the blog title\n"
+            f"- Use <h2> for section headers\n"
+            f"- Use <p> for all paragraphs\n"
+            f"- NO Markdown, NO triple backticks, NO code blocks, NO formatting fences\n"
+            f"- DO NOT include any hyperlinks, URLs, web addresses, or references to any external sites or brands — no exceptions\n"
+            f"- DO NOT include any <a> tags except for the final line below\n\n"
+            f"✅ FINAL LINE ONLY:\n"
+            f"Add this exact call-to-action at the very end of the post inside its own <p> tag:\n"
+            f"Visit <a href='https://ultima-supply.myshopify.com'>Ultima Supply</a> to explore more collectibles."
+        )
+
+        blog_html = get_response(
+            input=prompt,
+            template=lambda x: x.strip(),
+            llm="gemini",
+            md=False,
+            temperature=0.9,
+            max_tokens=5000
+        )
+
+        blog_html = re.sub(r"```[a-zA-Z]*\n?", "", blog_html).strip()
+        blog_html = blog_html.replace("```", "").strip()
+
+        generated_blog.set((seo_title, blog_html))
 
         return ui.HTML(
-            f"<p><strong>✅ Blog generated with title:</strong> {
+            f"<p><strong>✅ Blog generated with title:</strong> {seo_title}</p>"
             f"<p>Click 'Post to Shopify' to publish.</p>{blog_html}"
-            f"<p><strong>Generated Tweet:</strong><br>{tweet}</p>"
         )
-
-    @output
-    @render.text
-    def tweet_post_status_gen_blog():
-        return twitter_status()
 
     @output
     @render.ui
-    def
+    def keywords_used_gen():
         kws = related_keywords()
         if not kws:
             return ui.HTML("<p><strong>No SEO keywords retrieved yet.</strong></p>")
+
         return ui.HTML(
-            f"<p><strong>✅ SEO Keywords Injected ({len(kws)}):</strong></p><ul>"
-            "".join(f"<li>{kw}</li>" for kw in kws) +
+            f"<p><strong>✅ SEO Keywords Injected ({len(kws)}):</strong></p><ul>"
+            + "".join(f"<li>{kw}</li>" for kw in kws) +
             "</ul>"
         )
 
     @reactive.effect
     @reactive.event(input.blog_post_btn)
     def post_to_shopify():
-
+        seo_title, html = generated_blog()
+
         if not html:
             ui.notification_show("⚠️ No blog generated yet.", type="warning")
             return
-
+
+        success, response = publish_blog_post(title=seo_title, html_body=html)
+
         if success:
             ui.notification_show("✅ Blog posted to Shopify successfully!", type="message")
         else:
             ui.notification_show(f"❌ Failed to publish: {response}", type="error")
-
-    @reactive.effect
-    @reactive.event(input.blog_post_btn)
-    def _():
-        tweet = generated_tweet()
-        if not tweet:
-            twitter_status.set("⚠️ No tweet generated yet.")
-        else:
-            twitter_result = post_tweet(tweet)
-            twitter_status.set(twitter_result)
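The hunk only shows the tail of publish_blog_post; the url, headers, and data built above the shown context are not in the diff. Under the standard Shopify Admin REST shape they would look roughly like the sketch below (an assumption, not the committed code; the store domain is a placeholder):

    url = (
        f"https://<store>.myshopify.com/admin/api/{SHOPIFY_API_VERSION}"
        f"/blogs/{blog_id}/articles.json"
    )
    headers = {
        "X-Shopify-Access-Token": SHOPIFY_TOKEN,
        "Content-Type": "application/json",
    }
    data = {
        "article": {
            "title": title,
            "body_html": html_body
        }
    }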
ui/general_blog.py CHANGED

@@ -15,6 +15,5 @@ ui = ui.nav_panel(
 
     # Scoped outputs
     ui.output_ui("blog_result_gen"),
-    ui.output_ui("blog_keywords_used_gen")
-    ui.output_text('tweet_post_status_gen_blog')
+    ui.output_ui("blog_keywords_used_gen")
 )
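One loose end worth flagging: the panel registers ui.output_ui("blog_keywords_used_gen"), while the server function added in this commit is named keywords_used_gen. Shiny pairs an output id with the decorated function of the same name, so as committed the keyword list would likely never render. A sketch of the server-side rename that would align the two:

    @output
    @render.ui
    def blog_keywords_used_gen():  # must match ui.output_ui("blog_keywords_used_gen")
        kws = related_keywords()
        ...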