LinkedInPostGenerator1.1

Sleeping

App Files Files Community

Alpha108 committed on Nov 8, 2025

Commit

fb2e00d

verified ·

1 Parent(s): 88209fc

Update app.py

Browse files

Files changed (1) hide show

app.py +102 -136

app.py CHANGED Viewed

@@ -5,19 +5,18 @@ import math
 import streamlit as st
 import pandas as pd
-# =========================
-# 0) CONFIG / CONSTANTS
-# =========================
-GROQ_DEFAULT_MODEL = "llama-3.3-70b-versatile"  # Sidebar lets you change
-MAX_KEYWORDS = 12
-SEED_STOPWORDS = set("""
 a an and the or for nor but so yet of to in on with at by from as is are was were be being been
 i you he she it we they them us our your their this that these those here there
 """.split())
-# =========================
-# 1) GROQ CLIENT
-# =========================
 try:
     from groq import Groq
 except ImportError:
@@ -26,29 +25,31 @@ except ImportError:
 def get_groq_client():
     api_key = os.getenv("GROQ_API_KEY")
     if not api_key:
-        raise RuntimeError("Missing GROQ_API_KEY. Set it in Space → Settings → Variables & Secrets.")
     if Groq is None:
         raise RuntimeError("Package 'groq' not installed. Add 'groq' to requirements.txt.")
     return Groq(api_key=api_key)
-def groq_generate(prompt, model, temperature, top_p, max_tokens):
     client = get_groq_client()
     resp = client.chat.completions.create(
         model=model,
         messages=[
-            {"role": "system", "content": "You craft concise, insightful LinkedIn posts that feel original and practical."},
             {"role": "user", "content": prompt}
         ],
         temperature=temperature,
         top_p=top_p,
         max_tokens=max_tokens,
-        n=1
     )
     return resp.choices[0].message.content.strip()
-# =========================
-# 2) TEXT UTILS
-# =========================
 def dedupe_sentences(text: str) -> str:
     parts = re.split(r'(?<=[.!?])\s+', text.strip())
     seen = set()
@@ -74,12 +75,9 @@ def strip_labels(text: str) -> str:
         cleaned.append(L)
     return "\n".join(cleaned).strip()
-def clamp(n, lo, hi):
-    return max(lo, min(hi, n))
-# =========================
-# 3) DATA INGEST & KEYWORDS
-# =========================
 def load_posts_from_file(file) -> pd.DataFrame:
     name = file.name.lower()
     if name.endswith(".csv"):
@@ -87,13 +85,12 @@ def load_posts_from_file(file) -> pd.DataFrame:
     elif name.endswith(".json"):
         df = pd.read_json(file, lines=False)
     else:
-        raise ValueError("Upload a CSV or JSON file containing LinkedIn posts.")
-    # Normalize to 'text' column
-    candidate = [c for c in df.columns if c.lower() in ("text", "post", "content", "body")]
-    if not candidate:
-        raise ValueError("Dataset must have a 'text' (or post/content/body) column.")
     if "text" not in df.columns:
-        df["text"] = df[candidate[0]]
     df["text"] = df["text"].fillna("").astype(str)
     return df[["text"]]
@@ -101,7 +98,7 @@ def simple_rake(text, min_len=2, max_len=3, top_k=12):
     words = re.findall(r"[A-Za-z0-9#+\-_/']+", text.lower())
     phrases, cur = [], []
     for w in words:
-        if w in SEED_STOPWORDS:
             if cur:
                 phrases.append(" ".join(cur))
                 cur = []
@@ -109,178 +106,147 @@ def simple_rake(text, min_len=2, max_len=3, top_k=12):
             cur.append(w)
     if cur:
         phrases.append(" ".join(cur))
-    # Score by frequency+degree
     freq, degree = {}, {}
     for ph in phrases:
         toks = ph.split()
         for t in toks:
             freq[t] = freq.get(t, 0) + 1
-            degree[t] = degree.get(t, 0) + (len(toks) - 1)
     scores = {}
     for ph in phrases:
         s = 0.0
         for t in ph.split():
-            s += (degree.get(t, 0) + 1) / (freq.get(t, 1))
-        scores[ph] = scores.get(ph, 0) + s
     ranked = sorted(scores.items(), key=lambda x: x[1], reverse=True)
-    filtered = [p for p, _ in ranked if min_len <= len(p.split()) <= max_len]
     return filtered[:top_k]
-def tfidf_keywords_builder(texts, top_k=10):
     docs = [re.findall(r"[A-Za-z0-9#+\-_/']+", t.lower()) for t in texts]
     vocab = {}
     for d in docs:
         for w in set(d):
-            vocab.setdefault(w, {"df": 0})
-            vocab[w]["df"] += 1
     N = len(docs)
-    def score_doc(text):
         doc = re.findall(r"[A-Za-z0-9#+\-_/']+", text.lower())
         tf = {}
         for w in doc:
-            tf[w] = tf.get(w, 0) + 1
         scores = {}
-        for w, cnt in tf.items():
-            df = vocab.get(w, {}).get("df", 1)
-            idf = math.log((N + 1) / (df + 1)) + 1
-            scores[w] = (cnt / len(doc)) * idf
         ranked = sorted(scores.items(), key=lambda x: x[1], reverse=True)
-        return [w for w, _ in ranked[:top_k]]
-    return score_doc
-def extract_keywords(topic: str, posts_df: pd.DataFrame | None):
-    if posts_df is not None and len(posts_df):
-        sample = posts_df["text"].sample(min(30, len(posts_df)), random_state=42).tolist()
-        rake_kw = simple_rake(" ".join(sample + [topic]), min_len=2, max_len=3, top_k=MAX_KEYWORDS)
-        tfidf_fn = tfidf_keywords_builder(posts_df["text"].tolist(), top_k=MAX_KEYWORDS//2)
         kw2 = tfidf_fn(topic + " " + " ".join(sample[:5]))
-        all_kw = rake_kw + kw2
     else:
-        all_kw = simple_rake(topic, min_len=1, max_len=2, top_k=8)
     seen, out = set(), []
-    for k in all_kw:
-        k2 = re.sub(r"\s+", " ", k.strip().lower())
         if k2 and k2 not in seen:
-            seen.add(k2)
-            out.append(k2)
-    return out[:MAX_KEYWORDS]
-# =========================
-# 4) PROMPT (PLAIN OUTPUT)
-# =========================
-def build_viral_prompt(topic, audience, tone, target_len, style_refs, keywords):
-    style_block = "\n".join(f"- {s}" for s in style_refs[:4]) if style_refs else "- None"
-    kw_block = ", ".join(keywords[:8]) if keywords else "N/A"
     return (
         "You are a senior LinkedIn content strategist.\n"
-        "Objective: Write a viral, insightful LinkedIn post as plain text only (no section headers, no labels), "
-        f"around {target_len} words, for the audience and topic below.\n\n"
         f"Topic: \"{topic}\"\n"
-        f"Audience: \"{audience}\"\n"
         f"Tone: \"{tone}\"\n"
-        f"Keywords to naturally weave in: {kw_block}\n\n"
-        "Style cues (reflect these, do not list them):\n"
-        f"{style_block}\n\n"
-        "Apply silently (do not mention these rules):\n"
-        "- Open with a curiosity-driving first line.\n"
-        "- Use short sentences and short paragraphs.\n"
-        "- Include 3–5 concrete insights, examples, or steps (bullets allowed, but no section labels).\n"
-        "- Be specific, novel, and practical; avoid clichés and filler.\n"
-        "- Use up to 2 emojis; add 2–4 niche hashtags only at the very end (optional).\n"
-        "- Never output headings like HOOK/BODY/TAKEAWAY/CTA.\n"
-        "- Do not repeat the phrase: “it's a great example of how we can make a difference in the world.”\n\n"
-        "Output: A single cohesive LinkedIn post as plain text only. No headings. No metadata. No explanations."
     )
-# =========================
-# 5) STREAMLIT UI
-# =========================
-st.set_page_config(page_title="LinkedIn Post Generator — Groq", layout="centered")
-st.title("🔗 LinkedIn Post Generator — Dataset Keywords + Groq")
-st.caption("Upload sample posts, extract keywords, and generate plain-text viral posts via Groq.")
 with st.sidebar:
     st.subheader("Groq & Decoding")
     model = st.selectbox(
         "Groq model",
-        options=[
-            "llama-3.3-70b-versatile",
-            "llama-3.1-8b-instant",
-            "mixtral-8x7b-32768"
-        ],
         index=0
     )
     temperature = st.slider("Temperature", 0.1, 1.2, 0.6, 0.05)
-    top_p = st.slider("Top-p", 0.1, 1.0, 0.9, 0.05)
     target_len = st.slider("Target length (words)", 60, 300, 140, 10)
     st.markdown("Set GROQ_API_KEY in Space → Settings → Variables & Secrets.")
-with st.form("gen_form"):
     topic = st.text_input("Topic", "Generative AI for Business")
-    tone = st.selectbox("Tone", ["Professional", "Friendly", "Inspirational", "Technical", "Concise"], index=0)
-    audience = st.text_input("Audience", "Startup founders")
-    st.markdown("### Upload dataset (CSV/JSON) of LinkedIn posts")
-    uploaded = st.file_uploader("Dataset must include a 'text' (or 'post'/'content'/'body') column.", type=["csv", "json"])
     st.markdown("Optional: add up to 4 style cues (one per line).")
-    style_textarea = st.text_area("Style cues", value="", placeholder="Short hooks\nActionable bullets\nStories with numbers\nTactical CTA")
-    submitted = st.form_submit_button("Generate Post")
 if submitted:
     if not os.getenv("GROQ_API_KEY"):
         st.error("GROQ_API_KEY missing. Add it in Space → Settings → Variables & Secrets.")
         st.stop()
-    if not topic.strip():
-        st.warning("Please enter a topic.")
-        st.stop()
-    # Load dataset if provided
     posts_df = None
-    if uploaded:
         try:
             posts_df = load_posts_from_file(uploaded)
         except Exception as e:
             st.error(f"Dataset error: {e}")
             st.stop()
-    # Extract keywords
     keywords = extract_keywords(topic, posts_df)
-    # Style cues
-    style_refs = []
-    if style_textarea.strip():
-        style_refs = [s.strip() for s in style_textarea.splitlines() if s.strip()]
-        style_refs = style_refs[:4]
-    # Build prompt and generate
-    prompt = build_viral_prompt(
-        topic=topic,
-        audience=audience,
-        tone=tone,
-        target_len=target_len,
-        style_refs=style_refs,
-        keywords=keywords
-    )
     with st.spinner("Generating with Groq..."):
         try:
-            max_tokens = clamp(int(target_len * 1.6) + 120, 200, 1200)
-            txt = groq_generate(
-                prompt=prompt,
-                model=model,
-                temperature=temperature,
-                top_p=top_p,
-                max_tokens=max_tokens
-            )
-            # Clean and display
-            txt = dedupe_sentences(strip_labels(txt))
-            st.success("Generated Post")
-            st.write(txt)
-            st.download_button("Download (.txt)", txt, file_name="linkedin_post.txt")
-            with st.expander("Debug: keywords & prompt"):
-                st.write({"keywords": keywords, "style_refs": style_refs})
-                st.code(prompt)
         except Exception as e:
             st.error(f"Groq generation failed: {e}")

 import streamlit as st
 import pandas as pd
+# ─────────────────────────────────────────
+# Config
+# ─────────────────────────────────────────
+DEFAULT_MODEL = "llama-3.3-70b-versatile"  # Groq
+STOPWORDS = set("""
 a an and the or for nor but so yet of to in on with at by from as is are was were be being been
 i you he she it we they them us our your their this that these those here there
 """.split())
+# ─────────────────────────────────────────
+# Groq client
+# ─────────────────────────────────────────
 try:
     from groq import Groq
 except ImportError:
 def get_groq_client():
     api_key = os.getenv("GROQ_API_KEY")
     if not api_key:
+        raise RuntimeError("Missing GROQ_API_KEY. Set in Space → Settings → Variables & Secrets.")
     if Groq is None:
         raise RuntimeError("Package 'groq' not installed. Add 'groq' to requirements.txt.")
     return Groq(api_key=api_key)
+def groq_chat(prompt, model, temperature, top_p, max_tokens):
     client = get_groq_client()
     resp = client.chat.completions.create(
         model=model,
         messages=[
+            {"role": "system", "content": "You craft concise, original, high-signal LinkedIn posts."},
             {"role": "user", "content": prompt}
         ],
         temperature=temperature,
         top_p=top_p,
         max_tokens=max_tokens,
     )
     return resp.choices[0].message.content.strip()
+# ─────────────────────────────────────────
+# Utilities
+# ─────────────────────────────────────────
+def clamp(n, lo, hi):
+    return max(lo, min(hi, n))
 def dedupe_sentences(text: str) -> str:
     parts = re.split(r'(?<=[.!?])\s+', text.strip())
     seen = set()
         cleaned.append(L)
     return "\n".join(cleaned).strip()
+# ─────────────────────────────────────────
+# Dataset ingest + keywords (optional, improves relevance)
+# ─────────────────────────────────────────
 def load_posts_from_file(file) -> pd.DataFrame:
     name = file.name.lower()
     if name.endswith(".csv"):
     elif name.endswith(".json"):
         df = pd.read_json(file, lines=False)
     else:
+        raise ValueError("Upload CSV or JSON.")
+    cand = [c for c in df.columns if c.lower() in ("text","post","content","body")]
+    if not cand:
+        raise ValueError("Dataset must contain a 'text' (or post/content/body) column.")
     if "text" not in df.columns:
+        df["text"] = df[cand[0]]
     df["text"] = df["text"].fillna("").astype(str)
     return df[["text"]]
     words = re.findall(r"[A-Za-z0-9#+\-_/']+", text.lower())
     phrases, cur = [], []
     for w in words:
+        if w in STOPWORDS:
             if cur:
                 phrases.append(" ".join(cur))
                 cur = []
             cur.append(w)
     if cur:
         phrases.append(" ".join(cur))
     freq, degree = {}, {}
     for ph in phrases:
         toks = ph.split()
         for t in toks:
             freq[t] = freq.get(t, 0) + 1
+            degree[t] = degree.get(t, 0) + (len(toks)-1)
     scores = {}
     for ph in phrases:
         s = 0.0
         for t in ph.split():
+            s += (degree.get(t,0)+1)/ (freq.get(t,1))
+        scores[ph] = scores.get(ph,0)+s
     ranked = sorted(scores.items(), key=lambda x: x[1], reverse=True)
+    filtered = [p for p,_ in ranked if min_len <= len(p.split()) <= max_len]
     return filtered[:top_k]
+def tfidf_builder(texts, top_k=8):
     docs = [re.findall(r"[A-Za-z0-9#+\-_/']+", t.lower()) for t in texts]
     vocab = {}
     for d in docs:
         for w in set(d):
+            vocab[w] = vocab.get(w,0)+1
     N = len(docs)
+    def score(text):
         doc = re.findall(r"[A-Za-z0-9#+\-_/']+", text.lower())
         tf = {}
         for w in doc:
+            tf[w] = tf.get(w,0)+1
         scores = {}
+        for w,c in tf.items():
+            df = vocab.get(w,1)
+            idf = math.log((N+1)/(df+1))+1
+            scores[w] = (c/len(doc))*idf
         ranked = sorted(scores.items(), key=lambda x: x[1], reverse=True)
+        return [w for w,_ in ranked[:top_k]]
+    return score
+def extract_keywords(topic, df: pd.DataFrame|None):
+    if df is not None and len(df):
+        sample = df["text"].sample(min(30, len(df)), random_state=42).tolist()
+        rake_kw = simple_rake(" ".join(sample + [topic]), min_len=2, max_len=3, top_k=12)
+        tfidf_fn = tfidf_builder(df["text"].tolist(), top_k=8)
         kw2 = tfidf_fn(topic + " " + " ".join(sample[:5]))
+        raw = rake_kw + kw2
     else:
+        raw = simple_rake(topic, min_len=1, max_len=2, top_k=8)
     seen, out = set(), []
+    for k in raw:
+        k2 = re.sub(r"\s+"," ",k.strip().lower())
         if k2 and k2 not in seen:
+            seen.add(k2); out.append(k2)
+    return out[:12]
+# ─────────────────────────────────────────
+# Stage‑2 Prompt (hidden structure, plain output)
+# ─────────────────────────────────────────
+def build_stage2_prompt(topic, language, target_len, tone, keywords=None, style_cues=None):
+    kw_block = ", ".join((keywords or [])[:8]) if keywords else "N/A"
+    cues_block = "\n".join(f"- {c}" for c in (style_cues or [])[:4]) if style_cues else "- None"
     return (
         "You are a senior LinkedIn content strategist.\n"
+        "Objective: Write a viral, insightful LinkedIn post as plain text only (no section headers, no labels).\n\n"
+        f"Language: {language}\n"
         f"Topic: \"{topic}\"\n"
         f"Tone: \"{tone}\"\n"
+        f"Approx length: ~{target_len} words\n"
+        f"Keywords to weave in naturally: {kw_block}\n"
+        "Style cues (apply silently):\n"
+        f"{cues_block}\n\n"
+        "Apply without mentioning rules:\n"
+        "- Curiosity‑driven first line.\n"
+        "- Short paragraphs; concrete, novel insights (3–5), examples welcome.\n"
+        "- Max 2 emojis; 2–4 niche hashtags only at very end (optional).\n"
+        "- No repeated sentences; avoid clichés.\n"
+        "- Output must be one cohesive post in plain text. No labels or headings."
     )
+# ─────────────────────────────────────────
+# UI
+# ─────────────────────────────────────────
+st.set_page_config(page_title="LinkedIn Post Generator — Stage 2 (Groq)", layout="centered")
+st.title("Stage 2: Topic → Prompt → Llama‑3.x (Groq) → 3 Variants")
 with st.sidebar:
     st.subheader("Groq & Decoding")
     model = st.selectbox(
         "Groq model",
+        options=["llama-3.3-70b-versatile","llama-3.1-8b-instant","mixtral-8x7b-32768"],
         index=0
     )
     temperature = st.slider("Temperature", 0.1, 1.2, 0.6, 0.05)
+    top_p = st.slider("Top‑p", 0.1, 1.0, 0.9, 0.05)
     target_len = st.slider("Target length (words)", 60, 300, 140, 10)
     st.markdown("Set GROQ_API_KEY in Space → Settings → Variables & Secrets.")
+with st.form("stage2_form"):
     topic = st.text_input("Topic", "Generative AI for Business")
+    language = st.selectbox("Language", ["English","Urdu","Arabic","French","Spanish"], index=0)
+    tone = st.selectbox("Tone", ["Professional","Friendly","Inspirational","Technical","Concise"], index=0)
+    st.markdown("Optional: upload a dataset of past LinkedIn posts (CSV/JSON) with a 'text' column.")
+    uploaded = st.file_uploader("Upload CSV/JSON", type=["csv","json"])
     st.markdown("Optional: add up to 4 style cues (one per line).")
+    style_text = st.text_area("Style cues", value="", placeholder="Short hooks\nActionable bullets\nStories with numbers\nTactical CTA")
+    submitted = st.form_submit_button("Generate 3 Variants")
 if submitted:
     if not os.getenv("GROQ_API_KEY"):
         st.error("GROQ_API_KEY missing. Add it in Space → Settings → Variables & Secrets.")
         st.stop()
     posts_df = None
+    if uploaded is not None:
         try:
             posts_df = load_posts_from_file(uploaded)
         except Exception as e:
             st.error(f"Dataset error: {e}")
             st.stop()
     keywords = extract_keywords(topic, posts_df)
+    style_cues = [s.strip() for s in style_text.splitlines() if s.strip()][:4]
+    prompt = build_stage2_prompt(topic, language, target_len, tone, keywords, style_cues)
+    st.subheader("Variants")
+    variants = []
     with st.spinner("Generating with Groq..."):
         try:
+            max_tokens = clamp(int(target_len*1.6)+120, 200, 1200)
+            # Generate 3 separate candidates
+            for i in range(3):
+                raw = groq_chat(prompt, model, temperature, top_p, max_tokens)
+                clean = dedupe_sentences(strip_labels(raw))
+                variants.append(clean)
         except Exception as e:
             st.error(f"Groq generation failed: {e}")
+            st.stop()
+    for i, v in enumerate(variants, start=1):
+        st.markdown(f"### Post {i}")
+        st.write(v)
+        st.download_button(f"Download Post {i}", v, file_name=f"post_{i}.txt")