"""Streamlit app that drafts and refines LinkedIn posts with a Groq-hosted LLM.

Flow: the user fills in the main form, optionally uploads past posts (used only
to extract keywords), answers clarifier questions when purpose/evidence are
missing, and then generates a post that can be refined with one-click edits.
"""

import json
import math
import os
import re
from collections import Counter

import pandas as pd
import streamlit as st

# ─────────────────────────────────────────
# Config
# ─────────────────────────────────────────
DEFAULT_MODEL = "llama-3.3-70b-versatile"
STOPWORDS = set("""
a an and the or for nor but so yet of to in on with at by from as is are was were be being been
i you he she it we they them us our your their this that these those here there
""".split())

# Word-ish tokens: letters, digits and a few symbols common in tags/tech terms
# (#, +, -, _, /, '). The hyphen is escaped so it is a literal, not a range.
_TOKEN_RE = re.compile(r"[A-Za-z0-9#+\-_/']+")

# Leading "Hook:", "Body:", "Takeaway:", "CTA:" labels (any case, possibly chained).
_LABEL_RE = re.compile(r"^\s*(?:(?:hook|body|takeaway|cta):\s*)+", re.IGNORECASE)

# Token budget used for every refinement request.
_REFINE_MAX_TOKENS = 600

# Refinement operation -> instruction sent to the model.
REFINE_INSTRUCTIONS = {
    "shorter": "Shorten to ~120 words. Keep the opening intact. Return plain text only.",
    "punchier": "Make the hook more punchy and contrarian; keep total length similar. Plain text only.",
    "add_data": "Add one concrete metric or example to support the main claim. Plain text only.",
    "less_emoji": "Remove emojis entirely. Plain text only.",
    "add_tags": "Append 2–4 niche hashtags at the end (new line). Plain text only.",
}

# (button label, refinement operation), in display order.
REFINE_BUTTONS = (
    ("Shorter", "shorter"),
    ("Punchier hook", "punchier"),
    ("Add data point", "add_data"),
    ("No emojis", "less_emoji"),
    ("Add hashtags", "add_tags"),
)

# ─────────────────────────────────────────
# Groq client
# ─────────────────────────────────────────
try:
    from groq import Groq
except ImportError:
    # Reported lazily by get_groq_client() so the UI can still render.
    Groq = None


def get_groq_client():
    """Return a Groq client built from the GROQ_API_KEY environment variable.

    Raises:
        RuntimeError: if the API key is not set or the `groq` package is missing.
    """
    api_key = os.getenv("GROQ_API_KEY")
    if not api_key:
        raise RuntimeError("Missing GROQ_API_KEY. Set it in Space → Settings → Variables & Secrets.")
    if Groq is None:
        raise RuntimeError("Package 'groq' not installed. Add 'groq' to requirements.txt.")
    return Groq(api_key=api_key)


def groq_chat(prompt, model, temperature, top_p, max_tokens):
    """Send `prompt` as a single user message and return the stripped reply text.

    Returns an empty string when the first choice carries no content.
    Errors from client creation or the API call propagate to the caller.
    """
    client = get_groq_client()
    resp = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "You craft concise, original, high-signal LinkedIn posts. Respond with plain text only."},
            {"role": "user", "content": prompt},
        ],
        temperature=temperature,
        top_p=top_p,
        max_tokens=max_tokens,
    )
    # Guard against a None content so .strip() cannot raise AttributeError.
    return (resp.choices[0].message.content or "").strip()


# ─────────────────────────────────────────
# Utils
# ─────────────────────────────────────────
def clamp(n, lo, hi):
    """Return `n` limited to the inclusive range [lo, hi]."""
    return max(lo, min(hi, n))


def dedupe_sentences(text: str) -> str:
    """Drop repeated sentences (case/whitespace-insensitive), keeping first occurrences.

    Sentences are split after '.', '!' or '?' followed by whitespace. Sentences
    that were separated by plain spaces are re-joined with one space; line breaks
    between sentences are preserved (capped at one blank line) so paragraph
    structure and a trailing hashtag line survive.
    """
    # The capturing group keeps separators: [sentence, sep, sentence, sep, ...].
    pieces = re.split(r'((?<=[.!?])\s+)', text.strip())
    seen = set()
    out = []
    newlines = 0  # strongest line break seen since the last kept sentence
    for idx, piece in enumerate(pieces):
        if idx % 2:
            newlines = max(newlines, min(piece.count("\n"), 2))
            continue
        sentence = piece.strip()
        norm = re.sub(r'\s+', ' ', sentence.lower())
        if not norm or norm in seen:
            continue
        seen.add(norm)
        if out:
            out.append("\n" * newlines if newlines else " ")
        out.append(sentence)
        newlines = 0
    return "".join(out).strip()


def strip_labels(text: str) -> str:
    """Remove leading 'Hook:', 'Body:', 'Takeaway:' and 'CTA:' labels from each line.

    Matching is case-insensitive; the rest of each line is left untouched.
    """
    return "\n".join(_LABEL_RE.sub('', line) for line in text.splitlines()).strip()


def _tokenize(text: str) -> list[str]:
    """Lower-case `text` and return its word-like tokens."""
    return _TOKEN_RE.findall(text.lower())


# ─────────────────────────────────────────
# Dataset ingest + keywords (optional)
# ─────────────────────────────────────────
def load_posts_from_file(file) -> pd.DataFrame:
    """Read an uploaded CSV/JSON file and return a one-column ('text') DataFrame.

    The text column is taken from 'text' if present, otherwise from the first
    column named text/post/content/body (case-insensitive). Missing values
    become empty strings.

    Raises:
        ValueError: for an unsupported extension or when no text-like column exists.
    """
    name = file.name.lower()
    # Rewind in case the buffer was already consumed by an earlier read.
    if hasattr(file, "seek"):
        file.seek(0)
    if name.endswith(".csv"):
        df = pd.read_csv(file)
    elif name.endswith(".json"):
        df = pd.read_json(file, lines=False)
    else:
        raise ValueError("Upload CSV or JSON.")
    cand = [c for c in df.columns if c.lower() in ("text", "post", "content", "body")]
    if not cand:
        raise ValueError("Dataset must contain 'text' (or post/content/body).")
    if "text" not in df.columns:
        df["text"] = df[cand[0]]
    df["text"] = df["text"].fillna("").astype(str)
    return df[["text"]]


def simple_rake(text, min_len=2, max_len=3, top_k=12):
    """Return up to `top_k` key phrases from `text` using a RAKE-style score.

    Candidate phrases are runs of tokens between stopwords. Each token scores
    (degree + 1) / frequency, a phrase scores the sum over its tokens, and
    repeated phrases accumulate. Only phrases with `min_len`..`max_len` words
    are returned, best first.
    """
    phrases, current = [], []
    for word in _tokenize(text):
        if word in STOPWORDS:
            if current:
                phrases.append(" ".join(current))
                current = []
        else:
            current.append(word)
    if current:
        phrases.append(" ".join(current))

    freq, degree = Counter(), Counter()
    for phrase in phrases:
        tokens = phrase.split()
        for token in tokens:
            freq[token] += 1
            degree[token] += len(tokens) - 1

    scores = {}
    for phrase in phrases:
        phrase_score = sum((degree[t] + 1) / freq[t] for t in phrase.split())
        scores[phrase] = scores.get(phrase, 0) + phrase_score

    ranked = sorted(scores.items(), key=lambda item: item[1], reverse=True)
    filtered = [p for p, _ in ranked if min_len <= len(p.split()) <= max_len]
    return filtered[:top_k]


def tfidf_builder(texts, top_k=8):
    """Build a TF-IDF scorer from `texts` and return it.

    The returned function takes a string and returns its `top_k` highest-scoring
    tokens. IDF is log((N + 1) / (df + 1)) + 1, with df defaulting to 1 for
    tokens not seen in `texts`.
    """
    doc_freq = Counter()
    n_docs = 0
    for text in texts:
        doc_freq.update(set(_tokenize(text)))
        n_docs += 1

    def score(text):
        tokens = _tokenize(text)
        if not tokens:
            return []
        scores = {}
        for word, count in Counter(tokens).items():
            idf = math.log((n_docs + 1) / (doc_freq.get(word, 1) + 1)) + 1
            scores[word] = (count / len(tokens)) * idf
        ranked = sorted(scores.items(), key=lambda item: item[1], reverse=True)
        return [w for w, _ in ranked[:top_k]]

    return score


def extract_keywords(topic, df: pd.DataFrame | None):
    """Return up to 12 de-duplicated, lower-cased keywords for `topic`.

    With a non-empty dataset, combines RAKE phrases over a fixed-seed sample of
    up to 30 posts with TF-IDF terms; otherwise runs RAKE on the topic alone.
    """
    if df is not None and len(df):
        sample = df["text"].sample(min(30, len(df)), random_state=42).tolist()
        rake_kw = simple_rake(" ".join(sample + [topic]), min_len=2, max_len=3, top_k=12)
        tfidf_fn = tfidf_builder(df["text"].tolist(), top_k=8)
        kw2 = tfidf_fn(topic + " " + " ".join(sample[:5]))
        raw = rake_kw + kw2
    else:
        raw = simple_rake(topic, min_len=1, max_len=2, top_k=8)

    seen, out = set(), []
    for kw in raw:
        normalized = re.sub(r"\s+", " ", kw.strip().lower())
        if normalized and normalized not in seen:
            seen.add(normalized)
            out.append(normalized)
    return out[:12]


# ─────────────────────────────────────────
# Interactive clarifier
# ─────────────────────────────────────────
def need_clarification(purpose, evidence):
    """Return the follow-up questions to ask for whichever of purpose/evidence is empty."""
    questions = []
    if not purpose:
        questions.append("What outcome do you want from this post? (awareness, demo requests, hiring, launch, opinion, lesson)")
    if not evidence:
        questions.append("Share one concrete detail to include (metric, anecdote, quote, or specific example).")
    return questions


# ─────────────────────────────────────────
# Prompt (single post, plain text)
# ─────────────────────────────────────────
def build_prompt(topic, language, tone, target_len, purpose, audience, evidence, keywords, style_cues, clarifier_notes):
    """Assemble the generation prompt for a single plain-text LinkedIn post.

    At most 8 keywords and 4 style cues are included; empty optional fields fall
    back to placeholders ('N/A', '- None', 'None', 'awareness',
    'general professionals').
    """
    kw_block = ", ".join((keywords or [])[:8]) if keywords else "N/A"
    # One cue per line, as a bullet list.
    cues_block = "\n".join(f"- {c}" for c in (style_cues or [])[:4]) if style_cues else "- None"
    notes = (clarifier_notes or "").strip()
    return (
        "You are a senior LinkedIn content strategist. "
        "Write one viral, insightful LinkedIn post as plain text only (no section headers, no labels).\n\n"
        f"Language: {language}\n"
        f"Topic: \"{topic}\"\n"
        f"Purpose: {purpose or 'awareness'}\n"
        f"Audience: {audience or 'general professionals'}\n"
        f"Tone: {tone}\n"
        f"Approx length: ~{target_len} words\n"
        f"Keywords to weave in naturally: {kw_block}\n"
        "Style cues (apply silently):\n"
        f"{cues_block}\n\n"
        "User-provided evidence/details (incorporate if relevant):\n"
        f"{evidence or 'None'}\n\n"
        "Additional notes from clarifier (apply silently):\n"
        f"{notes or 'None'}\n\n"
        "Rules (do not mention these explicitly):\n"
        "- Curiosity-driven first line.\n"
        "- Short paragraphs; concrete, novel insights (3–5), examples welcome.\n"
        "- Max 2 emojis; 2–4 niche hashtags only at end (optional).\n"
        "- No repeated sentences; avoid clichés.\n"
        "- Return a single cohesive post in plain text only."
    )


# ─────────────────────────────────────────
# Generation helpers
# ─────────────────────────────────────────
def generate_post(prompt, model, temperature, top_p, target_len):
    """Generate a post from `prompt` and return it with labels and duplicate sentences removed.

    The token budget scales with `target_len` and is clamped to 200..1200.
    """
    max_tokens = clamp(int(target_len * 1.6) + 120, 200, 1200)
    raw = groq_chat(prompt, model, temperature, top_p, max_tokens)
    return dedupe_sentences(strip_labels(raw))


def refine_post(post, op, model, temperature, top_p):
    """Apply the refinement `op` (a key of REFINE_INSTRUCTIONS) to `post` and return the result."""
    prompt = (
        "You are editing a LinkedIn post. Apply the instruction and return plain text only.\n\n"
        f"Instruction: {REFINE_INSTRUCTIONS[op]}\n\n"
        f"Post:\n{post}"
    )
    raw = groq_chat(prompt, model, temperature, top_p, _REFINE_MAX_TOKENS)
    return dedupe_sentences(strip_labels(raw))


# ─────────────────────────────────────────
# Streamlit UI
# ─────────────────────────────────────────
st.set_page_config(page_title="LinkedIn Post Generator — Groq (Interactive)", layout="centered")
st.title("LinkedIn Post Generator — Interactive (Groq)")

with st.sidebar:
    st.subheader("Groq & Decoding")
    model = st.selectbox("Groq model", [DEFAULT_MODEL, "llama-3.1-8b-instant", "mixtral-8x7b-32768"], index=0)
    temperature = st.slider("Temperature", 0.1, 1.2, 0.6, 0.05)
    top_p = st.slider("Top‑p", 0.1, 1.0, 0.9, 0.05)
    target_len = st.slider("Target length (words)", 60, 300, 140, 10)
    st.markdown("Set GROQ_API_KEY in Space → Settings → Variables & Secrets.")

with st.form("main"):
    topic = st.text_input("Topic", "Generative AI for Business")
    purpose = st.selectbox("Purpose", ["", "awareness", "lead-gen", "hiring", "product launch", "opinion", "lesson learned"], index=0)
    audience = st.text_input("Audience", "Startup founders")
    tone = st.selectbox("Tone", ["Professional", "Friendly", "Contrarian", "Technical", "Inspirational"], index=0)
    language = st.selectbox("Language", ["English", "Urdu", "Arabic", "French", "Spanish"], index=0)

    st.markdown("Optional: upload CSV/JSON of past posts (must include 'text').")
    uploaded = st.file_uploader("Upload dataset", type=["csv", "json"])

    st.markdown("Optional: style cues (max 4, one per line).")
    style_text = st.text_area("Style cues", value="", placeholder="Short hooks\nActionable bullets\nStories with numbers\nTactical CTA")

    st.markdown("Optional: evidence to include (metric, anecdote, quote).")
    evidence = st.text_area("Evidence", value="")

    submitted = st.form_submit_button("Continue")

# Session state for clarifier, keywords & output. `form_submitted` persists the
# fact that the form was submitted, because the submit button itself is only
# True for the single run in which it was pressed.
for _key, _default in (
    ("clarifier_notes", ""),
    ("last_post", ""),
    ("keywords", []),
    ("form_submitted", False),
):
    if _key not in st.session_state:
        st.session_state[_key] = _default

questions = need_clarification(purpose, evidence)
style_cues = [s.strip() for s in style_text.splitlines() if s.strip()][:4]
generate_now = False

if submitted:
    # Load dataset and extract keywords once per submission.
    posts_df = None
    if uploaded is not None:
        try:
            posts_df = load_posts_from_file(uploaded)
        except Exception as e:
            st.session_state.form_submitted = False
            st.error(f"Dataset error: {e}")
            st.stop()
    st.session_state.keywords = extract_keywords(topic, posts_df)
    st.session_state.form_submitted = True
    st.session_state.clarifier_notes = ""
    # Nothing to clarify: generate directly.
    generate_now = not questions

# Clarifier: rendered on every run after a submission so that typing an answer
# or pressing "Generate Post" (both trigger a rerun) does not make it vanish.
if st.session_state.form_submitted and questions:
    st.info("Clarifier")
    answered = []
    for q in questions:
        ans = st.text_input(q, key=f"q_{q}")
        if ans:
            answered.append(f"{q} -> {ans}\n")
    # Rebuilt from the current answers each run instead of appended, so reruns
    # cannot duplicate notes.
    st.session_state.clarifier_notes = "".join(answered)
    if st.button("Generate Post"):
        generate_now = True

if generate_now:
    prompt = build_prompt(
        topic, language, tone, target_len, purpose, audience, evidence,
        st.session_state.keywords, style_cues, st.session_state.clarifier_notes,
    )
    with st.spinner("Generating with Groq..."):
        try:
            st.session_state.last_post = generate_post(prompt, model, temperature, top_p, target_len)
        except Exception as e:
            st.error(f"Groq generation failed: {e}")

# Reserve the post's position now; it is filled in last so that a refinement
# applied during this run is what gets displayed.
post_area = st.container()

# Refinements (transform the last output)
if st.session_state.last_post:
    st.markdown("---")
    st.subheader("Refine")
    columns = st.columns(len(REFINE_BUTTONS))
    for column, (label, op) in zip(columns, REFINE_BUTTONS):
        if column.button(label):
            try:
                st.session_state.last_post = refine_post(
                    st.session_state.last_post, op, model, temperature, top_p
                )
            except Exception as e:
                st.error(f"Refinement failed: {e}")

# show output if available
if st.session_state.last_post:
    post_area.subheader("Post")
    post_area.write(st.session_state.last_post)
    post_area.download_button("Download (.txt)", st.session_state.last_post, file_name="linkedin_post.txt")