LinkedInPostGenerator1.1

Sleeping

App Files Files Community

LinkedInPostGenerator1.1 / app.py

Alpha108

Update app.py

d0be94e verified 2 months ago

raw

history blame contribute delete

14.5 kB

	import os
	import re
	import json
	import math
	import streamlit as st
	import pandas as pd

	# ─────────────────────────────────────────
	# Config
	# ─────────────────────────────────────────
	DEFAULT_MODEL = "llama-3.3-70b-versatile"
	# Function words that act as phrase boundaries during keyword extraction.
	STOPWORDS = set(
	    (
	        "a an and the or for nor but so yet of to in on with at by from as "
	        "is are was were be being been "
	        "i you he she it we they them us our your their "
	        "this that these those here there"
	    ).split()
	)

	# ─────────────────────────────────────────
	# Groq client
	# ─────────────────────────────────────────
	# groq is an optional dependency: fall back to None when it is not installed
	# so the module still imports and the problem can be reported at call time.
	try:
	    from groq import Groq
	except ImportError:
	    Groq = None

	def get_groq_client():
	    """Build a Groq client from the GROQ_API_KEY environment variable.

	    Returns:
	        A ``Groq`` client instance configured with the API key.

	    Raises:
	        RuntimeError: if GROQ_API_KEY is unset or blank, or if the ``groq``
	            package could not be imported.
	    """
	    # A whitespace-only value is as unusable as a missing one.
	    api_key = (os.getenv("GROQ_API_KEY") or "").strip()
	    if not api_key:
	        raise RuntimeError("Missing GROQ_API_KEY. Set it in Space → Settings → Variables & Secrets.")
	    if Groq is None:
	        raise RuntimeError("Package 'groq' not installed. Add 'groq' to requirements.txt.")
	    return Groq(api_key=api_key)

	def groq_chat(prompt, model, temperature, top_p, max_tokens):
	    """Send one user prompt to a Groq chat model and return the reply text.

	    Args:
	        prompt: user message content.
	        model: Groq model identifier.
	        temperature: sampling temperature passed through to the API.
	        top_p: nucleus-sampling value passed through to the API.
	        max_tokens: completion token limit passed through to the API.

	    Returns:
	        The first choice's message content with surrounding whitespace
	        removed; an empty string when the API returns no content.

	    Raises:
	        RuntimeError: propagated from get_groq_client() when the key or the
	            package is missing.
	    """
	    client = get_groq_client()
	    resp = client.chat.completions.create(
	        model=model,
	        messages=[
	            {"role": "system", "content": "You craft concise, original, high-signal LinkedIn posts. Respond with plain text only."},
	            {"role": "user", "content": prompt},
	        ],
	        temperature=temperature,
	        top_p=top_p,
	        max_tokens=max_tokens,
	    )
	    # The content field can be None; avoid calling .strip() on it.
	    content = resp.choices[0].message.content
	    return (content or "").strip()

	# ─────────────────────────────────────────
	# Utils
	# ─────────────────────────────────────────
	def clamp(n, lo, hi):
	    """Return n limited to the inclusive range [lo, hi]."""
	    return max(lo, min(hi, n))

	def dedupe_sentences(text: str) -> str:
	    """Drop repeated sentences while keeping line and paragraph breaks.

	    Sentences are split after '.', '!' or '?' followed by whitespace.
	    Two sentences are duplicates when they match after lowercasing and
	    collapsing internal whitespace; only the first occurrence is kept.

	    Args:
	        text: the text to clean.

	    Returns:
	        The text with duplicates removed. Kept sentences are separated by a
	        single space, a newline, or a blank line, depending on the whitespace
	        that separated them in the input.
	    """
	    # The capturing group keeps each separator, so pieces alternate:
	    # sentence, separator, sentence, separator, ...
	    pieces = re.split(r'(?<=[.!?])(\s+)', text.strip())
	    gaps = ("", " ", "\n", "\n\n")
	    seen = set()
	    out = []
	    gap_rank = 0
	    for idx, piece in enumerate(pieces):
	        if idx % 2:
	            # Remember the strongest break since the last kept sentence so a
	            # dropped duplicate does not swallow a paragraph boundary.
	            gap_rank = max(gap_rank, 1 + min(piece.count("\n"), 2))
	            continue
	        sentence = piece.strip()
	        norm = re.sub(r'\s+', ' ', sentence.lower())
	        if norm and norm not in seen:
	            seen.add(norm)
	            if out:
	                out.append(gaps[gap_rank])
	            out.append(sentence)
	            gap_rank = 0
	    return "".join(out).strip()

	# One or more "hook:/body:/takeaway:/cta:" labels at the start of a line,
	# with optional whitespace before and after each.
	_LABEL_PREFIX_RE = re.compile(r'^(?:\s*(?:hook|body|takeaway|cta):\s*)+', re.IGNORECASE)


	def strip_labels(text: str) -> str:
	    """Remove leading section labels (Hook:, Body:, Takeaway:, CTA:) per line.

	    Matching is case-insensitive and tolerates any amount of whitespace
	    around the label. Lines without a label are left untouched.

	    Args:
	        text: model output that may contain labelled lines.

	    Returns:
	        The text with labels removed and outer whitespace stripped.
	    """
	    cleaned = [_LABEL_PREFIX_RE.sub('', line) for line in text.splitlines()]
	    return "\n".join(cleaned).strip()

	# ─────────────────────────────────────────
	# Dataset ingest + keywords (optional)
	# ─────────────────────────────────────────
	def load_posts_from_file(file) -> pd.DataFrame:
	    """Read an uploaded CSV/JSON file and return its post text column.

	    Args:
	        file: file-like object with a ``name`` attribute; the extension
	            (case-insensitive) selects the parser.

	    Returns:
	        A DataFrame with a single string column ``text``; missing values
	        become empty strings.

	    Raises:
	        ValueError: if the extension is not .csv/.json, or no column named
	            text, post, content or body (case-insensitive) exists.
	    """
	    name = file.name.lower()
	    if name.endswith(".csv"):
	        df = pd.read_csv(file)
	    elif name.endswith(".json"):
	        df = pd.read_json(file, lines=False)
	    else:
	        raise ValueError("Upload CSV or JSON.")

	    if "text" in df.columns:
	        source = "text"
	    else:
	        # Case-insensitive lookup; str() keeps non-string labels from crashing.
	        by_lower = {}
	        for col in df.columns:
	            by_lower.setdefault(str(col).lower(), col)
	        # Honour the documented priority rather than file column order.
	        source = next(
	            (by_lower[key] for key in ("text", "post", "content", "body") if key in by_lower),
	            None,
	        )
	        if source is None:
	            raise ValueError("Dataset must contain 'text' (or post/content/body).")
	    return pd.DataFrame({"text": df[source].fillna("").astype(str)})

	def simple_rake(text, min_len=2, max_len=3, top_k=12, stopwords=None):
	    """Extract ranked keyword phrases with a small RAKE-style heuristic.

	    The lowercased text is tokenized, split into phrases at stopwords, and
	    each phrase is scored by summing (degree + 1) / frequency over its
	    tokens. Scores of repeated phrases accumulate.

	    Args:
	        text: input text.
	        min_len: minimum number of words in a returned phrase.
	        max_len: maximum number of words in a returned phrase.
	        top_k: maximum number of phrases returned.
	        stopwords: optional collection of lowercase stopwords; defaults to
	            the module-level STOPWORDS.

	    Returns:
	        Up to ``top_k`` phrases, highest score first; ties keep first-seen
	        order.
	    """
	    stop = STOPWORDS if stopwords is None else stopwords
	    # "\-" is an escaped hyphen so hyphenated terms stay together as one token.
	    words = re.findall(r"[A-Za-z0-9#+\-_/']+", text.lower())

	    phrases, cur = [], []
	    for w in words:
	        if w in stop:
	            if cur:
	                phrases.append(" ".join(cur))
	                cur = []
	        else:
	            cur.append(w)
	    if cur:
	        phrases.append(" ".join(cur))

	    freq, degree = {}, {}
	    for ph in phrases:
	        toks = ph.split()
	        for t in toks:
	            freq[t] = freq.get(t, 0) + 1
	            degree[t] = degree.get(t, 0) + (len(toks) - 1)

	    scores = {}
	    for ph in phrases:
	        s = sum((degree.get(t, 0) + 1) / freq.get(t, 1) for t in ph.split())
	        scores[ph] = scores.get(ph, 0) + s

	    ranked = sorted(scores.items(), key=lambda item: item[1], reverse=True)
	    filtered = [p for p, _ in ranked if min_len <= len(p.split()) <= max_len]
	    return filtered[:top_k]

	def tfidf_builder(texts, top_k=8):
	    """Build a TF-IDF keyword scorer from a corpus of texts.

	    Args:
	        texts: iterable of strings used to compute document frequencies.
	        top_k: number of top-scoring tokens the scorer returns.

	    Returns:
	        A function ``score(text)`` that returns up to ``top_k`` tokens of
	        ``text`` ranked by term frequency times smoothed inverse document
	        frequency; ties keep first-seen order.
	    """
	    # "\-" is an escaped hyphen so hyphenated terms stay together as one token.
	    token_re = re.compile(r"[A-Za-z0-9#+\-_/']+")

	    docs = [token_re.findall(t.lower()) for t in texts]
	    doc_freq = {}
	    for d in docs:
	        for w in set(d):
	            doc_freq[w] = doc_freq.get(w, 0) + 1
	    n_docs = len(docs)

	    def score(text):
	        doc = token_re.findall(text.lower())
	        tf = {}
	        for w in doc:
	            tf[w] = tf.get(w, 0) + 1
	        scores = {}
	        for w, c in tf.items():
	            # Tokens unseen in the corpus are treated as occurring in one document.
	            df = doc_freq.get(w, 1)
	            idf = math.log((n_docs + 1) / (df + 1)) + 1
	            scores[w] = (c / len(doc)) * idf
	        ranked = sorted(scores.items(), key=lambda item: item[1], reverse=True)
	        return [w for w, _ in ranked[:top_k]]

	    return score

	def extract_keywords(topic, df: "pd.DataFrame | None"):
	    """Collect up to 12 unique keywords for a topic.

	    With a non-empty dataset, phrases from simple_rake() over a sample of up
	    to 30 posts plus the topic are combined with tokens from a TF-IDF scorer
	    built on the whole dataset. Without one, only the topic is analysed.

	    Args:
	        topic: the post topic.
	        df: optional DataFrame with a ``text`` column of past posts.

	    Returns:
	        A list of lowercase, whitespace-normalized keywords without
	        duplicates, in ranking order, at most 12 items long.
	    """
	    if df is not None and len(df):
	        # Fixed seed keeps the sampled posts stable across reruns.
	        sample = df["text"].sample(min(30, len(df)), random_state=42).tolist()
	        rake_kw = simple_rake(" ".join(sample + [topic]), min_len=2, max_len=3, top_k=12)
	        tfidf_fn = tfidf_builder(df["text"].tolist(), top_k=8)
	        kw2 = tfidf_fn(topic + " " + " ".join(sample[:5]))
	        raw = rake_kw + kw2
	    else:
	        raw = simple_rake(topic, min_len=1, max_len=2, top_k=8)

	    seen, out = set(), []
	    for k in raw:
	        k2 = re.sub(r"\s+", " ", k.strip().lower())
	        if k2 and k2 not in seen:
	            seen.add(k2)
	            out.append(k2)
	    return out[:12]

	# ─────────────────────────────────────────
	# Interactive clarifier
	# ─────────────────────────────────────────
	def need_clarification(purpose, evidence):
	    """Return the follow-up questions required before generating a post.

	    Args:
	        purpose: selected purpose; empty means not chosen.
	        evidence: user-supplied supporting detail; empty means none given.

	    Returns:
	        A list of question strings, one for each missing input, purpose
	        first; empty when both inputs are present.
	    """
	    questions = []
	    if not purpose:
	        questions.append("What outcome do you want from this post? (awareness, demo requests, hiring, launch, opinion, lesson)")
	    if not evidence:
	        questions.append("Share one concrete detail to include (metric, anecdote, quote, or specific example).")
	    return questions

	# ─────────────────────────────────────────
	# Prompt (single post, plain text)
	# ─────────────────────────────────────────
	def build_prompt(topic, language, tone, target_len, purpose, audience, evidence, keywords, style_cues, clarifier_notes):
	    """Assemble the plain-text generation prompt for a single LinkedIn post.

	    Args:
	        topic: post topic, quoted in the prompt.
	        language: output language name.
	        tone: desired tone.
	        target_len: approximate word count.
	        purpose: post goal; falls back to 'awareness' when empty.
	        audience: target readers; falls back to 'general professionals'.
	        evidence: user-provided detail; 'None' when empty.
	        keywords: keyword list; the first 8 are used, 'N/A' when empty.
	        style_cues: style hints; the first 4 are listed one per line,
	            '- None' when empty.
	        clarifier_notes: extra notes; stripped, 'None' when empty.

	    Returns:
	        The complete prompt string.
	    """
	    kw_block = ", ".join(keywords[:8]) if keywords else "N/A"
	    # Real newline: each style cue becomes its own bullet line.
	    cues_block = "\n".join(f"- {c}" for c in style_cues[:4]) if style_cues else "- None"
	    notes = (clarifier_notes or "").strip()
	    return (
	        "You are a senior LinkedIn content strategist. "
	        "Write one viral, insightful LinkedIn post as plain text only (no section headers, no labels).\n\n"
	        f"Language: {language}\n"
	        f"Topic: \"{topic}\"\n"
	        f"Purpose: {purpose or 'awareness'}\n"
	        f"Audience: {audience or 'general professionals'}\n"
	        f"Tone: {tone}\n"
	        f"Approx length: ~{target_len} words\n"
	        f"Keywords to weave in naturally: {kw_block}\n"
	        "Style cues (apply silently):\n"
	        f"{cues_block}\n\n"
	        "User-provided evidence/details (incorporate if relevant):\n"
	        f"{evidence or 'None'}\n\n"
	        "Additional notes from clarifier (apply silently):\n"
	        f"{notes or 'None'}\n\n"
	        "Rules (do not mention these explicitly):\n"
	        "- Curiosity-driven first line.\n"
	        "- Short paragraphs; concrete, novel insights (3–5), examples welcome.\n"
	        "- Max 2 emojis; 2–4 niche hashtags only at end (optional).\n"
	        "- No repeated sentences; avoid clichés.\n"
	        "- Return a single cohesive post in plain text only."
	    )

	# ─────────────────────────────────────────
	# Streamlit UI
	# ─────────────────────────────────────────
	st.set_page_config(page_title="LinkedIn Post Generator — Groq (Interactive)", layout="centered")
	st.title("LinkedIn Post Generator — Interactive (Groq)")

	# Decoding controls live in the sidebar and are read live on every rerun.
	with st.sidebar:
	    st.subheader("Groq & Decoding")
	    model = st.selectbox(
	        "Groq model",
	        ["llama-3.3-70b-versatile", "llama-3.1-8b-instant", "mixtral-8x7b-32768"],
	        index=0,
	    )
	    temperature = st.slider("Temperature", 0.1, 1.2, 0.6, 0.05)
	    top_p = st.slider("Top‑p", 0.1, 1.0, 0.9, 0.05)
	    target_len = st.slider("Target length (words)", 60, 300, 140, 10)
	    st.markdown("Set GROQ_API_KEY in Space → Settings → Variables & Secrets.")

	with st.form("main"):
	    topic = st.text_input("Topic", "Generative AI for Business")
	    purpose = st.selectbox("Purpose", ["", "awareness", "lead-gen", "hiring", "product launch", "opinion", "lesson learned"], index=0)
	    audience = st.text_input("Audience", "Startup founders")
	    tone = st.selectbox("Tone", ["Professional", "Friendly", "Contrarian", "Technical", "Inspirational"], index=0)
	    language = st.selectbox("Language", ["English", "Urdu", "Arabic", "French", "Spanish"], index=0)

	    st.markdown("Optional: upload CSV/JSON of past posts (must include 'text').")
	    uploaded = st.file_uploader("Upload dataset", type=["csv", "json"])

	    st.markdown("Optional: style cues (max 4, one per line).")
	    style_text = st.text_area("Style cues", value="", placeholder="Short hooks\nActionable bullets\nStories with numbers\nTactical CTA")

	    st.markdown("Optional: evidence to include (metric, anecdote, quote).")
	    evidence = st.text_area("Evidence", value="")

	    submitted = st.form_submit_button("Continue")

	# Session state for clarifier, pending request & output
	for _key, _default in (("clarifier_notes", ""), ("last_post", ""), ("pending_request", None)):
	    if _key not in st.session_state:
	        st.session_state[_key] = _default


	def _generate_post(request, notes):
	    """Build the prompt for a stored request, call Groq, and save the cleaned post."""
	    prompt = build_prompt(
	        request["topic"], request["language"], request["tone"], target_len,
	        request["purpose"], request["audience"], request["evidence"],
	        request["keywords"], request["style_cues"], notes,
	    )
	    with st.spinner("Generating with Groq..."):
	        try:
	            max_tokens = clamp(int(target_len*1.6)+120, 200, 1200)
	            raw = groq_chat(prompt, model, temperature, top_p, max_tokens)
	            st.session_state.last_post = dedupe_sentences(strip_labels(raw))
	        except Exception as e:
	            st.error(f"Groq generation failed: {e}")


	if submitted:
	    # Load dataset and extract keywords
	    posts_df = None
	    if uploaded is not None:
	        try:
	            posts_df = load_posts_from_file(uploaded)
	        except Exception as e:
	            st.error(f"Dataset error: {e}")
	            st.stop()

	    request = {
	        "topic": topic,
	        "language": language,
	        "tone": tone,
	        "purpose": purpose,
	        "audience": audience,
	        "evidence": evidence,
	        "keywords": extract_keywords(topic, posts_df),
	        "style_cues": [s.strip() for s in style_text.splitlines() if s.strip()][:4],
	        "questions": need_clarification(purpose, evidence),
	    }
	    # Notes from an earlier request must not leak into this one.
	    st.session_state.clarifier_notes = ""
	    if request["questions"]:
	        # `submitted` is only True on this one run, so the request is kept in
	        # session state to let the clarifier survive the following reruns.
	        st.session_state.pending_request = request
	    else:
	        # Generate directly
	        st.session_state.pending_request = None
	        _generate_post(request, "")

	# Clarifier: rendered on every rerun while a request is waiting for answers.
	pending = st.session_state.pending_request
	if pending:
	    st.info("Clarifier")
	    answers = [(q, st.text_input(q, key=f"q_{q}")) for q in pending["questions"]]
	    if st.button("Generate Post"):
	        # Rebuilt from the current answers rather than appended on each rerun.
	        st.session_state.clarifier_notes = "".join(f"{q} -> {a}\n" for q, a in answers if a)
	        _generate_post(pending, st.session_state.clarifier_notes)

	# Output + refinements (transform the last output)
	if st.session_state.last_post:
	    # Reserved above the buttons and filled last, so a refinement made in
	    # this run is shown immediately and the post is rendered only once.
	    post_area = st.container()

	    st.markdown("---")
	    st.subheader("Refine")

	    def refine(op):
	        """Apply one named edit instruction to the last post via Groq."""
	        if not st.session_state.last_post:
	            return
	        instr = {
	            "shorter": "Shorten to ~120 words. Keep the opening intact. Return plain text only.",
	            "punchier": "Make the hook more punchy and contrarian; keep total length similar. Plain text only.",
	            "add_data": "Add one concrete metric or example to support the main claim. Plain text only.",
	            "less_emoji": "Remove emojis entirely. Plain text only.",
	            "add_tags": "Append 2–4 niche hashtags at the end (new line). Plain text only.",
	        }[op]
	        prompt = (
	            "You are editing a LinkedIn post. Apply the instruction and return plain text only.\n\n"
	            f"Instruction: {instr}\n\n"
	            f"Post:\n{st.session_state.last_post}"
	        )
	        try:
	            raw = groq_chat(prompt, model, temperature, top_p, clamp(600, 200, 1200))
	            st.session_state.last_post = dedupe_sentences(strip_labels(raw))
	        except Exception as e:
	            st.error(f"Refinement failed: {e}")

	    refine_buttons = (
	        ("Shorter", "shorter"),
	        ("Punchier hook", "punchier"),
	        ("Add data point", "add_data"),
	        ("No emojis", "less_emoji"),
	        ("Add hashtags", "add_tags"),
	    )
	    for column, (label, op) in zip(st.columns(5), refine_buttons):
	        if column.button(label):
	            refine(op)

	    with post_area:
	        st.subheader("Post")
	        st.write(st.session_state.last_post)
	        st.download_button("Download (.txt)", st.session_state.last_post, file_name="linkedin_post.txt")