# Page header residue: Alpha108 · "Update app.py" · d0be94e (verified)
import os
import re
import json
import math
import streamlit as st
import pandas as pd
# ─────────────────────────────────────────
# Config
# ─────────────────────────────────────────
# Identifier of the default Groq chat model.
DEFAULT_MODEL = "llama-3.3-70b-versatile"

# Function words; simple_rake() ends the current candidate phrase whenever it
# meets one of these.
STOPWORDS = {
    # articles, conjunctions, prepositions, forms of "to be"
    "a", "an", "and", "the", "or", "for", "nor", "but", "so", "yet",
    "of", "to", "in", "on", "with", "at", "by", "from", "as",
    "is", "are", "was", "were", "be", "being", "been",
    # pronouns, determiners, locatives
    "i", "you", "he", "she", "it", "we", "they", "them", "us",
    "our", "your", "their", "this", "that", "these", "those",
    "here", "there",
}
# ─────────────────────────────────────────
# Groq client
# ─────────────────────────────────────────
# Optional dependency: keep this module importable when the 'groq' package is
# absent. Groq is set to None in that case and get_groq_client() reports the
# missing package when a client is actually requested.
try:
    from groq import Groq
except ImportError:
    Groq = None
def get_groq_client():
    """Create a Groq client from the ``GROQ_API_KEY`` environment variable.

    Returns:
        A ``Groq`` client constructed with the configured API key.

    Raises:
        RuntimeError: If ``GROQ_API_KEY`` is unset, empty or whitespace-only,
            or if the ``groq`` package could not be imported.
    """
    # Strip so that a secret pasted with stray whitespace/newlines is either
    # usable or clearly reported as missing instead of failing at request time.
    api_key = (os.getenv("GROQ_API_KEY") or "").strip()
    if not api_key:
        raise RuntimeError("Missing GROQ_API_KEY. Set it in Space → Settings → Variables & Secrets.")
    if Groq is None:
        raise RuntimeError("Package 'groq' not installed. Add 'groq' to requirements.txt.")
    return Groq(api_key=api_key)
def groq_chat(prompt, model, temperature, top_p, max_tokens):
    """Send ``prompt`` to a Groq chat model and return the reply text.

    Args:
        prompt: User message content.
        model: Groq model identifier.
        temperature: Sampling temperature forwarded to the API.
        top_p: Nucleus-sampling value forwarded to the API.
        max_tokens: Completion token limit forwarded to the API.

    Returns:
        The first choice's message content with surrounding whitespace removed.

    Raises:
        RuntimeError: If the client cannot be created (see ``get_groq_client``)
            or the response carries no text.
    """
    client = get_groq_client()
    resp = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "You craft concise, original, high-signal LinkedIn posts. Respond with plain text only."},
            {"role": "user", "content": prompt},
        ],
        temperature=temperature,
        top_p=top_p,
        max_tokens=max_tokens,
    )
    # Guard against a response with no choices or a null message content; both
    # would otherwise surface as an opaque IndexError / AttributeError.
    choices = getattr(resp, "choices", None) or []
    content = choices[0].message.content if choices else None
    if content is None:
        raise RuntimeError("Groq returned an empty response.")
    return content.strip()
# ─────────────────────────────────────────
# Utils
# ─────────────────────────────────────────
def clamp(n, lo, hi):
    """Limit ``n`` to the range bounded below by ``lo`` and above by ``hi``."""
    # Apply the upper bound first, then the lower bound, so that ``lo`` wins
    # if the bounds are inverted.
    capped = n if n < hi else hi
    return capped if capped > lo else lo
def dedupe_sentences(text: str) -> str:
    """Drop repeated sentences from ``text`` while keeping its line breaks.

    Sentences are split at whitespace that follows ``.``, ``!`` or ``?`` and
    compared case-insensitively with internal whitespace collapsed. The first
    occurrence of each sentence is kept.

    Separators that contain a newline are preserved verbatim so paragraph
    breaks (and a trailing hashtag line) survive; any other separator becomes
    a single space.

    Args:
        text: Text to clean.

    Returns:
        The de-duplicated text, stripped of leading/trailing whitespace.
    """
    # The capturing group makes re.split return the separators too:
    # [sentence, sep, sentence, sep, ..., sentence].
    pieces = re.split(r'((?<=[.!?])\s+)', text.strip())
    seen = set()
    out = []
    gap = ""  # separator to emit before the next kept sentence
    for idx, piece in enumerate(pieces):
        if idx % 2:
            # Prefer a line-breaking separator over a plain space when a
            # dropped duplicate sits between two kept sentences.
            if "\n" in piece:
                gap = piece
            elif not gap:
                gap = " "
            continue
        norm = re.sub(r'\s+', ' ', piece.strip().lower())
        if not norm or norm in seen:
            continue
        seen.add(norm)
        if out:
            out.append(gap or " ")
        out.append(piece.strip())
        gap = ""
    return "".join(out).strip()
def strip_labels(text: str) -> str:
    """Remove leading section labels (hook/body/takeaway/cta) from each line.

    Matching is case-insensitive, so ``HOOK:`` and ``Cta:`` are removed as
    well as ``hook:`` and ``Hook:``. Several labels chained at the start of a
    line are all removed. Whitespace before and directly after a removed label
    is dropped; lines without a label are left unchanged.

    Args:
        text: Possibly multi-line text.

    Returns:
        The text with labels removed, stripped of leading/trailing whitespace.
    """
    label_prefix = re.compile(r'^\s*(?:(?:hook|body|takeaway|cta):\s*)+', re.IGNORECASE)
    cleaned = [label_prefix.sub('', line) for line in text.splitlines()]
    return "\n".join(cleaned).strip()
# ─────────────────────────────────────────
# Dataset ingest + keywords (optional)
# ─────────────────────────────────────────
def load_posts_from_file(file) -> pd.DataFrame:
    """Read an uploaded CSV/JSON dataset and return its post text.

    The text column is the one labelled exactly ``text`` when present;
    otherwise the first column whose label, ignoring case and surrounding
    whitespace, is one of ``text``, ``post``, ``content`` or ``body``.

    Args:
        file: File-like object with a ``name`` attribute; the extension
            (``.csv`` or ``.json``) selects the parser.

    Returns:
        A single-column DataFrame named ``text`` with missing values replaced
        by empty strings and every value converted to ``str``.

    Raises:
        ValueError: If the extension is unsupported or no usable text column
            exists.
    """
    name = file.name.lower()
    if name.endswith(".csv"):
        df = pd.read_csv(file)
    elif name.endswith(".json"):
        df = pd.read_json(file, lines=False)
    else:
        raise ValueError("Upload CSV or JSON.")

    accepted = ("text", "post", "content", "body")
    # str() first: column labels are not always strings (e.g. a JSON array of
    # arrays yields integer labels), and .lower() on those would raise an
    # unrelated AttributeError instead of the ValueError below.
    candidates = [c for c in df.columns if str(c).strip().lower() in accepted]
    if not candidates:
        raise ValueError("Dataset must contain 'text' (or post/content/body).")

    source = "text" if "text" in df.columns else candidates[0]
    # Build a fresh frame rather than adding a column to the parsed one.
    return df[source].fillna("").astype(str).to_frame("text")
def simple_rake(text, min_len=2, max_len=3, top_k=12):
    """Extract key phrases from ``text`` with RAKE-style degree/frequency scoring.

    The lower-cased text is tokenised, candidate phrases are the runs of
    tokens between stopwords, each word scores ``(degree + 1) / frequency``
    and a phrase scores the sum of its word scores (accumulated over repeated
    occurrences of the phrase).

    Args:
        text: Input text.
        min_len: Minimum number of words in a returned phrase.
        max_len: Maximum number of words in a returned phrase.
        top_k: Maximum number of phrases returned.

    Returns:
        Up to ``top_k`` phrases, highest score first.
    """
    # A token is a run of letters, digits and # + - _ / ' that contains at
    # least one letter or digit. The previous class spelled the hyphen as
    # ``\\-_`` inside a raw string, which is the range backslash..underscore:
    # hyphens were never matched (so "lead-gen" split in two) while "]" and
    # "^" were. Requiring an alphanumeric keeps a free-standing "-" or "#"
    # from becoming a word.
    words = re.findall(r"[#+\-_/']*[A-Za-z0-9][A-Za-z0-9#+\-_/']*", text.lower())

    # Stopwords act as phrase delimiters.
    phrases, cur = [], []
    for w in words:
        if w in STOPWORDS:
            if cur:
                phrases.append(" ".join(cur))
                cur = []
        else:
            cur.append(w)
    if cur:
        phrases.append(" ".join(cur))

    # Word statistics: occurrences, and co-occurrence degree within phrases.
    freq, degree = {}, {}
    for ph in phrases:
        toks = ph.split()
        for t in toks:
            freq[t] = freq.get(t, 0) + 1
            degree[t] = degree.get(t, 0) + (len(toks) - 1)

    scores = {}
    for ph in phrases:
        s = sum((degree.get(t, 0) + 1) / freq.get(t, 1) for t in ph.split())
        scores[ph] = scores.get(ph, 0) + s

    ranked = sorted(scores.items(), key=lambda x: x[1], reverse=True)
    filtered = [p for p, _ in ranked if min_len <= len(p.split()) <= max_len]
    return filtered[:top_k]
def tfidf_builder(texts, top_k=8):
    """Build a TF-IDF keyword scorer from a corpus of texts.

    Args:
        texts: Sized collection of strings used to compute document
            frequencies.
        top_k: Number of tokens the returned scorer yields.

    Returns:
        A function ``score(text)`` returning up to ``top_k`` tokens of
        ``text`` ordered by descending TF-IDF weight, where
        ``idf = log((N + 1) / (df + 1)) + 1`` and tokens absent from the
        corpus are given a document frequency of 1.
    """
    # A token is a run of letters, digits and # + - _ / ' that contains at
    # least one letter or digit. The previous class spelled the hyphen as
    # ``\\-_`` inside a raw string, which is the range backslash..underscore,
    # so hyphenated terms such as "lead-gen" were split in two.
    token_re = re.compile(r"[#+\-_/']*[A-Za-z0-9][A-Za-z0-9#+\-_/']*")

    docs = [token_re.findall(t.lower()) for t in texts]
    vocab = {}
    for d in docs:
        for w in set(d):
            vocab[w] = vocab.get(w, 0) + 1
    N = len(docs)

    def score(text):
        doc = token_re.findall(text.lower())
        tf = {}
        for w in doc:
            tf[w] = tf.get(w, 0) + 1
        scores = {}
        # An empty document leaves tf empty, so len(doc) is never zero here.
        for w, c in tf.items():
            doc_freq = vocab.get(w, 1)
            idf = math.log((N + 1) / (doc_freq + 1)) + 1
            scores[w] = (c / len(doc)) * idf
        ranked = sorted(scores.items(), key=lambda x: x[1], reverse=True)
        return [w for w, _ in ranked[:top_k]]

    return score
def extract_keywords(topic, df: pd.DataFrame|None):
    """Collect up to 12 distinct keywords for ``topic``.

    With a non-empty dataset, phrases come from ``simple_rake`` over a fixed
    random sample of at most 30 posts plus the topic, followed by TF-IDF terms
    scored against the whole dataset. Without one, only the topic is mined.

    Args:
        topic: Topic string entered by the user.
        df: Optional DataFrame with a ``text`` column, or ``None``.

    Returns:
        Lower-cased, whitespace-normalised keywords in first-seen order,
        without duplicates, at most 12.
    """
    if df is not None and len(df):
        # random_state pins the sample so reruns give the same keywords.
        sample = df["text"].sample(min(30, len(df)), random_state=42).tolist()
        rake_kw = simple_rake(" ".join(sample + [topic]), min_len=2, max_len=3, top_k=12)
        tfidf_fn = tfidf_builder(df["text"].tolist(), top_k=8)
        kw2 = tfidf_fn(topic + " " + " ".join(sample[:5]))
        raw = rake_kw + kw2
    else:
        raw = simple_rake(topic, min_len=1, max_len=2, top_k=8)

    seen, out = set(), []
    for k in raw:
        # r"\s+" collapses whitespace runs; the former r"\\s+" matched a
        # literal backslash followed by "s" and so normalised nothing.
        k2 = re.sub(r"\s+", " ", k.strip().lower())
        if k2 and k2 not in seen:
            seen.add(k2)
            out.append(k2)
    return out[:12]
# ─────────────────────────────────────────
# Interactive clarifier
# ─────────────────────────────────────────
def need_clarification(purpose, evidence):
    """Return the follow-up questions to ask for each missing input.

    A question is included when its corresponding value is falsy; the purpose
    question, if any, comes first.
    """
    prompts = (
        (purpose, "What outcome do you want from this post? (awareness, demo requests, hiring, launch, opinion, lesson)"),
        (evidence, "Share one concrete detail to include (metric, anecdote, quote, or specific example)."),
    )
    return [question for value, question in prompts if not value]
# ─────────────────────────────────────────
# Prompt (single post, plain text)
# ─────────────────────────────────────────
def build_prompt(topic, language, tone, target_len, purpose, audience, evidence, keywords, style_cues, clarifier_notes):
    """Assemble the single-post generation prompt sent to the model.

    Args:
        topic: Post topic, quoted in the prompt.
        language: Output language.
        tone: Desired tone.
        target_len: Approximate word count.
        purpose: Post goal; ``'awareness'`` is used when falsy.
        audience: Target readers; ``'general professionals'`` when falsy.
        evidence: User-supplied detail; ``'None'`` when falsy.
        keywords: Sequence of keywords; the first 8 are listed, ``N/A`` if empty.
        style_cues: Sequence of style cues; the first 4 are listed one per
            line, ``- None`` if empty.
        clarifier_notes: Free-text notes from the clarifier step; ``'None'``
            when empty.

    Returns:
        The prompt as a single string.
    """
    kw_block = ", ".join(keywords[:8]) if keywords else "N/A"
    # Join with a real newline: the former "\\n" put a literal backslash-n
    # between cues, so they reached the model as one run-on line.
    cues_block = "\n".join(f"- {c}" for c in style_cues[:4]) if style_cues else "- None"
    notes = (clarifier_notes or "").strip()
    return (
        "You are a senior LinkedIn content strategist. "
        "Write one viral, insightful LinkedIn post as plain text only (no section headers, no labels).\n\n"
        f"Language: {language}\n"
        f"Topic: \"{topic}\"\n"
        f"Purpose: {purpose or 'awareness'}\n"
        f"Audience: {audience or 'general professionals'}\n"
        f"Tone: {tone}\n"
        f"Approx length: ~{target_len} words\n"
        f"Keywords to weave in naturally: {kw_block}\n"
        "Style cues (apply silently):\n"
        f"{cues_block}\n\n"
        "User-provided evidence/details (incorporate if relevant):\n"
        f"{evidence or 'None'}\n\n"
        "Additional notes from clarifier (apply silently):\n"
        f"{notes or 'None'}\n\n"
        "Rules (do not mention these explicitly):\n"
        "- Curiosity-driven first line.\n"
        "- Short paragraphs; concrete, novel insights (3–5), examples welcome.\n"
        "- Max 2 emojis; 2–4 niche hashtags only at end (optional).\n"
        "- No repeated sentences; avoid clichés.\n"
        "- Return a single cohesive post in plain text only."
    )
# ─────────────────────────────────────────
# Streamlit UI
# ─────────────────────────────────────────
# Page chrome. The dash in both titles was stored as mojibake ("β€”").
st.set_page_config(page_title="LinkedIn Post Generator — Groq (Interactive)", layout="centered")
st.title("LinkedIn Post Generator — Interactive (Groq)")

# Sidebar: model choice and decoding parameters. These widgets sit outside the
# form, so their current values are available on every rerun.
with st.sidebar:
    st.subheader("Groq & Decoding")
    groq_models = ["llama-3.3-70b-versatile", "llama-3.1-8b-instant", "mixtral-8x7b-32768"]
    # Preselect DEFAULT_MODEL; fall back to the first entry if it is not listed.
    default_model_index = groq_models.index(DEFAULT_MODEL) if DEFAULT_MODEL in groq_models else 0
    model = st.selectbox("Groq model", groq_models, index=default_model_index)
    temperature = st.slider("Temperature", 0.1, 1.2, 0.6, 0.05)
    top_p = st.slider("Top‑p", 0.1, 1.0, 0.9, 0.05)
    target_len = st.slider("Target length (words)", 60, 300, 140, 10)
    st.markdown("Set GROQ_API_KEY in Space → Settings → Variables & Secrets.")

# Main form: post brief plus optional dataset, style cues and evidence.
with st.form("main"):
    topic = st.text_input("Topic", "Generative AI for Business")
    purpose = st.selectbox(
        "Purpose",
        ["", "awareness", "lead-gen", "hiring", "product launch", "opinion", "lesson learned"],
        index=0,
    )
    audience = st.text_input("Audience", "Startup founders")
    tone = st.selectbox(
        "Tone",
        ["Professional", "Friendly", "Contrarian", "Technical", "Inspirational"],
        index=0,
    )
    language = st.selectbox("Language", ["English", "Urdu", "Arabic", "French", "Spanish"], index=0)

    st.markdown("Optional: upload CSV/JSON of past posts (must include 'text').")
    uploaded = st.file_uploader("Upload dataset", type=["csv", "json"])

    st.markdown("Optional: style cues (max 4, one per line).")
    style_text = st.text_area(
        "Style cues",
        value="",
        placeholder="Short hooks\nActionable bullets\nStories with numbers\nTactical CTA",
    )

    st.markdown("Optional: evidence to include (metric, anecdote, quote).")
    evidence = st.text_area("Evidence", value="")

    submitted = st.form_submit_button("Continue")
# Session state for clarifier & output.
# "pending_request" holds the keywords, style cues and clarifier questions of
# the last submitted form while the clarifier is open. A click on
# "Generate Post" triggers a rerun in which `submitted` is False, so anything
# that click needs must be kept in session state rather than under
# `if submitted:`.
if "clarifier_notes" not in st.session_state:
    st.session_state.clarifier_notes = ""
if "last_post" not in st.session_state:
    st.session_state.last_post = ""
if "pending_request" not in st.session_state:
    st.session_state.pending_request = None


def _generate_post(keywords, style_cues, clarifier_notes):
    """Generate a post from the current inputs and store it as ``last_post``.

    Returns True on success; on failure shows the error and returns False,
    leaving any previous post untouched.
    """
    prompt = build_prompt(
        topic, language, tone, target_len, purpose, audience, evidence,
        keywords, style_cues, clarifier_notes,
    )
    with st.spinner("Generating with Groq..."):
        try:
            # Token budget scales with the requested word count, within limits.
            max_tokens = clamp(int(target_len * 1.6) + 120, 200, 1200)
            raw = groq_chat(prompt, model, temperature, top_p, max_tokens)
            st.session_state.last_post = dedupe_sentences(strip_labels(raw))
        except Exception as e:
            st.error(f"Groq generation failed: {e}")
            return False
    return True


if submitted:
    # Load dataset and extract keywords
    posts_df = None
    if uploaded is not None:
        try:
            posts_df = load_posts_from_file(uploaded)
        except Exception as e:
            st.error(f"Dataset error: {e}")
            st.stop()
    keywords = extract_keywords(topic, posts_df)
    style_cues = [s.strip() for s in style_text.splitlines() if s.strip()][:4]

    # A new submission starts from clean notes so answers given for an earlier
    # request do not leak into this one.
    st.session_state.clarifier_notes = ""
    qs = need_clarification(purpose, evidence)
    if qs:
        # Defer generation until the clarifier button is clicked.
        st.session_state.pending_request = {
            "keywords": keywords,
            "style_cues": style_cues,
            "questions": qs,
        }
    else:
        # Generate directly
        st.session_state.pending_request = None
        _generate_post(keywords, style_cues, st.session_state.clarifier_notes)

# Clarifier: rendered on every rerun while a request is pending, so both the
# answers and the button survive the rerun caused by interacting with them.
pending = st.session_state.pending_request
if pending:
    st.info("Clarifier")
    answers = [(q, st.text_input(q, key=f"q_{q}")) for q in pending["questions"]]
    if st.button("Generate Post"):
        # Rebuild the notes from the current answers instead of appending, so
        # reruns cannot duplicate lines.
        notes = "".join(f"{q} -> {ans}\n" for q, ans in answers if ans)
        st.session_state.clarifier_notes = notes
        if _generate_post(pending["keywords"], pending["style_cues"], notes):
            st.session_state.pending_request = None

# Output and refinements (transform the last output)
if st.session_state.last_post:
    # Reserve the post's slot above the refine buttons; it is filled last so
    # it shows the text produced by a refinement run in this same rerun, and
    # the post is rendered exactly once.
    post_area = st.container()

    st.markdown("---")
    st.subheader("Refine")
    col1, col2, col3, col4, col5 = st.columns(5)

    def refine(op):
        """Rewrite ``last_post`` in place according to the refinement ``op``."""
        if not st.session_state.last_post:
            return
        instr = {
            "shorter": "Shorten to ~120 words. Keep the opening intact. Return plain text only.",
            "punchier": "Make the hook more punchy and contrarian; keep total length similar. Plain text only.",
            "add_data": "Add one concrete metric or example to support the main claim. Plain text only.",
            "less_emoji": "Remove emojis entirely. Plain text only.",
            "add_tags": "Append 2–4 niche hashtags at the end (new line). Plain text only.",
        }[op]
        prompt = (
            "You are editing a LinkedIn post. Apply the instruction and return plain text only.\n\n"
            f"Instruction: {instr}\n\n"
            f"Post:\n{st.session_state.last_post}"
        )
        try:
            raw = groq_chat(prompt, model, temperature, top_p, clamp(600, 200, 1200))
            refined = dedupe_sentences(strip_labels(raw))
        except Exception as e:
            st.error(f"Refinement failed: {e}")
            return
        if refined:
            st.session_state.last_post = refined
        else:
            # Keep the current post rather than replacing it with nothing.
            st.warning("Refinement returned no text; keeping the current post.")

    if col1.button("Shorter"):
        refine("shorter")
    if col2.button("Punchier hook"):
        refine("punchier")
    if col3.button("Add data point"):
        refine("add_data")
    if col4.button("No emojis"):
        refine("less_emoji")
    if col5.button("Add hashtags"):
        refine("add_tags")

    with post_area:
        st.subheader("Post")
        st.write(st.session_state.last_post)
        st.download_button("Download (.txt)", st.session_state.last_post, file_name="linkedin_post.txt")