Spaces:
Sleeping
Sleeping
| import os | |
| import re | |
| import json | |
| import math | |
| import streamlit as st | |
| import pandas as pd | |
# ─────────────────────────────────────────
# Config
# ─────────────────────────────────────────
# Default Groq model identifier.
DEFAULT_MODEL = "llama-3.3-70b-versatile"

# Function words treated as phrase boundaries by the keyword extractor.
_STOPWORD_LINES = (
    "a an and the or for nor but so yet of to in on with at by from as is are was were be being been",
    "i you he she it we they them us our your their this that these those here there",
)
STOPWORDS = {word for line in _STOPWORD_LINES for word in line.split()}
# ─────────────────────────────────────────
# Groq client
# ─────────────────────────────────────────
| try: | |
| from groq import Groq | |
| except ImportError: | |
| Groq = None | |
def get_groq_client():
    """Build a Groq client from the ``GROQ_API_KEY`` environment variable.

    Returns:
        A ``Groq`` client constructed with the configured API key.

    Raises:
        RuntimeError: If ``GROQ_API_KEY`` is unset or blank, or if the
            ``groq`` package could not be imported.
    """
    # A whitespace-only value is as unusable as a missing one; reject it here
    # instead of letting the API call fail later with an auth error.
    api_key = (os.getenv("GROQ_API_KEY") or "").strip()
    if not api_key:
        raise RuntimeError("Missing GROQ_API_KEY. Set it in Space → Settings → Variables & Secrets.")
    if Groq is None:
        raise RuntimeError("Package 'groq' not installed. Add 'groq' to requirements.txt.")
    return Groq(api_key=api_key)
def groq_chat(prompt, model, temperature, top_p, max_tokens):
    """Send a single-turn chat request to Groq and return the reply text.

    Args:
        prompt: User message content.
        model: Groq model identifier.
        temperature: Sampling temperature forwarded to the API.
        top_p: Nucleus-sampling value forwarded to the API.
        max_tokens: Completion token limit forwarded to the API.

    Returns:
        The first choice's message content with surrounding whitespace removed.

    Raises:
        RuntimeError: If the client cannot be created, or the response has no
            choices or no text content.
    """
    client = get_groq_client()
    resp = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "You craft concise, original, high-signal LinkedIn posts. Respond with plain text only."},
            {"role": "user", "content": prompt},
        ],
        temperature=temperature,
        top_p=top_p,
        max_tokens=max_tokens,
    )
    # Guard the response shape so callers see a readable error rather than an
    # IndexError / AttributeError from indexing or calling .strip() on None.
    choices = getattr(resp, "choices", None)
    if not choices:
        raise RuntimeError("Groq returned no completion choices.")
    content = choices[0].message.content
    if content is None:
        raise RuntimeError("Groq returned an empty message.")
    return content.strip()
# ─────────────────────────────────────────
# Utils
# ─────────────────────────────────────────
def clamp(n, lo, hi):
    """Limit ``n`` to the range ``[lo, hi]``.

    The upper bound is applied first and the lower bound second, so when
    ``lo > hi`` the result is ``lo``.
    """
    capped = min(hi, n)
    return max(lo, capped)
def dedupe_sentences(text: str) -> str:
    """Drop repeated sentences while keeping the first occurrence of each.

    Sentences are delimited by whitespace that follows ``.``, ``!`` or ``?``.
    Two sentences are duplicates when they match after lower-casing and
    collapsing internal whitespace.

    Kept sentences are re-joined with a single space, except where the
    original gap before a kept sentence contained line breaks: those are
    preserved (capped at one blank line) so paragraph structure survives.

    Args:
        text: Raw text to clean.

    Returns:
        The de-duplicated text, stripped of leading/trailing whitespace.
    """
    # The capturing group makes re.split return
    # [sentence, gap, sentence, gap, ..., sentence].
    pieces = re.split(r'((?<=[.!?])\s+)', text.strip())
    seen = set()
    kept = []
    for idx in range(0, len(pieces), 2):
        sentence = pieces[idx].strip()
        key = re.sub(r'\s+', ' ', sentence.lower())
        if not key or key in seen:
            continue
        seen.add(key)
        if kept:
            # kept is non-empty only once idx >= 2, so pieces[idx - 1] is the
            # whitespace gap that preceded this sentence in the input.
            line_breaks = pieces[idx - 1].count("\n")
            kept.append("\n" * min(line_breaks, 2) if line_breaks else " ")
        kept.append(sentence)
    return "".join(kept).strip()
# One or more leading section labels ("Hook:", "BODY:", "cta:" ...) at the
# start of a line, in any letter case, together with surrounding whitespace.
_LABEL_PREFIX_RE = re.compile(r'^\s*(?:(?:hook|body|takeaway|cta):\s*)+', re.IGNORECASE)


def strip_labels(text: str) -> str:
    """Remove leading ``Hook:`` / ``Body:`` / ``Takeaway:`` / ``CTA:`` labels.

    Each line is checked independently; labels are matched case-insensitively
    and only at the start of the line. Lines without a label are left as-is.

    Args:
        text: Model output that may contain section labels.

    Returns:
        The text with labels removed and outer whitespace stripped.
    """
    cleaned = [_LABEL_PREFIX_RE.sub('', line) for line in text.splitlines()]
    return "\n".join(cleaned).strip()
# ─────────────────────────────────────────
# Dataset ingest + keywords (optional)
# ─────────────────────────────────────────
def load_posts_from_file(file) -> pd.DataFrame:
    """Load an uploaded CSV/JSON dataset and return its post text column.

    Args:
        file: File-like object with a ``name`` attribute; the extension
            (case-insensitive) selects the parser.

    Returns:
        A DataFrame with a single ``text`` column of strings; missing values
        become empty strings.

    Raises:
        ValueError: If the extension is not ``.csv``/``.json`` or no column
            named text/post/content/body (case-insensitive) exists.
    """
    name = file.name.lower()
    if name.endswith(".csv"):
        df = pd.read_csv(file)
    elif name.endswith(".json"):
        df = pd.read_json(file, lines=False)
    else:
        raise ValueError("Upload CSV or JSON.")

    # Column labels are not guaranteed to be strings (a JSON array of arrays
    # yields integer labels), so coerce before lower-casing.
    accepted = ("text", "post", "content", "body")
    cand = [c for c in df.columns if str(c).lower() in accepted]
    if not cand:
        raise ValueError("Dataset must contain 'text' (or post/content/body).")
    if "text" not in df.columns:
        df["text"] = df[cand[0]]
    df["text"] = df["text"].fillna("").astype(str)
    return df[["text"]]
def simple_rake(text, min_len=2, max_len=3, top_k=12, stopwords=None):
    """Extract candidate key phrases with a small RAKE-style scorer.

    The text is lower-cased and tokenized; runs of consecutive non-stopword
    tokens form phrases. Each token scores ``(degree + 1) / frequency`` and a
    phrase's score is the sum of its token scores, accumulated once per
    occurrence of the phrase.

    Args:
        text: Input text.
        min_len: Minimum number of tokens in a returned phrase.
        max_len: Maximum number of tokens in a returned phrase.
        top_k: Maximum number of phrases to return.
        stopwords: Optional collection of phrase-breaking words. Defaults to
            the module-level ``STOPWORDS``.

    Returns:
        Up to ``top_k`` phrases, highest score first.
    """
    if stopwords is None:
        stopwords = STOPWORDS

    # "\-" keeps the hyphen literal; the previous "\\-_" was parsed as a range
    # from backslash to underscore, which split hyphenated terms.
    words = re.findall(r"[A-Za-z0-9#+\-_/']+", text.lower())

    phrases, cur = [], []
    for w in words:
        if w in stopwords:
            if cur:
                phrases.append(" ".join(cur))
                cur = []
        else:
            cur.append(w)
    if cur:
        phrases.append(" ".join(cur))

    freq, degree = {}, {}
    for ph in phrases:
        toks = ph.split()
        for t in toks:
            freq[t] = freq.get(t, 0) + 1
            degree[t] = degree.get(t, 0) + (len(toks) - 1)

    scores = {}
    for ph in phrases:
        phrase_score = sum((degree.get(t, 0) + 1) / freq.get(t, 1) for t in ph.split())
        scores[ph] = scores.get(ph, 0) + phrase_score

    ranked = sorted(scores.items(), key=lambda item: item[1], reverse=True)
    filtered = [p for p, _ in ranked if min_len <= len(p.split()) <= max_len]
    return filtered[:top_k]
def tfidf_builder(texts, top_k=8):
    """Build a TF-IDF keyword scorer from a corpus of texts.

    Document frequencies are computed once over ``texts``. The returned
    function tokenizes a new text the same way and ranks its tokens by
    ``tf * idf`` with ``idf = log((N + 1) / (df + 1)) + 1``; tokens not seen
    in the corpus are treated as having a document frequency of 1.

    Args:
        texts: Iterable of corpus documents (strings).
        top_k: Number of top tokens the scorer returns.

    Returns:
        A function ``score(text) -> list[str]`` returning up to ``top_k``
        tokens, highest TF-IDF first.
    """
    # "\-" keeps the hyphen literal; the previous "\\-_" was parsed as a range
    # from backslash to underscore, which split hyphenated terms.
    token_re = re.compile(r"[A-Za-z0-9#+\-_/']+")

    docs = [token_re.findall(t.lower()) for t in texts]
    vocab = {}
    for d in docs:
        for w in set(d):
            vocab[w] = vocab.get(w, 0) + 1
    N = len(docs)

    def score(text):
        doc = token_re.findall(text.lower())
        tf = {}
        for w in doc:
            tf[w] = tf.get(w, 0) + 1
        scores = {}
        # tf is empty when doc is empty, so len(doc) is never zero here.
        for w, count in tf.items():
            doc_freq = vocab.get(w, 1)
            idf = math.log((N + 1) / (doc_freq + 1)) + 1
            scores[w] = (count / len(doc)) * idf
        ranked = sorted(scores.items(), key=lambda item: item[1], reverse=True)
        return [w for w, _ in ranked[:top_k]]

    return score
def extract_keywords(topic, df: pd.DataFrame|None):
    """Collect up to 12 de-duplicated keywords for a topic.

    With a non-empty dataset, RAKE phrases are taken from a fixed-seed sample
    of up to 30 posts plus the topic, and TF-IDF tokens from the topic plus
    the first five sampled posts. Without a dataset, RAKE runs on the topic
    alone.

    Args:
        topic: Topic string entered by the user.
        df: DataFrame with a ``text`` column, or ``None``.

    Returns:
        Lower-cased, whitespace-normalized keywords in first-seen order.
    """
    if df is not None and len(df):
        sample = df["text"].sample(min(30, len(df)), random_state=42).tolist()
        rake_kw = simple_rake(" ".join(sample + [topic]), min_len=2, max_len=3, top_k=12)
        tfidf_fn = tfidf_builder(df["text"].tolist(), top_k=8)
        kw2 = tfidf_fn(topic + " " + " ".join(sample[:5]))
        raw = rake_kw + kw2
    else:
        raw = simple_rake(topic, min_len=1, max_len=2, top_k=8)

    seen, out = set(), []
    for k in raw:
        # r"\s+" collapses whitespace runs; the previous r"\\s+" matched a
        # literal backslash followed by "s" and never normalized anything.
        k2 = re.sub(r"\s+", " ", k.strip().lower())
        if k2 and k2 not in seen:
            seen.add(k2)
            out.append(k2)
    return out[:12]
# ─────────────────────────────────────────
# Interactive clarifier
# ─────────────────────────────────────────
def need_clarification(purpose, evidence):
    """Return the follow-up questions for whichever inputs are empty.

    A question about the desired outcome is included when ``purpose`` is
    falsy, and a request for a concrete detail when ``evidence`` is falsy.
    The result is an empty list when both are provided.
    """
    follow_ups = (
        (purpose, "What outcome do you want from this post? (awareness, demo requests, hiring, launch, opinion, lesson)"),
        (evidence, "Share one concrete detail to include (metric, anecdote, quote, or specific example)."),
    )
    return [question for value, question in follow_ups if not value]
# ─────────────────────────────────────────
# Prompt (single post, plain text)
# ─────────────────────────────────────────
| def build_prompt(topic, language, tone, target_len, purpose, audience, evidence, keywords, style_cues, clarifier_notes): | |
| kw_block = ", ".join((keywords or [])[:8]) if keywords else "N/A" | |
| cues_block = "\\n".join(f"- {c}" for c in (style_cues or [])[:4]) if style_cues else "- None" | |
| notes = (clarifier_notes or "").strip() | |
| return ( | |
| "You are a senior LinkedIn content strategist. " | |
| "Write one viral, insightful LinkedIn post as plain text only (no section headers, no labels).\n\n" | |
| f"Language: {language}\n" | |
| f"Topic: \"{topic}\"\n" | |
| f"Purpose: {purpose or 'awareness'}\n" | |
| f"Audience: {audience or 'general professionals'}\n" | |
| f"Tone: {tone}\n" | |
| f"Approx length: ~{target_len} words\n" | |
| f"Keywords to weave in naturally: {kw_block}\n" | |
| "Style cues (apply silently):\n" | |
| f"{cues_block}\n\n" | |
| "User-provided evidence/details (incorporate if relevant):\n" | |
| f"{evidence or 'None'}\n\n" | |
| "Additional notes from clarifier (apply silently):\n" | |
| f"{notes or 'None'}\n\n" | |
| "Rules (do not mention these explicitly):\n" | |
| "- Curiosity-driven first line.\n" | |
| "- Short paragraphs; concrete, novel insights (3β5), examples welcome.\n" | |
| "- Max 2 emojis; 2β4 niche hashtags only at end (optional).\n" | |
| "- No repeated sentences; avoid clichΓ©s.\n" | |
| "- Return a single cohesive post in plain text only." | |
| ) | |
# ─────────────────────────────────────────
# Streamlit UI
# ─────────────────────────────────────────
st.set_page_config(page_title="LinkedIn Post Generator — Groq (Interactive)", layout="centered")
st.title("LinkedIn Post Generator — Interactive (Groq)")

# Sidebar: model choice and decoding parameters shared by every Groq call.
# NOTE(review): the extent of the sidebar block is inferred from context
# (the original indentation was lost) — confirm against the deployed file.
with st.sidebar:
    st.subheader("Groq & Decoding")
    model = st.selectbox(
        "Groq model",
        ["llama-3.3-70b-versatile", "llama-3.1-8b-instant", "mixtral-8x7b-32768"],
        index=0,
    )
    temperature = st.slider("Temperature", 0.1, 1.2, 0.6, 0.05)
    top_p = st.slider("Top-p", 0.1, 1.0, 0.9, 0.05)
    target_len = st.slider("Target length (words)", 60, 300, 140, 10)
    st.markdown("Set GROQ_API_KEY in Space → Settings → Variables & Secrets.")

# Main form: all inputs are submitted together via the "Continue" button.
with st.form("main"):
    topic = st.text_input("Topic", "Generative AI for Business")
    purpose = st.selectbox(
        "Purpose",
        ["", "awareness", "lead-gen", "hiring", "product launch", "opinion", "lesson learned"],
        index=0,
    )
    audience = st.text_input("Audience", "Startup founders")
    tone = st.selectbox("Tone", ["Professional", "Friendly", "Contrarian", "Technical", "Inspirational"], index=0)
    language = st.selectbox("Language", ["English", "Urdu", "Arabic", "French", "Spanish"], index=0)
    st.markdown("Optional: upload CSV/JSON of past posts (must include 'text').")
    uploaded = st.file_uploader("Upload dataset", type=["csv", "json"])
    st.markdown("Optional: style cues (max 4, one per line).")
    style_text = st.text_area("Style cues", value="", placeholder="Short hooks\nActionable bullets\nStories with numbers\nTactical CTA")
    st.markdown("Optional: evidence to include (metric, anecdote, quote).")
    evidence = st.text_area("Evidence", value="")
    submitted = st.form_submit_button("Continue")

# Session state for clarifier & output
if "clarifier_notes" not in st.session_state:
    st.session_state.clarifier_notes = ""
if "last_post" not in st.session_state:
    st.session_state.last_post = ""
def _generate_post(keywords, style_cues):
    """Generate a post from the current form values and store it in session state.

    On failure the error is shown in the UI and the previous post is kept.
    """
    prompt = build_prompt(
        topic, language, tone, target_len, purpose, audience, evidence,
        keywords, style_cues, st.session_state.clarifier_notes,
    )
    with st.spinner("Generating with Groq..."):
        try:
            max_tokens = clamp(int(target_len * 1.6) + 120, 200, 1200)
            raw = groq_chat(prompt, model, temperature, top_p, max_tokens)
            st.session_state.last_post = dedupe_sentences(strip_labels(raw))
        except Exception as e:
            st.error(f"Groq generation failed: {e}")


def refine(op):
    """Rewrite the stored post according to the named refinement operation."""
    if not st.session_state.last_post:
        return
    instr = {
        "shorter": "Shorten to ~120 words. Keep the opening intact. Return plain text only.",
        "punchier": "Make the hook more punchy and contrarian; keep total length similar. Plain text only.",
        "add_data": "Add one concrete metric or example to support the main claim. Plain text only.",
        "less_emoji": "Remove emojis entirely. Plain text only.",
        "add_tags": "Append 2–4 niche hashtags at the end (new line). Plain text only.",
    }[op]
    prompt = (
        "You are editing a LinkedIn post. Apply the instruction and return plain text only.\n\n"
        f"Instruction: {instr}\n\n"
        f"Post:\n{st.session_state.last_post}"
    )
    try:
        raw = groq_chat(prompt, model, temperature, top_p, clamp(600, 200, 1200))
        st.session_state.last_post = dedupe_sentences(strip_labels(raw))
    except Exception as e:
        st.error(f"Refinement failed: {e}")


# `submitted` is True only during the rerun triggered by the form button. Any
# widget rendered solely under `if submitted:` disappears on the rerun caused
# by interacting with it, so the clarifier inputs and the "Generate Post"
# button could never take effect. Persist the submission in session state and
# key the follow-up UI off that flag instead.
if submitted:
    st.session_state.form_submitted = True

if st.session_state.get("form_submitted"):
    # Load dataset and extract keywords
    posts_df = None
    if uploaded is not None:
        try:
            # The upload buffer may already have been consumed on an earlier
            # rerun; rewind it before parsing again.
            uploaded.seek(0)
            posts_df = load_posts_from_file(uploaded)
        except Exception as e:
            st.error(f"Dataset error: {e}")
            st.stop()
    keywords = extract_keywords(topic, posts_df)
    style_cues = [s.strip() for s in style_text.splitlines() if s.strip()][:4]

    # Clarifier
    qs = need_clarification(purpose, evidence)
    if qs:
        st.info("Clarifier")
        answered = []
        for q in qs:
            ans = st.text_input(q, key=f"q_{q}")
            if ans:
                answered.append(f"{q} -> {ans}\n")
        # Rebuild the notes from the current answers on every rerun; appending
        # would duplicate each answer once per rerun.
        st.session_state.clarifier_notes = "".join(answered)
        if st.button("Generate Post"):
            _generate_post(keywords, style_cues)
    elif submitted:
        # Nothing to clarify: generate directly, but only on the submit run so
        # later reruns (e.g. refine clicks) do not regenerate the post.
        st.session_state.clarifier_notes = ""
        _generate_post(keywords, style_cues)

# Output + refinements (transform the last output)
if st.session_state.last_post:
    # Reserve the post's position now and fill it after the refine buttons
    # have been processed, so a refinement made in this run is what is shown.
    post_area = st.container()

    st.markdown("---")
    st.subheader("Refine")
    refine_ops = [
        ("Shorter", "shorter"),
        ("Punchier hook", "punchier"),
        ("Add data point", "add_data"),
        ("No emojis", "less_emoji"),
        ("Add hashtags", "add_tags"),
    ]
    for col, (label, op) in zip(st.columns(len(refine_ops)), refine_ops):
        if col.button(label):
            refine(op)

    with post_area:
        st.subheader("Post")
        st.write(st.session_state.last_post)
        st.download_button("Download (.txt)", st.session_state.last_post, file_name="linkedin_post.txt")