Alpha108 committed on
Commit
d0be94e
·
verified ·
1 Parent(s): fb2e00d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +132 -57
app.py CHANGED
@@ -8,7 +8,7 @@ import pandas as pd
8
  # ─────────────────────────────────────────
9
  # Config
10
  # ─────────────────────────────────────────
11
- DEFAULT_MODEL = "llama-3.3-70b-versatile" # Groq
12
  STOPWORDS = set("""
13
  a an and the or for nor but so yet of to in on with at by from as is are was were be being been
14
  i you he she it we they them us our your their this that these those here there
@@ -25,7 +25,7 @@ except ImportError:
25
  def get_groq_client():
26
  api_key = os.getenv("GROQ_API_KEY")
27
  if not api_key:
28
- raise RuntimeError("Missing GROQ_API_KEY. Set in Space β†’ Settings β†’ Variables & Secrets.")
29
  if Groq is None:
30
  raise RuntimeError("Package 'groq' not installed. Add 'groq' to requirements.txt.")
31
  return Groq(api_key=api_key)
@@ -35,7 +35,7 @@ def groq_chat(prompt, model, temperature, top_p, max_tokens):
35
  resp = client.chat.completions.create(
36
  model=model,
37
  messages=[
38
- {"role": "system", "content": "You craft concise, original, high-signal LinkedIn posts."},
39
  {"role": "user", "content": prompt}
40
  ],
41
  temperature=temperature,
@@ -45,7 +45,7 @@ def groq_chat(prompt, model, temperature, top_p, max_tokens):
45
  return resp.choices[0].message.content.strip()
46
 
47
  # ─────────────────────────────────────────
48
- # Utilities
49
  # ─────────────────────────────────────────
50
  def clamp(n, lo, hi):
51
  return max(lo, min(hi, n))
@@ -76,7 +76,7 @@ def strip_labels(text: str) -> str:
76
  return "\n".join(cleaned).strip()
77
 
78
  # ─────────────────────────────────────────
79
- # Dataset ingest + keywords (optional, improves relevance)
80
  # ─────────────────────────────────────────
81
  def load_posts_from_file(file) -> pd.DataFrame:
82
  name = file.name.lower()
@@ -88,14 +88,14 @@ def load_posts_from_file(file) -> pd.DataFrame:
88
  raise ValueError("Upload CSV or JSON.")
89
  cand = [c for c in df.columns if c.lower() in ("text","post","content","body")]
90
  if not cand:
91
- raise ValueError("Dataset must contain a 'text' (or post/content/body) column.")
92
  if "text" not in df.columns:
93
  df["text"] = df[cand[0]]
94
  df["text"] = df["text"].fillna("").astype(str)
95
  return df[["text"]]
96
 
97
  def simple_rake(text, min_len=2, max_len=3, top_k=12):
98
- words = re.findall(r"[A-Za-z0-9#+\-_/']+", text.lower())
99
  phrases, cur = [], []
100
  for w in words:
101
  if w in STOPWORDS:
@@ -116,21 +116,21 @@ def simple_rake(text, min_len=2, max_len=3, top_k=12):
116
  for ph in phrases:
117
  s = 0.0
118
  for t in ph.split():
119
- s += (degree.get(t,0)+1)/ (freq.get(t,1))
120
- scores[ph] = scores.get(ph,0)+s
121
  ranked = sorted(scores.items(), key=lambda x: x[1], reverse=True)
122
  filtered = [p for p,_ in ranked if min_len <= len(p.split()) <= max_len]
123
  return filtered[:top_k]
124
 
125
  def tfidf_builder(texts, top_k=8):
126
- docs = [re.findall(r"[A-Za-z0-9#+\-_/']+", t.lower()) for t in texts]
127
  vocab = {}
128
  for d in docs:
129
  for w in set(d):
130
  vocab[w] = vocab.get(w,0)+1
131
  N = len(docs)
132
  def score(text):
133
- doc = re.findall(r"[A-Za-z0-9#+\-_/']+", text.lower())
134
  tf = {}
135
  for w in doc:
136
  tf[w] = tf.get(w,0)+1
@@ -154,71 +154,94 @@ def extract_keywords(topic, df: pd.DataFrame|None):
154
  raw = simple_rake(topic, min_len=1, max_len=2, top_k=8)
155
  seen, out = set(), []
156
  for k in raw:
157
- k2 = re.sub(r"\s+"," ",k.strip().lower())
158
  if k2 and k2 not in seen:
159
  seen.add(k2); out.append(k2)
160
  return out[:12]
161
 
162
  # ─────────────────────────────────────────
163
- # Stage‑2 Prompt (hidden structure, plain output)
164
  # ─────────────────────────────────────────
165
- def build_stage2_prompt(topic, language, target_len, tone, keywords=None, style_cues=None):
 
 
 
 
 
 
 
 
 
 
 
166
  kw_block = ", ".join((keywords or [])[:8]) if keywords else "N/A"
167
- cues_block = "\n".join(f"- {c}" for c in (style_cues or [])[:4]) if style_cues else "- None"
 
168
  return (
169
- "You are a senior LinkedIn content strategist.\n"
170
- "Objective: Write a viral, insightful LinkedIn post as plain text only (no section headers, no labels).\n\n"
171
  f"Language: {language}\n"
172
  f"Topic: \"{topic}\"\n"
173
- f"Tone: \"{tone}\"\n"
 
 
174
  f"Approx length: ~{target_len} words\n"
175
  f"Keywords to weave in naturally: {kw_block}\n"
176
  "Style cues (apply silently):\n"
177
  f"{cues_block}\n\n"
178
- "Apply without mentioning rules:\n"
179
- "- Curiosity‑driven first line.\n"
 
 
 
 
180
  "- Short paragraphs; concrete, novel insights (3–5), examples welcome.\n"
181
- "- Max 2 emojis; 2–4 niche hashtags only at very end (optional).\n"
182
  "- No repeated sentences; avoid clichΓ©s.\n"
183
- "- Output must be one cohesive post in plain text. No labels or headings."
184
  )
185
 
186
  # ─────────────────────────────────────────
187
- # UI
188
  # ─────────────────────────────────────────
189
- st.set_page_config(page_title="LinkedIn Post Generator β€” Stage 2 (Groq)", layout="centered")
190
- st.title("Stage 2: Topic β†’ Prompt β†’ Llama‑3.x (Groq) β†’ 3 Variants")
191
 
192
  with st.sidebar:
193
  st.subheader("Groq & Decoding")
194
- model = st.selectbox(
195
- "Groq model",
196
- options=["llama-3.3-70b-versatile","llama-3.1-8b-instant","mixtral-8x7b-32768"],
197
- index=0
198
- )
199
  temperature = st.slider("Temperature", 0.1, 1.2, 0.6, 0.05)
200
  top_p = st.slider("Top‑p", 0.1, 1.0, 0.9, 0.05)
201
  target_len = st.slider("Target length (words)", 60, 300, 140, 10)
202
  st.markdown("Set GROQ_API_KEY in Space β†’ Settings β†’ Variables & Secrets.")
203
 
204
- with st.form("stage2_form"):
205
  topic = st.text_input("Topic", "Generative AI for Business")
 
 
 
206
  language = st.selectbox("Language", ["English","Urdu","Arabic","French","Spanish"], index=0)
207
- tone = st.selectbox("Tone", ["Professional","Friendly","Inspirational","Technical","Concise"], index=0)
208
 
209
- st.markdown("Optional: upload a dataset of past LinkedIn posts (CSV/JSON) with a 'text' column.")
210
- uploaded = st.file_uploader("Upload CSV/JSON", type=["csv","json"])
211
 
212
- st.markdown("Optional: add up to 4 style cues (one per line).")
213
  style_text = st.text_area("Style cues", value="", placeholder="Short hooks\nActionable bullets\nStories with numbers\nTactical CTA")
214
 
215
- submitted = st.form_submit_button("Generate 3 Variants")
 
216
 
217
- if submitted:
218
- if not os.getenv("GROQ_API_KEY"):
219
- st.error("GROQ_API_KEY missing. Add it in Space β†’ Settings β†’ Variables & Secrets.")
220
- st.stop()
 
 
 
221
 
 
 
222
  posts_df = None
223
  if uploaded is not None:
224
  try:
@@ -226,27 +249,79 @@ if submitted:
226
  except Exception as e:
227
  st.error(f"Dataset error: {e}")
228
  st.stop()
229
-
230
  keywords = extract_keywords(topic, posts_df)
231
  style_cues = [s.strip() for s in style_text.splitlines() if s.strip()][:4]
232
 
233
- prompt = build_stage2_prompt(topic, language, target_len, tone, keywords, style_cues)
234
-
235
- st.subheader("Variants")
236
- variants = []
237
- with st.spinner("Generating with Groq..."):
238
- try:
239
- max_tokens = clamp(int(target_len*1.6)+120, 200, 1200)
240
- # Generate 3 separate candidates
241
- for i in range(3):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
242
  raw = groq_chat(prompt, model, temperature, top_p, max_tokens)
243
  clean = dedupe_sentences(strip_labels(raw))
244
- variants.append(clean)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
245
  except Exception as e:
246
- st.error(f"Groq generation failed: {e}")
247
- st.stop()
 
 
 
 
 
248
 
249
- for i, v in enumerate(variants, start=1):
250
- st.markdown(f"### Post {i}")
251
- st.write(v)
252
- st.download_button(f"Download Post {i}", v, file_name=f"post_{i}.txt")
 
8
  # ─────────────────────────────────────────
9
  # Config
10
  # ─────────────────────────────────────────
11
+ DEFAULT_MODEL = "llama-3.3-70b-versatile"
12
  STOPWORDS = set("""
13
  a an and the or for nor but so yet of to in on with at by from as is are was were be being been
14
  i you he she it we they them us our your their this that these those here there
 
25
  def get_groq_client():
26
  api_key = os.getenv("GROQ_API_KEY")
27
  if not api_key:
28
+ raise RuntimeError("Missing GROQ_API_KEY. Set it in Space β†’ Settings β†’ Variables & Secrets.")
29
  if Groq is None:
30
  raise RuntimeError("Package 'groq' not installed. Add 'groq' to requirements.txt.")
31
  return Groq(api_key=api_key)
 
35
  resp = client.chat.completions.create(
36
  model=model,
37
  messages=[
38
+ {"role": "system", "content": "You craft concise, original, high-signal LinkedIn posts. Respond with plain text only."},
39
  {"role": "user", "content": prompt}
40
  ],
41
  temperature=temperature,
 
45
  return resp.choices[0].message.content.strip()
46
 
47
  # ─────────────────────────────────────────
48
+ # Utils
49
  # ─────────────────────────────────────────
50
  def clamp(n, lo, hi):
51
  return max(lo, min(hi, n))
 
76
  return "\n".join(cleaned).strip()
77
 
78
  # ─────────────────────────────────────────
79
+ # Dataset ingest + keywords (optional)
80
  # ─────────────────────────────────────────
81
  def load_posts_from_file(file) -> pd.DataFrame:
82
  name = file.name.lower()
 
88
  raise ValueError("Upload CSV or JSON.")
89
  cand = [c for c in df.columns if c.lower() in ("text","post","content","body")]
90
  if not cand:
91
+ raise ValueError("Dataset must contain 'text' (or post/content/body).")
92
  if "text" not in df.columns:
93
  df["text"] = df[cand[0]]
94
  df["text"] = df["text"].fillna("").astype(str)
95
  return df[["text"]]
96
 
97
  def simple_rake(text, min_len=2, max_len=3, top_k=12):
98
+ words = re.findall(r"[A-Za-z0-9#+\-_/']+", text.lower())
99
  phrases, cur = [], []
100
  for w in words:
101
  if w in STOPWORDS:
 
116
  for ph in phrases:
117
  s = 0.0
118
  for t in ph.split():
119
+ s += (degree.get(t,0)+1) / (freq.get(t,1))
120
+ scores[ph] = scores.get(ph,0) + s
121
  ranked = sorted(scores.items(), key=lambda x: x[1], reverse=True)
122
  filtered = [p for p,_ in ranked if min_len <= len(p.split()) <= max_len]
123
  return filtered[:top_k]
124
 
125
  def tfidf_builder(texts, top_k=8):
126
+ docs = [re.findall(r"[A-Za-z0-9#+\-_/']+", t.lower()) for t in texts]
127
  vocab = {}
128
  for d in docs:
129
  for w in set(d):
130
  vocab[w] = vocab.get(w,0)+1
131
  N = len(docs)
132
  def score(text):
133
+ doc = re.findall(r"[A-Za-z0-9#+\-_/']+", text.lower())
134
  tf = {}
135
  for w in doc:
136
  tf[w] = tf.get(w,0)+1
 
154
  raw = simple_rake(topic, min_len=1, max_len=2, top_k=8)
155
  seen, out = set(), []
156
  for k in raw:
157
+ k2 = re.sub(r"\s+"," ",k.strip().lower())
158
  if k2 and k2 not in seen:
159
  seen.add(k2); out.append(k2)
160
  return out[:12]
161
 
162
  # ─────────────────────────────────────────
163
+ # Interactive clarifier
164
  # ─────────────────────────────────────────
165
+ def need_clarification(purpose, evidence):
166
+ questions = []
167
+ if not purpose:
168
+ questions.append("What outcome do you want from this post? (awareness, demo requests, hiring, launch, opinion, lesson)")
169
+ if not evidence:
170
+ questions.append("Share one concrete detail to include (metric, anecdote, quote, or specific example).")
171
+ return questions
172
+
173
+ # ─────────────────────────────────────────
174
+ # Prompt (single post, plain text)
175
+ # ─────────────────────────────────────────
176
+ def build_prompt(topic, language, tone, target_len, purpose, audience, evidence, keywords, style_cues, clarifier_notes):
177
  kw_block = ", ".join((keywords or [])[:8]) if keywords else "N/A"
178
+ cues_block = "\n".join(f"- {c}" for c in (style_cues or [])[:4]) if style_cues else "- None"
179
+ notes = (clarifier_notes or "").strip()
180
  return (
181
+ "You are a senior LinkedIn content strategist. "
182
+ "Write one viral, insightful LinkedIn post as plain text only (no section headers, no labels).\n\n"
183
  f"Language: {language}\n"
184
  f"Topic: \"{topic}\"\n"
185
+ f"Purpose: {purpose or 'awareness'}\n"
186
+ f"Audience: {audience or 'general professionals'}\n"
187
+ f"Tone: {tone}\n"
188
  f"Approx length: ~{target_len} words\n"
189
  f"Keywords to weave in naturally: {kw_block}\n"
190
  "Style cues (apply silently):\n"
191
  f"{cues_block}\n\n"
192
+ "User-provided evidence/details (incorporate if relevant):\n"
193
+ f"{evidence or 'None'}\n\n"
194
+ "Additional notes from clarifier (apply silently):\n"
195
+ f"{notes or 'None'}\n\n"
196
+ "Rules (do not mention these explicitly):\n"
197
+ "- Curiosity-driven first line.\n"
198
  "- Short paragraphs; concrete, novel insights (3–5), examples welcome.\n"
199
+ "- Max 2 emojis; 2–4 niche hashtags only at end (optional).\n"
200
  "- No repeated sentences; avoid clichΓ©s.\n"
201
+ "- Return a single cohesive post in plain text only."
202
  )
203
 
204
  # ─────────────────────────────────────────
205
+ # Streamlit UI
206
  # ─────────────────────────────────────────
207
+ st.set_page_config(page_title="LinkedIn Post Generator β€” Groq (Interactive)", layout="centered")
208
+ st.title("LinkedIn Post Generator β€” Interactive (Groq)")
209
 
210
  with st.sidebar:
211
  st.subheader("Groq & Decoding")
212
+ model = st.selectbox("Groq model",
213
+ ["llama-3.3-70b-versatile","llama-3.1-8b-instant","mixtral-8x7b-32768"], index=0)
 
 
 
214
  temperature = st.slider("Temperature", 0.1, 1.2, 0.6, 0.05)
215
  top_p = st.slider("Top‑p", 0.1, 1.0, 0.9, 0.05)
216
  target_len = st.slider("Target length (words)", 60, 300, 140, 10)
217
  st.markdown("Set GROQ_API_KEY in Space β†’ Settings β†’ Variables & Secrets.")
218
 
219
+ with st.form("main"):
220
  topic = st.text_input("Topic", "Generative AI for Business")
221
+ purpose = st.selectbox("Purpose", ["", "awareness", "lead-gen", "hiring", "product launch", "opinion", "lesson learned"], index=0)
222
+ audience = st.text_input("Audience", "Startup founders")
223
+ tone = st.selectbox("Tone", ["Professional", "Friendly", "Contrarian", "Technical", "Inspirational"], index=0)
224
  language = st.selectbox("Language", ["English","Urdu","Arabic","French","Spanish"], index=0)
 
225
 
226
+ st.markdown("Optional: upload CSV/JSON of past posts (must include 'text').")
227
+ uploaded = st.file_uploader("Upload dataset", type=["csv","json"])
228
 
229
+ st.markdown("Optional: style cues (max 4, one per line).")
230
  style_text = st.text_area("Style cues", value="", placeholder="Short hooks\nActionable bullets\nStories with numbers\nTactical CTA")
231
 
232
+ st.markdown("Optional: evidence to include (metric, anecdote, quote).")
233
+ evidence = st.text_area("Evidence", value="")
234
 
235
+ submitted = st.form_submit_button("Continue")
236
+
237
+ # Session state for clarifier & output
238
+ if "clarifier_notes" not in st.session_state:
239
+ st.session_state.clarifier_notes = ""
240
+ if "last_post" not in st.session_state:
241
+ st.session_state.last_post = ""
242
 
243
+ if submitted:
244
+ # Load dataset and extract keywords
245
  posts_df = None
246
  if uploaded is not None:
247
  try:
 
249
  except Exception as e:
250
  st.error(f"Dataset error: {e}")
251
  st.stop()
 
252
  keywords = extract_keywords(topic, posts_df)
253
  style_cues = [s.strip() for s in style_text.splitlines() if s.strip()][:4]
254
 
255
+ # Clarifier
256
+ qs = need_clarification(purpose, evidence)
257
+ if qs:
258
+ st.info("Clarifier")
259
+ for q in qs:
260
+ ans = st.text_input(q, key=f"q_{q}")
261
+ if ans:
262
+ st.session_state.clarifier_notes += f"{q} -> {ans}\n"
263
+ if st.button("Generate Post"):
264
+ prompt = build_prompt(topic, language, tone, target_len, purpose, audience, evidence, keywords, style_cues, st.session_state.clarifier_notes)
265
+ with st.spinner("Generating with Groq..."):
266
+ try:
267
+ max_tokens = clamp(int(target_len*1.6)+120, 200, 1200)
268
+ raw = groq_chat(prompt, model, temperature, top_p, max_tokens)
269
+ clean = dedupe_sentences(strip_labels(raw))
270
+ st.session_state.last_post = clean
271
+ except Exception as e:
272
+ st.error(f"Groq generation failed: {e}")
273
+ # show output if available
274
+ if st.session_state.last_post:
275
+ st.subheader("Post")
276
+ st.write(st.session_state.last_post)
277
+ st.download_button("Download (.txt)", st.session_state.last_post, file_name="linkedin_post.txt")
278
+ else:
279
+ # Generate directly
280
+ prompt = build_prompt(topic, language, tone, target_len, purpose, audience, evidence, keywords, style_cues, st.session_state.clarifier_notes)
281
+ with st.spinner("Generating with Groq..."):
282
+ try:
283
+ max_tokens = clamp(int(target_len*1.6)+120, 200, 1200)
284
  raw = groq_chat(prompt, model, temperature, top_p, max_tokens)
285
  clean = dedupe_sentences(strip_labels(raw))
286
+ st.session_state.last_post = clean
287
+ except Exception as e:
288
+ st.error(f"Groq generation failed: {e}")
289
+
290
+ if st.session_state.last_post:
291
+ st.subheader("Post")
292
+ st.write(st.session_state.last_post)
293
+ st.download_button("Download (.txt)", st.session_state.last_post, file_name="linkedin_post.txt")
294
+
295
+ # Refinements (transform the last output)
296
+ if st.session_state.last_post:
297
+ st.markdown("---")
298
+ st.subheader("Refine")
299
+ col1, col2, col3, col4, col5 = st.columns(5)
300
+ def refine(op):
301
+ if not st.session_state.last_post:
302
+ return
303
+ instr = {
304
+ "shorter": "Shorten to ~120 words. Keep the opening intact. Return plain text only.",
305
+ "punchier": "Make the hook more punchy and contrarian; keep total length similar. Plain text only.",
306
+ "add_data": "Add one concrete metric or example to support the main claim. Plain text only.",
307
+ "less_emoji": "Remove emojis entirely. Plain text only.",
308
+ "add_tags": "Append 2–4 niche hashtags at the end (new line). Plain text only."
309
+ }[op]
310
+ prompt = (
311
+ "You are editing a LinkedIn post. Apply the instruction and return plain text only.\n\n"
312
+ f"Instruction: {instr}\n\n"
313
+ f"Post:\n{st.session_state.last_post}"
314
+ )
315
+ try:
316
+ raw = groq_chat(prompt, model, temperature, top_p, clamp(600, 200, 1200))
317
+ st.session_state.last_post = dedupe_sentences(strip_labels(raw))
318
  except Exception as e:
319
+ st.error(f"Refinement failed: {e}")
320
+
321
+ if col1.button("Shorter"): refine("shorter")
322
+ if col2.button("Punchier hook"): refine("punchier")
323
+ if col3.button("Add data point"): refine("add_data")
324
+ if col4.button("No emojis"): refine("less_emoji")
325
+ if col5.button("Add hashtags"): refine("add_tags")
326
 
327
+ st.write(st.session_state.last_post)