Alpha108 committed on
Commit
af89629
·
verified ·
1 Parent(s): 2f6fb2e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -36
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import streamlit as st
2
  import json
3
  import os
 
4
  from transformers import pipeline
5
 
6
  STYLE_SAMPLES_FILE = "style_samples.json"
@@ -11,29 +12,69 @@ def load_style_samples():
11
  return json.load(f)
12
  return []
13
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  @st.cache_resource(show_spinner=False)
15
  def load_pipeline():
16
- # CPU-friendly seq2seq model; swap later if you upgrade hardware
17
  model_id = "google/flan-t5-base"
18
- # Avoid device_map to prevent Accelerate requirement on Spaces CPU
19
  gen_pipe = pipeline(
20
  task="text2text-generation",
21
  model=model_id
 
22
  )
23
  return gen_pipe
24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  pipe = load_pipeline()
26
  style_samples = load_style_samples()
27
 
 
28
  st.set_page_config(page_title="LinkedIn Post Generator", layout="centered")
29
  st.title("🔗 LinkedIn Post Generator (Hugging Face)")
30
- st.write("Generate LinkedIn posts with few-shot style guidance.")
31
 
32
  with st.form("gen_form"):
33
  topic = st.text_input("Post Topic", "Generative AI for Business")
34
  tone = st.selectbox("Tone", ["Professional", "Friendly", "Inspirational", "Technical", "Concise"])
35
  audience = st.text_input("Audience", "Startup founders")
36
- length = st.slider("Length (approx words)", 30, 400, 120, 10)
 
37
  use_sample = st.selectbox(
38
  "Style Sample (optional)",
39
  ["None"] + [f"Sample {i+1}" for i in range(len(style_samples))]
@@ -43,36 +84,11 @@ with st.form("gen_form"):
43
  with st.expander("Advanced generation settings"):
44
  temperature = st.slider("Temperature", 0.1, 1.2, 0.7, 0.05)
45
  top_p = st.slider("Top-p (nucleus)", 0.1, 1.0, 0.9, 0.05)
46
- repetition_penalty = st.slider("Repetition penalty", 1.0, 2.0, 1.15, 0.05)
47
  no_repeat_ngram_size = st.slider("No-repeat n-gram size", 1, 6, 3, 1)
48
 
49
  submitted = st.form_submit_button("Generate Post")
50
 
51
- def build_prompt(topic, audience, tone, length, style_example_text):
52
- # Structured prompt to reduce repetition and produce LinkedIn-ready content
53
- return (
54
- "Task: Write a LinkedIn post.\n\n"
55
- f"Topic: \"{topic}\"\n"
56
- f"Audience: \"{audience}\"\n"
57
- f"Tone: \"{tone}\"\n"
58
- f"Target length: ~{length} words.\n\n"
59
- "Style requirements:\n"
60
- "- Start with a 1–2 line hook with a concrete claim or question.\n"
61
- "- Use 2–3 short paragraphs; keep sentences under 20 words.\n"
62
- "- Add 3–5 specific insights or steps (use bullet points if helpful).\n"
63
- "- End with a clear CTA (ask a question or invite comments).\n\n"
64
- "Constraints:\n"
65
- "- No repeated sentences or filler phrases.\n"
66
- "- Avoid clichés like “it's a great example of how we can make a difference in the world.”\n"
67
- "- Use plain business English.\n\n"
68
- f"Reference style:\n{style_example_text}\n\n"
69
- "Output format:\n"
70
- "HOOK:\n"
71
- "BODY:\n"
72
- "TAKEAWAY:\n"
73
- "CTA:\n"
74
- )
75
-
76
  style_example_text = ""
77
  if use_sample != "None":
78
  idx = int(use_sample.split()[1]) - 1
@@ -89,20 +105,21 @@ if submitted:
89
  try:
90
  outputs = pipe(
91
  prompt,
92
- max_new_tokens=length + 120, # give space for sections
93
  temperature=temperature,
94
  top_p=top_p,
95
  repetition_penalty=repetition_penalty,
96
  no_repeat_ngram_size=no_repeat_ngram_size
97
  )
98
- # Pipeline may return list or dict; handle both
99
  if isinstance(outputs, list) and outputs and "generated_text" in outputs[0]:
100
- result = outputs[0]["generated_text"].strip()
101
  elif isinstance(outputs, dict) and "generated_text" in outputs:
102
- result = outputs["generated_text"].strip()
103
  else:
104
- result = str(outputs)
105
 
 
106
  st.success("Here's your LinkedIn post:")
107
  st.write(result)
108
  st.download_button("Download post as .txt", result, file_name="linkedin_post.txt")
@@ -110,7 +127,7 @@ if submitted:
110
  st.error(f"Error generating post: {e}")
111
 
112
  st.markdown("---")
113
- st.write("Upload a JSON array of style samples (strings). This overwrites existing samples.")
114
  file = st.file_uploader("Upload style_samples.json", type=["json"])
115
  if file:
116
  try:
 
1
  import streamlit as st
2
  import json
3
  import os
4
+ import re
5
  from transformers import pipeline
6
 
7
  STYLE_SAMPLES_FILE = "style_samples.json"
 
12
  return json.load(f)
13
  return []
14
 
def dedupe_sentences(text: str) -> str:
    """Drop verbatim repeated sentences from *text*, keeping first occurrences.

    Sentences are delimited by whitespace that follows ``.``, ``!`` or ``?``.
    Two sentences count as duplicates when they are equal after lower-casing
    and collapsing internal whitespace runs to a single space.

    Surviving sentences are re-joined with a single space, except when the
    whitespace in front of a sentence contained a line break: then the line
    break is kept (at most one blank line) so that paragraph and section
    layout is not flattened onto one line.

    Args:
        text: Raw text to clean; may be empty or span several lines.

    Returns:
        The de-duplicated text in original order, or ``""`` when *text* has
        no non-whitespace content.
    """
    # The capturing group makes re.split return the separators as well:
    # [sentence, sep, sentence, sep, ..., sentence].
    pieces = re.split(r'((?<=[.!?])\s+)', text.strip())
    sentences = pieces[0::2]
    # Pair every sentence with the whitespace that preceded it.
    separators = [""] + pieces[1::2]

    seen = set()
    out = []
    for sep, raw in zip(separators, sentences):
        sentence = raw.strip()
        key = re.sub(r'\s+', ' ', sentence.lower())
        if not key or key in seen:
            continue
        seen.add(key)
        if out:
            newlines = sep.count("\n")
            # Keep line/paragraph breaks; plain spacing collapses to one space.
            out.append("\n" * min(newlines, 2) if newlines else " ")
        out.append(sentence)
    return "".join(out)
26
+
@st.cache_resource(show_spinner=False)
def load_pipeline():
    """Build and return the text2text-generation pipeline used by the app.

    The function is wrapped in ``st.cache_resource`` with the spinner
    disabled.

    Returns:
        The object produced by ``transformers.pipeline`` for the
        ``google/flan-t5-base`` checkpoint.
    """
    # CPU-friendly model; swap later to a stronger instruct model if available.
    # Note: device_map is deliberately not passed, to avoid the Accelerate
    # requirement on CPU Spaces.
    return pipeline(task="text2text-generation", model="google/flan-t5-base")
37
 
def build_prompt(topic, audience, tone, length, style_example_text):
    """Assemble the structured instruction prompt for a LinkedIn post.

    The fixed sections (task, style requirements, constraints, output format)
    are meant to reduce looping and anchor the model.

    Args:
        topic: Subject of the post; interpolated in double quotes.
        audience: Intended readers; interpolated in double quotes.
        tone: Desired tone label; interpolated in double quotes.
        length: Approximate target length in words.
        style_example_text: Optional reference sample. When it is ``None``,
            empty, or whitespace-only, the "Reference style" section is left
            out entirely rather than emitted as a heading with no content.

    Returns:
        The complete prompt string, ending with the four output headers.
    """
    reference_section = ""
    if style_example_text and style_example_text.strip():
        # Only include the section when there is an actual sample to show.
        reference_section = (
            f"Reference style (optional):\n{style_example_text}\n\n"
        )

    return (
        "Task: Write a LinkedIn post.\n\n"
        f"Topic: \"{topic}\"\n"
        f"Audience: \"{audience}\"\n"
        f"Tone: \"{tone}\"\n"
        f"Target length: ~{length} words.\n\n"
        "Style requirements:\n"
        "- Start with a 1–2 line HOOK with a concrete claim or question.\n"
        "- Use 2–3 short BODY paragraphs; sentences under 20 words.\n"
        "- Add 3–5 specific insights or steps; bullets allowed.\n"
        "- End with a clear CTA inviting comments.\n\n"
        "Constraints:\n"
        "- Do NOT repeat sentences or phrases.\n"
        "- Avoid clichés like “it's a great example of how we can make a difference in the world.”\n"
        "- Use plain business English.\n\n"
        f"{reference_section}"
        "Output format (use these headers exactly):\n"
        "HOOK:\n"
        "BODY:\n"
        "TAKEAWAY:\n"
        "CTA:\n"
    )
62
+
63
+ # Load resources
64
  pipe = load_pipeline()
65
  style_samples = load_style_samples()
66
 
67
+ # UI
68
  st.set_page_config(page_title="LinkedIn Post Generator", layout="centered")
69
  st.title("🔗 LinkedIn Post Generator (Hugging Face)")
70
+ st.write("Generate concise, structured LinkedIn posts with few-shot style guidance.")
71
 
72
  with st.form("gen_form"):
73
  topic = st.text_input("Post Topic", "Generative AI for Business")
74
  tone = st.selectbox("Tone", ["Professional", "Friendly", "Inspirational", "Technical", "Concise"])
75
  audience = st.text_input("Audience", "Startup founders")
76
+ length = st.slider("Length (approx words)", 40, 300, 120, 10)
77
+
78
  use_sample = st.selectbox(
79
  "Style Sample (optional)",
80
  ["None"] + [f"Sample {i+1}" for i in range(len(style_samples))]
 
84
  with st.expander("Advanced generation settings"):
85
  temperature = st.slider("Temperature", 0.1, 1.2, 0.7, 0.05)
86
  top_p = st.slider("Top-p (nucleus)", 0.1, 1.0, 0.9, 0.05)
87
+ repetition_penalty = st.slider("Repetition penalty", 1.0, 2.0, 1.2, 0.05)
88
  no_repeat_ngram_size = st.slider("No-repeat n-gram size", 1, 6, 3, 1)
89
 
90
  submitted = st.form_submit_button("Generate Post")
91
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  style_example_text = ""
93
  if use_sample != "None":
94
  idx = int(use_sample.split()[1]) - 1
 
105
  try:
106
  outputs = pipe(
107
  prompt,
108
+ max_new_tokens=length + 120,
109
  temperature=temperature,
110
  top_p=top_p,
111
  repetition_penalty=repetition_penalty,
112
  no_repeat_ngram_size=no_repeat_ngram_size
113
  )
114
+ # Handle list/dict return variants
115
  if isinstance(outputs, list) and outputs and "generated_text" in outputs[0]:
116
+ raw = outputs[0]["generated_text"].strip()
117
  elif isinstance(outputs, dict) and "generated_text" in outputs:
118
+ raw = outputs["generated_text"].strip()
119
  else:
120
+ raw = str(outputs)
121
 
122
+ result = dedupe_sentences(raw)
123
  st.success("Here's your LinkedIn post:")
124
  st.write(result)
125
  st.download_button("Download post as .txt", result, file_name="linkedin_post.txt")
 
127
  st.error(f"Error generating post: {e}")
128
 
129
  st.markdown("---")
130
+ st.write("Upload a JSON array of style sample strings (overwrites existing).")
131
  file = st.file_uploader("Upload style_samples.json", type=["json"])
132
  if file:
133
  try: