3v324v23 committed on
Commit
408e06f
·
1 Parent(s): 6649801

Deploy refined v2 slogan generator with Gradio UI

Browse files
Files changed (1) hide show
  1. app.py +86 -151
app.py CHANGED
@@ -2,118 +2,97 @@
2
  import gradio as gr
3
  import pandas as pd
4
  import numpy as np
5
- from sentence_transformers import SentenceTransformer
6
- import faiss
7
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
8
- import torch
9
- import re
10
 
11
# Run on the GPU when torch reports one, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Slogan generator: FLAN-T5-base tokenizer plus seq2seq weights placed on DEVICE.
GEN_TOK = AutoTokenizer.from_pretrained("google/flan-t5-base")
GEN_MODEL = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-base").to(DEVICE)

# Sentence encoder used for the description index and for query embeddings.
embed_model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")
19
 
20
# Lower-case vocabulary that _marketing_score rewards when it appears in a slogan.
MARKETING_VERBS = {
    "build", "grow", "simplify", "discover", "create", "connect",
    "transform", "unlock", "boost", "learn", "move", "clarify",
}
BENEFIT_WORDS = {
    "faster", "smarter", "easier", "better", "safer", "clearer",
    "stronger", "together", "confidently", "simply", "instantly",
}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Regexes for name-like outputs that are rejected outright: two Title Case
# words (optionally followed by a product noun) or one capitalised word.
BLOCK_PATTERNS = [
    r"^[A-Z][a-z]+ [A-Z][a-z]+ (Platform|Solution|System|Application|Marketplace)$",
    r"^[A-Z][a-z]+ [A-Z][a-z]+$",
    r"^[A-Z][a-z]+$",
]

# Lower-case terms looked up by the slogan filter and the generic-word penalty.
FORBIDDEN_WORDS = {
    "app",
    "assistant",
    "platform",
    "solution",
    "system",
    "marketplace",
    "ai",
    "machine learning",
    "augmented reality",
    "virtual reality",
    "decentralized",
    "empower",
}
33
 
34
- def _tokens(s: str):
35
- return re.findall(r"[a-z0-9]{3,}", s.lower())
 
 
 
 
36
 
37
- def _jaccard(a, b):
38
- A, B = set(a), set(b)
39
- return len(A & B) / len(A | B) if A and B else 0.0
 
 
 
 
 
40
 
def _is_blocked_slogan(s: str) -> bool:
    """Return True when ``s`` must not be offered as a slogan.

    A slogan is blocked when it is empty, when its stripped form matches one
    of ``BLOCK_PATTERNS``, or when it contains an entry of ``FORBIDDEN_WORDS``
    as a whole word (a trailing "s", "es", "ed" or "ing" is tolerated).

    Forbidden terms are matched on word boundaries rather than as raw
    substrings, so short entries such as "ai" or "app" no longer reject
    unrelated words like "train", "brain" or "happy".
    """
    if not s:
        return True
    stripped = s.strip()
    for pat in BLOCK_PATTERNS:
        if re.match(pat, stripped):
            return True
    low = s.lower()
    for w in FORBIDDEN_WORDS:
        # The term must start and end at a non-alphanumeric edge.
        term = r"(?<![a-z0-9])" + re.escape(w) + r"(?:s|es|ed|ing)?(?![a-z0-9])"
        if re.search(term, low):
            return True
    return False
52
 
53
def _generic_penalty(s: str) -> float:
    """Return a penalty in [0.0, 1.0]: 0.2 per ``FORBIDDEN_WORDS`` entry found in ``s``.

    Entries are matched as whole words (a trailing "s", "es", "ed" or "ing"
    is tolerated) instead of raw substrings, so "ai" inside "train" or "app"
    inside "happy" does not count as a hit.
    """
    low = s.lower()
    hits = sum(
        1
        for w in FORBIDDEN_WORDS
        if re.search(
            r"(?<![a-z0-9])" + re.escape(w) + r"(?:s|es|ed|ing)?(?![a-z0-9])",
            low,
        )
    )
    return min(1.0, hits * 0.2)
57
-
58
- def _for_penalty(s: str) -> float:
59
- return 0.3 if re.search(r"\bfor\b", s.lower()) else 0.0
60
-
61
- def _clean_line(text: str, max_words: int = 8) -> str:
62
- text = text.strip().split("\n")[0]
63
- text = re.sub(r"[\"“”‘’]", "", text)
64
- text = re.sub(r"\s+", " ", text).strip()
65
- text = re.sub(r"^\W+|\W+$", "", text)
66
- words = text.split()
67
- if len(words) > max_words:
68
- text = " ".join(words[:max_words])
69
- # Soft title case
70
- out_words = []
71
- for w in text.split():
72
- out_words.append(w if w.isupper() else w.capitalize())
73
- return " ".join(out_words)
74
-
75
def _marketing_score(s: str) -> float:
    """Return a score in [0.0, 1.0] rewarding marketing vocabulary in ``s``.

    Each distinct word found in ``MARKETING_VERBS`` or ``BENEFIT_WORDS`` adds
    0.25, capped at 1.0. Words are extracted as lower-case alphabetic runs,
    so attached punctuation ("Faster.", "Grow,") does not hide a match.
    """
    words = set(re.findall(r"[a-z]+", s.lower()))
    verb_hits = len(words & MARKETING_VERBS)
    benefit_hits = len(words & BENEFIT_WORDS)
    return min(1.0, 0.25 * verb_hits + 0.25 * benefit_hits)
80
-
81
def recommend(query: str, top_k: int = 3) -> pd.DataFrame:
    """Return up to ``top_k`` rows of ``data`` most similar to ``query``.

    The query is embedded with ``embed_model``, L2-normalised and searched
    against ``index``. The result has the columns ``name``, ``tagline``,
    ``description`` and ``score``.

    ``top_k`` is clamped to the number of indexed vectors, and any position
    the index reports as negative is dropped. Without this, asking for more
    neighbours than exist would make ``iloc`` silently return the last row
    for each missing result. A non-positive ``top_k`` yields an empty frame.
    """
    columns = ["name", "tagline", "description", "score"]
    k = max(0, min(int(top_k), index.ntotal))
    if k == 0:
        empty = data.iloc[0:0].copy()
        empty["score"] = np.array([], dtype=np.float32)
        return empty[columns]
    query_vec = embed_model.encode([query])
    faiss.normalize_L2(query_vec)
    scores, idx = index.search(query_vec, k)
    found = idx[0] >= 0
    results = data.iloc[idx[0][found]].copy()
    results["score"] = scores[0][found]
    return results[columns]
89
-
90
- def generate_slogan(query_text: str, neighbors_df: pd.DataFrame = None, n_samples: int = 16) -> str:
91
- """
92
- Generate multiple slogans using FLAN-T5, filter and score them,
93
- then return the best slogan based on semantic similarity and marketing tone.
94
- """
95
- ctx_lines = []
96
- if neighbors_df is not None and not neighbors_df.empty:
97
- for _, row in neighbors_df.head(3).iterrows():
98
- tg = str(row.get("tagline", "")).strip()
99
- if 5 <= len(tg) <= 70:
100
- ctx_lines.append(f"- {tg}")
101
- context = "\n".join(ctx_lines)
102
  prompt = (
103
  "You are a creative brand copywriter. Write short, original, memorable startup slogans (max 8 words).\n"
104
  "Forbidden words: app, assistant, platform, solution, system, marketplace, AI, machine learning, augmented reality, virtual reality, decentralized, empower.\n"
105
- "Focus on clear benefits and vivid verbs. Do not copy the description. Return ONLY a list, one slogan per line.\n\n"
106
- "Good Examples:\n"
107
- "Description: AI assistant for doctors to prioritize patient cases\n"
108
- "Slogan: Less Guessing. More Healing.\n\n"
109
- "Description: Payments for small online stores\n"
110
- "Slogan: Built To Grow With Your Cart.\n\n"
111
- "Description: Neurotech headset to boost focus\n"
112
- "Slogan: Train Your Brain To Win.\n\n"
113
  )
114
- if context:
115
- prompt += f"Similar taglines (style only):\n{context}\n\n"
116
- prompt += f"Description: {query_text}\nSlogans:"
117
 
118
  input_ids = GEN_TOK(prompt, return_tensors="pt").input_ids.to(DEVICE)
119
  outputs = GEN_MODEL.generate(
@@ -123,80 +102,36 @@ def generate_slogan(query_text: str, neighbors_df: pd.DataFrame = None, n_sample
123
  top_k=60,
124
  top_p=0.92,
125
  temperature=1.2,
126
- num_return_sequences=n_samples,
127
- repetition_penalty=1.08
128
  )
129
- raw_texts = [GEN_TOK.decode(o, skip_special_tokens=True) for o in outputs]
130
 
131
- candidates = set()
132
- for txt in raw_texts:
 
 
133
  for line in txt.split("\n"):
134
- s = _clean_line(line)
135
- if not s or len(s.split()) < 2 or len(s.split()) > 8:
136
- continue
137
- if _is_blocked_slogan(s):
138
- continue
139
- # Avoid copying neighbor taglines
140
- skip = False
141
- if neighbors_df is not None and not neighbors_df.empty:
142
- for _, row in neighbors_df.iterrows():
143
- tg = str(row.get("tagline", "")).strip()
144
- if not tg:
145
- continue
146
- if s.lower() == tg.lower():
147
- skip = True
148
- break
149
- if _jaccard(_tokens(s), _tokens(tg.lower())) >= 0.7:
150
- skip = True
151
- break
152
- if skip:
153
- continue
154
- candidates.add(s)
155
-
156
- if not candidates:
157
- first = _clean_line(raw_texts[0])
158
- return first if first else query_text
159
-
160
- query_vec = embed_model.encode([query_text])[0]
161
- query_vec = query_vec / np.linalg.norm(query_vec)
162
- scored = []
163
- for s in candidates:
164
- s_vec = embed_model.encode([s])[0]
165
- s_vec = s_vec / np.linalg.norm(s_vec)
166
- similarity = float(np.dot(query_vec, s_vec))
167
- brevity = 1.0 - min(1.0, abs(len(s.split()) - 5) / 5.0)
168
- marketing = _marketing_score(s)
169
- generic = _generic_penalty(s)
170
- for_pen = _for_penalty(s)
171
- score = 0.6*similarity + 0.2*brevity + 0.2*marketing - 0.05*generic - 0.05*for_pen
172
- scored.append((s, score))
173
- scored.sort(key=lambda x: x[1], reverse=True)
174
- return scored[0][0]
175
 
176
# Dummy dataset; replace with your full dataset
data = pd.DataFrame(
    [
        {
            "name": "HowDidIDo",
            "tagline": "Online evaluation platform",
            "description": "Public speaking, Presentation skills and interview practice",
        },
        {
            "name": "Museotainment",
            "tagline": "PacMan & Louvre meet",
            "description": "Interactive AR museum tours",
        },
        {
            "name": "Movitr",
            "tagline": "Crowdsourced video translation",
            "description": "Video translation with voice and subtitles",
        },
    ]
)

# Inner-product FAISS index over the L2-normalised description embeddings.
data_vecs = embed_model.encode(list(data["description"]))
faiss.normalize_L2(data_vecs)
index = faiss.IndexFlatIP(data_vecs.shape[1])
index.add(data_vecs)
192
 
 
def pipeline(user_input):
    """Return the three nearest startups for ``user_input`` plus one generated-slogan row.

    The appended row carries "Generated Slogan" as its name, the slogan as
    its tagline, the raw input as its description and NaN as its score.
    """
    neighbours = recommend(user_input, top_k=3)
    tagline = generate_slogan(user_input, neighbors_df=neighbours)
    table = neighbours.reset_index(drop=True)
    table.loc[len(table)] = ["Generated Slogan", tagline, user_input, np.nan]
    return table
199
 
 
200
  examples = [
201
  "AI coach for improving public speaking skills",
202
  "Augmented reality app for interactive museum tours",
@@ -208,7 +143,7 @@ examples = [
208
  demo = gr.Interface(
209
  fn=pipeline,
210
  inputs=gr.Textbox(label="Enter a startup description"),
211
- outputs=gr.Dataframe(headers=["Name","Tagline","Description","Score"]),
212
  examples=examples,
213
  title="SloganAI – Startup Recommendation & Slogan Generator",
214
  description="Enter a startup idea and get top-3 similar startups + 1 generated slogan."
 
2
  import gradio as gr
3
  import pandas as pd
4
  import numpy as np
5
+ import faiss, re, torch
6
+ from sentence_transformers import SentenceTransformer, CrossEncoder
7
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 
 
8
 
9
# ------------------ Models ------------------
# Run on the GPU when torch reports one, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Slogan generator: FLAN-T5-large tokenizer plus seq2seq weights placed on DEVICE.
GEN_TOK = AutoTokenizer.from_pretrained("google/flan-t5-large")
GEN_MODEL = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-large").to(DEVICE)

# Sentence encoder for retrieval and cross-encoder used to score slogan candidates.
EMBED_MODEL = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")
RERANKER = CrossEncoder("cross-encoder/stsb-roberta-base")
17
 
18
# ------------------ Dummy dataset (for demo) ------------------
data = pd.DataFrame(
    [
        {
            "name": "HowDidIDo",
            "tagline": "Online evaluation platform",
            "description": "Public speaking, Presentation skills and interview practice",
        },
        {
            "name": "Museotainment",
            "tagline": "PacMan & Louvre meet",
            "description": "Interactive AR museum tours",
        },
        {
            "name": "Movitr",
            "tagline": "Crowdsourced video translation",
            "description": "Video translation with voice and subtitles",
        },
    ]
)

# Inner-product FAISS index over the L2-normalised description embeddings.
data_vecs = EMBED_MODEL.encode(list(data["description"]))
faiss.normalize_L2(data_vecs)
index = faiss.IndexFlatIP(data_vecs.shape[1])
index.add(data_vecs)
34
+
35
def recommend(query, top_k=3):
    """Return up to ``top_k`` rows of ``data`` most similar to ``query``.

    The query is embedded with ``EMBED_MODEL``, L2-normalised and searched
    against ``index``. The result has the columns ``name``, ``tagline``,
    ``description`` and ``score``.

    ``top_k`` is clamped to the number of indexed vectors, and any position
    the index reports as negative is dropped. Without this, asking for more
    neighbours than exist would make ``iloc`` silently return the last row
    for each missing result. A non-positive ``top_k`` yields an empty frame.
    """
    columns = ["name", "tagline", "description", "score"]
    k = max(0, min(int(top_k), index.ntotal))
    if k == 0:
        empty = data.iloc[0:0].copy()
        empty["score"] = np.array([], dtype=np.float32)
        return empty[columns]
    query_vec = EMBED_MODEL.encode([query])
    faiss.normalize_L2(query_vec)
    scores, idx = index.search(query_vec, k)
    found = idx[0] >= 0
    results = data.iloc[idx[0][found]].copy()
    results["score"] = scores[0][found]
    return results[columns]
42
+
43
# ------------------ Helpers ------------------
# Regexes for name-like outputs that are rejected outright: two Title Case
# words (optionally followed by a product noun) or one capitalised word.
BLOCK_PATTERNS = [
    r"^[A-Z][a-z]+ [A-Z][a-z]+ (Platform|Solution|System|Application|Marketplace)$",
    r"^[A-Z][a-z]+ [A-Z][a-z]+$",
    r"^[A-Z][a-z]+$",
]

# Lower-case terms whose presence makes _is_blocked_slogan reject a candidate.
HARD_BLOCK_WORDS = {
    "platform",
    "solution",
    "system",
    "application",
    "marketplace",
    "ai-powered",
    "ai powered",
    "empower",
    "empowering",
    "artificial intelligence",
    "machine learning",
    "augmented reality",
    "virtual reality",
}
# NOTE(review): no use of GENERIC_WORDS is visible in this part of the file.
GENERIC_WORDS = {
    "app", "assistant", "smart", "ai", "ml", "ar", "vr",
    "decentralized", "blockchain",
}
# Vocabulary rewarded by _score_candidates.
MARKETING_VERBS = {
    "build", "grow", "simplify", "discover", "create",
    "connect", "transform", "unlock", "boost", "learn",
}
BENEFIT_WORDS = {"faster", "smarter", "easier", "better", "safer", "clearer"}
56
 
57
+ def _clean_slogan(text: str, max_words: int = 8) -> str:
58
+ text = text.strip().split("\n")[0]
59
+ text = re.sub(r"[\"“”‘’]", "", text)
60
+ text = re.sub(r"\s+", " ", text).strip()
61
+ words = text.split()
62
+ if len(words) > max_words:
63
+ text = " ".join(words[:max_words])
64
+ return text
65
 
def _is_blocked_slogan(s: str) -> bool:
    """Return True when ``s`` must not be offered as a slogan.

    A slogan is blocked when it is empty, when it contains an entry of
    ``HARD_BLOCK_WORDS`` as a whole word or phrase (a trailing "s", "es",
    "ed" or "ing" is tolerated), or when its stripped form matches one of
    ``BLOCK_PATTERNS``.

    Terms are matched on word boundaries rather than as raw substrings, so
    "system" no longer rejects "ecosystem" and "solution" no longer rejects
    "resolution".
    """
    if not s:
        return True
    s_low = s.lower()
    for w in HARD_BLOCK_WORDS:
        # The term must start and end at a non-alphanumeric edge.
        term = r"(?<![a-z0-9])" + re.escape(w) + r"(?:s|es|ed|ing)?(?![a-z0-9])"
        if re.search(term, s_low):
            return True
    stripped = s.strip()
    return any(re.match(pat, stripped) for pat in BLOCK_PATTERNS)
74
 
75
def _score_candidates(query: str, cands: list) -> list:
    """Score each candidate slogan against ``query``.

    Returns a list of ``(slogan, score)`` tuples in the order of ``cands``;
    an empty ``cands`` gives an empty list. The score combines the
    cross-encoder output (weight 0.6), a brevity term that peaks at five
    words (weight 0.2) and a marketing-vocabulary term (weight 0.2).

    Vocabulary matching is done on lower-case alphabetic runs, because the
    lexicons are lower case while candidates may be capitalised and carry
    punctuation. The marketing term is capped at 1.0.
    """
    if not cands:
        return []
    # NOTE(review): the model card for the STS-B cross-encoder describes
    # scores in the 0-1 range; confirm whether dividing by 5.0 is intended.
    ce_scores = np.asarray(RERANKER.predict([(query, s) for s in cands]), dtype=np.float32) / 5.0
    results = []
    for s, ce in zip(cands, ce_scores):
        brevity = 1.0 - min(1.0, abs(len(s.split()) - 5) / 5.0)
        vocab = set(re.findall(r"[a-z]+", s.lower()))
        marketing = min(1.0, 0.2 * len(vocab & MARKETING_VERBS) + 0.2 * len(vocab & BENEFIT_WORDS))
        score = 0.6 * float(ce) + 0.2 * brevity + 0.2 * marketing
        results.append((s, float(score)))
    return results
87
+
88
+ # ------------------ Generator ------------------
89
+ def generate_slogan(query_text: str, n_samples: int = 16) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  prompt = (
91
  "You are a creative brand copywriter. Write short, original, memorable startup slogans (max 8 words).\n"
92
  "Forbidden words: app, assistant, platform, solution, system, marketplace, AI, machine learning, augmented reality, virtual reality, decentralized, empower.\n"
93
+ "Focus on benefits and vivid verbs. Do not copy the description.\n\n"
94
+ f"Description: {query_text}\nSlogans:"
 
 
 
 
 
 
95
  )
 
 
 
96
 
97
  input_ids = GEN_TOK(prompt, return_tensors="pt").input_ids.to(DEVICE)
98
  outputs = GEN_MODEL.generate(
 
102
  top_k=60,
103
  top_p=0.92,
104
  temperature=1.2,
105
+ num_return_sequences=n_samples
 
106
  )
 
107
 
108
+ raw_cands = [GEN_TOK.decode(o, skip_special_tokens=True) for o in outputs]
109
+
110
+ cand_set = set()
111
+ for txt in raw_cands:
112
  for line in txt.split("\n"):
113
+ s = _clean_slogan(line)
114
+ if not s: continue
115
+ if len(s.split()) < 2 or len(s.split()) > 8: continue
116
+ if _is_blocked_slogan(s): continue
117
+ cand_set.add(s.capitalize())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
 
119
+ if not cand_set:
120
+ return "Fresh Ideas, Built To Scale"
 
 
 
 
 
 
 
 
121
 
122
+ scored = _score_candidates(query_text, sorted(cand_set))
123
+ scored.sort(key=lambda x: x[1], reverse=True)
124
+ return scored[0][0] if scored else "Fresh Ideas, Built To Scale"
 
 
125
 
126
# ------------------ Pipeline ------------------
def pipeline(user_input):
    """Return the three nearest startups for ``user_input`` plus one generated-slogan row.

    The appended row carries "Generated Slogan" as its name, the slogan as
    its tagline, the raw input as its description and NaN as its score.
    """
    neighbours = recommend(user_input, top_k=3)
    tagline = generate_slogan(user_input)
    table = neighbours.reset_index(drop=True)
    table.loc[len(table)] = ["Generated Slogan", tagline, user_input, np.nan]
    return table
133
 
134
+ # ------------------ Gradio UI ------------------
135
  examples = [
136
  "AI coach for improving public speaking skills",
137
  "Augmented reality app for interactive museum tours",
 
143
  demo = gr.Interface(
144
  fn=pipeline,
145
  inputs=gr.Textbox(label="Enter a startup description"),
146
+ outputs=gr.Dataframe(headers=["Name", "Tagline", "Description", "Score"]),
147
  examples=examples,
148
  title="SloganAI – Startup Recommendation & Slogan Generator",
149
  description="Enter a startup idea and get top-3 similar startups + 1 generated slogan."