Deploy refined v2 slogan generator with Gradio UI
Browse files
app.py
CHANGED
|
@@ -2,118 +2,97 @@
|
|
| 2 |
import gradio as gr
|
| 3 |
import pandas as pd
|
| 4 |
import numpy as np
|
| 5 |
-
|
| 6 |
-
import
|
| 7 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
| 8 |
-
import torch
|
| 9 |
-
import re
|
| 10 |
|
| 11 |
-
#
|
| 12 |
-
GEN_TOK = AutoTokenizer.from_pretrained("google/flan-t5-
|
| 13 |
-
GEN_MODEL = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-
|
| 14 |
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 15 |
GEN_MODEL = GEN_MODEL.to(DEVICE)
|
| 16 |
|
| 17 |
-
|
| 18 |
-
|
| 19 |
|
| 20 |
-
#
|
| 21 |
-
|
| 22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
# Regexes tried with ``re.match`` against the stripped slogan by
# ``_is_blocked_slogan``; a match rejects the candidate. They cover one or two
# capitalised words, optionally followed by a generic product noun.
BLOCK_PATTERNS = [
    r"^[A-Z][a-z]+ [A-Z][a-z]+ (Platform|Solution|System|Application|Marketplace)$",
    r"^[A-Z][a-z]+ [A-Z][a-z]+$",
    r"^[A-Z][a-z]+$"
]
# Lower-case terms that reject a candidate when found in the lower-cased slogan.
# NOTE(review): the check is a plain substring test (``w in low``), so short
# entries such as "ai" and "app" also hit inside longer words ("brain",
# "happy") -- confirm that is intended.
FORBIDDEN_WORDS = {
    "app","assistant","platform","solution","system","marketplace",
    "ai","machine learning","augmented reality","virtual reality",
    "decentralized","empower"
}
|
| 33 |
|
| 34 |
-
|
| 35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
|
| 37 |
-
def
|
| 38 |
-
|
| 39 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
|
| 41 |
def _is_blocked_slogan(s: str) -> bool:
|
| 42 |
-
|
|
|
|
| 43 |
return True
|
| 44 |
for pat in BLOCK_PATTERNS:
|
| 45 |
if re.match(pat, s.strip()):
|
| 46 |
return True
|
| 47 |
-
low = s.lower()
|
| 48 |
-
for w in FORBIDDEN_WORDS:
|
| 49 |
-
if w in low:
|
| 50 |
-
return True
|
| 51 |
return False
|
| 52 |
|
| 53 |
-
def
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
text = " ".join(words[:max_words])
|
| 69 |
-
# Soft title case
|
| 70 |
-
out_words = []
|
| 71 |
-
for w in text.split():
|
| 72 |
-
out_words.append(w if w.isupper() else w.capitalize())
|
| 73 |
-
return " ".join(out_words)
|
| 74 |
-
|
| 75 |
-
def _marketing_score(s: str) -> float:
    """Rate the marketing tone of ``s`` on a 0.0-1.0 scale.

    The slogan is split on whitespace and lower-cased. Every distinct word
    found in ``MARKETING_VERBS`` or ``BENEFIT_WORDS`` contributes 0.25, and
    the total is capped at 1.0.
    """
    tokens = {word.lower() for word in s.split()}
    hits = sum(len(tokens & vocab) for vocab in (MARKETING_VERBS, BENEFIT_WORDS))
    return min(1.0, 0.25 * hits)
|
| 80 |
-
|
| 81 |
-
def recommend(query: str, top_k: int = 3) -> pd.DataFrame:
    """Return up to ``top_k`` rows of ``data`` most similar to ``query``.

    The query is embedded with ``embed_model``, L2-normalised in place and
    searched against the module-level FAISS ``index``.

    Args:
        query: Free-text startup description.
        top_k: Maximum number of neighbours to return. Values larger than the
            number of indexed rows are clamped; values <= 0 give an empty frame.

    Returns:
        A copy of the matching rows with columns ``name``, ``tagline``,
        ``description`` and ``score`` (the similarity reported by FAISS).
    """
    columns = ["name", "tagline", "description", "score"]
    # FAISS pads missing hits with label -1, which ``iloc`` would silently
    # resolve to the last row, so never request more rows than are indexed.
    k = min(int(top_k), index.ntotal)
    if k <= 0:
        empty = data.iloc[0:0].copy()
        empty["score"] = np.empty(0, dtype=np.float32)
        return empty[columns]
    query_vec = embed_model.encode([query])
    faiss.normalize_L2(query_vec)
    scores, idx = index.search(query_vec, k)
    results = data.iloc[idx[0]].copy()
    results["score"] = scores[0]
    return results[columns]
|
| 89 |
-
|
| 90 |
-
def generate_slogan(query_text: str, neighbors_df: pd.DataFrame = None, n_samples: int = 16) -> str:
|
| 91 |
-
"""
|
| 92 |
-
Generate multiple slogans using FLAN-T5, filter and score them,
|
| 93 |
-
then return the best slogan based on semantic similarity and marketing tone.
|
| 94 |
-
"""
|
| 95 |
-
ctx_lines = []
|
| 96 |
-
if neighbors_df is not None and not neighbors_df.empty:
|
| 97 |
-
for _, row in neighbors_df.head(3).iterrows():
|
| 98 |
-
tg = str(row.get("tagline", "")).strip()
|
| 99 |
-
if 5 <= len(tg) <= 70:
|
| 100 |
-
ctx_lines.append(f"- {tg}")
|
| 101 |
-
context = "\n".join(ctx_lines)
|
| 102 |
prompt = (
|
| 103 |
"You are a creative brand copywriter. Write short, original, memorable startup slogans (max 8 words).\n"
|
| 104 |
"Forbidden words: app, assistant, platform, solution, system, marketplace, AI, machine learning, augmented reality, virtual reality, decentralized, empower.\n"
|
| 105 |
-
"Focus on
|
| 106 |
-
"
|
| 107 |
-
"Description: AI assistant for doctors to prioritize patient cases\n"
|
| 108 |
-
"Slogan: Less Guessing. More Healing.\n\n"
|
| 109 |
-
"Description: Payments for small online stores\n"
|
| 110 |
-
"Slogan: Built To Grow With Your Cart.\n\n"
|
| 111 |
-
"Description: Neurotech headset to boost focus\n"
|
| 112 |
-
"Slogan: Train Your Brain To Win.\n\n"
|
| 113 |
)
|
| 114 |
-
if context:
|
| 115 |
-
prompt += f"Similar taglines (style only):\n{context}\n\n"
|
| 116 |
-
prompt += f"Description: {query_text}\nSlogans:"
|
| 117 |
|
| 118 |
input_ids = GEN_TOK(prompt, return_tensors="pt").input_ids.to(DEVICE)
|
| 119 |
outputs = GEN_MODEL.generate(
|
|
@@ -123,80 +102,36 @@ def generate_slogan(query_text: str, neighbors_df: pd.DataFrame = None, n_sample
|
|
| 123 |
top_k=60,
|
| 124 |
top_p=0.92,
|
| 125 |
temperature=1.2,
|
| 126 |
-
num_return_sequences=n_samples
|
| 127 |
-
repetition_penalty=1.08
|
| 128 |
)
|
| 129 |
-
raw_texts = [GEN_TOK.decode(o, skip_special_tokens=True) for o in outputs]
|
| 130 |
|
| 131 |
-
|
| 132 |
-
|
|
|
|
|
|
|
| 133 |
for line in txt.split("\n"):
|
| 134 |
-
s =
|
| 135 |
-
if not s
|
| 136 |
-
|
| 137 |
-
if _is_blocked_slogan(s):
|
| 138 |
-
|
| 139 |
-
# Avoid copying neighbor taglines
|
| 140 |
-
skip = False
|
| 141 |
-
if neighbors_df is not None and not neighbors_df.empty:
|
| 142 |
-
for _, row in neighbors_df.iterrows():
|
| 143 |
-
tg = str(row.get("tagline", "")).strip()
|
| 144 |
-
if not tg:
|
| 145 |
-
continue
|
| 146 |
-
if s.lower() == tg.lower():
|
| 147 |
-
skip = True
|
| 148 |
-
break
|
| 149 |
-
if _jaccard(_tokens(s), _tokens(tg.lower())) >= 0.7:
|
| 150 |
-
skip = True
|
| 151 |
-
break
|
| 152 |
-
if skip:
|
| 153 |
-
continue
|
| 154 |
-
candidates.add(s)
|
| 155 |
-
|
| 156 |
-
if not candidates:
|
| 157 |
-
first = _clean_line(raw_texts[0])
|
| 158 |
-
return first if first else query_text
|
| 159 |
-
|
| 160 |
-
query_vec = embed_model.encode([query_text])[0]
|
| 161 |
-
query_vec = query_vec / np.linalg.norm(query_vec)
|
| 162 |
-
scored = []
|
| 163 |
-
for s in candidates:
|
| 164 |
-
s_vec = embed_model.encode([s])[0]
|
| 165 |
-
s_vec = s_vec / np.linalg.norm(s_vec)
|
| 166 |
-
similarity = float(np.dot(query_vec, s_vec))
|
| 167 |
-
brevity = 1.0 - min(1.0, abs(len(s.split()) - 5) / 5.0)
|
| 168 |
-
marketing = _marketing_score(s)
|
| 169 |
-
generic = _generic_penalty(s)
|
| 170 |
-
for_pen = _for_penalty(s)
|
| 171 |
-
score = 0.6*similarity + 0.2*brevity + 0.2*marketing - 0.05*generic - 0.05*for_pen
|
| 172 |
-
scored.append((s, score))
|
| 173 |
-
scored.sort(key=lambda x: x[1], reverse=True)
|
| 174 |
-
return scored[0][0]
|
| 175 |
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
"name": ["HowDidIDo", "Museotainment", "Movitr"],
|
| 179 |
-
"tagline": ["Online evaluation platform", "PacMan & Louvre meet", "Crowdsourced video translation"],
|
| 180 |
-
"description": [
|
| 181 |
-
"Public speaking, Presentation skills and interview practice",
|
| 182 |
-
"Interactive AR museum tours",
|
| 183 |
-
"Video translation with voice and subtitles"
|
| 184 |
-
]
|
| 185 |
-
})
|
| 186 |
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
index = faiss.IndexFlatIP(data_vecs.shape[1])
|
| 191 |
-
index.add(data_vecs)
|
| 192 |
|
|
|
|
| 193 |
def pipeline(user_input):
|
| 194 |
recs = recommend(user_input, top_k=3)
|
| 195 |
-
slogan = generate_slogan(user_input
|
| 196 |
recs = recs.reset_index(drop=True)
|
| 197 |
recs.loc[len(recs)] = ["Generated Slogan", slogan, user_input, np.nan]
|
| 198 |
return recs
|
| 199 |
|
|
|
|
| 200 |
examples = [
|
| 201 |
"AI coach for improving public speaking skills",
|
| 202 |
"Augmented reality app for interactive museum tours",
|
|
@@ -208,7 +143,7 @@ examples = [
|
|
| 208 |
demo = gr.Interface(
|
| 209 |
fn=pipeline,
|
| 210 |
inputs=gr.Textbox(label="Enter a startup description"),
|
| 211 |
-
outputs=gr.Dataframe(headers=["Name","Tagline","Description","Score"]),
|
| 212 |
examples=examples,
|
| 213 |
title="SloganAI – Startup Recommendation & Slogan Generator",
|
| 214 |
description="Enter a startup idea and get top-3 similar startups + 1 generated slogan."
|
|
|
|
| 2 |
import gradio as gr
|
| 3 |
import pandas as pd
|
| 4 |
import numpy as np
|
| 5 |
+
import faiss, re, torch
|
| 6 |
+
from sentence_transformers import SentenceTransformer, CrossEncoder
|
| 7 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
|
|
|
|
|
|
| 8 |
|
| 9 |
+
# ------------------ Models ------------------
# Tokenizer and seq2seq model used by generate_slogan to sample candidates.
GEN_TOK = AutoTokenizer.from_pretrained("google/flan-t5-large")
GEN_MODEL = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-large")
# Use CUDA when torch reports it as available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
GEN_MODEL = GEN_MODEL.to(DEVICE)

# EMBED_MODEL encodes dataset descriptions and queries for the FAISS search;
# RERANKER is the cross-encoder that _score_candidates uses on (query, slogan) pairs.
EMBED_MODEL = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")
RERANKER = CrossEncoder("cross-encoder/stsb-roberta-base")
|
| 17 |
|
| 18 |
+
# ------------------ Dummy dataset (for demo) ------------------
# Three hard-coded startups; recommend() searches over their descriptions.
data = pd.DataFrame({
    "name": ["HowDidIDo", "Museotainment", "Movitr"],
    "tagline": ["Online evaluation platform", "PacMan & Louvre meet", "Crowdsourced video translation"],
    "description": [
        "Public speaking, Presentation skills and interview practice",
        "Interactive AR museum tours",
        "Video translation with voice and subtitles"
    ]
})

# Build FAISS index
# Descriptions are embedded once at import time. The vectors are L2-normalised
# in place, so the inner-product index below ranks by cosine similarity.
data_vecs = EMBED_MODEL.encode(data["description"].tolist())
faiss.normalize_L2(data_vecs)
index = faiss.IndexFlatIP(data_vecs.shape[1])
index.add(data_vecs)
|
| 34 |
+
|
| 35 |
+
def recommend(query, top_k=3):
    """Return up to ``top_k`` rows of ``data`` most similar to ``query``.

    The query is embedded with ``EMBED_MODEL``, L2-normalised in place and
    searched against the module-level inner-product FAISS ``index``.

    Args:
        query: Free-text startup description.
        top_k: Maximum number of neighbours to return. Values larger than the
            number of indexed rows are clamped; values <= 0 give an empty frame.

    Returns:
        A copy of the matching rows with columns ``name``, ``tagline``,
        ``description`` and ``score`` (the similarity reported by FAISS).
    """
    columns = ["name", "tagline", "description", "score"]
    # FAISS pads missing hits with label -1, which ``iloc`` would silently
    # resolve to the last row, so never request more rows than are indexed.
    k = min(int(top_k), index.ntotal)
    if k <= 0:
        empty = data.iloc[0:0].copy()
        empty["score"] = np.empty(0, dtype=np.float32)
        return empty[columns]
    query_vec = EMBED_MODEL.encode([query])
    faiss.normalize_L2(query_vec)
    scores, idx = index.search(query_vec, k)
    results = data.iloc[idx[0]].copy()
    results["score"] = scores[0]
    return results[columns]
|
| 42 |
+
|
| 43 |
+
# ------------------ Helpers ------------------
# Regexes tried with ``re.match`` against the stripped slogan by
# _is_blocked_slogan; a match rejects the candidate. They cover one or two
# capitalised words, optionally followed by a generic product noun.
BLOCK_PATTERNS = [
    r"^[A-Z][a-z]+ [A-Z][a-z]+ (Platform|Solution|System|Application|Marketplace)$",
    r"^[A-Z][a-z]+ [A-Z][a-z]+$",
    r"^[A-Z][a-z]+$",
]

# Lower-case terms that reject a slogan when they occur anywhere in its
# lower-cased text (substring test, so "system" also hits "ecosystem").
HARD_BLOCK_WORDS = {"platform","solution","system","application","marketplace",
                    "ai-powered","ai powered","empower","empowering",
                    "artificial intelligence","machine learning","augmented reality","virtual reality"}
# NOTE(review): GENERIC_WORDS is not referenced by any code visible in this
# view -- confirm whether a generic-word penalty was meant to use it.
GENERIC_WORDS = {"app","assistant","smart","ai","ml","ar","vr","decentralized","blockchain"}
# Vocabularies intersected with a slogan's words in _score_candidates; each
# distinct hit adds 0.2 to the marketing component of the score.
MARKETING_VERBS = {"build","grow","simplify","discover","create","connect","transform","unlock","boost","learn"}
BENEFIT_WORDS = {"faster","smarter","easier","better","safer","clearer"}
|
| 56 |
|
| 57 |
+
def _clean_slogan(text: str, max_words: int = 8) -> str:
|
| 58 |
+
text = text.strip().split("\n")[0]
|
| 59 |
+
text = re.sub(r"[\"“”‘’]", "", text)
|
| 60 |
+
text = re.sub(r"\s+", " ", text).strip()
|
| 61 |
+
words = text.split()
|
| 62 |
+
if len(words) > max_words:
|
| 63 |
+
text = " ".join(words[:max_words])
|
| 64 |
+
return text
|
| 65 |
|
| 66 |
def _is_blocked_slogan(s: str) -> bool:
    """Tell whether slogan ``s`` must be discarded.

    A slogan is blocked when its lower-cased text contains any entry of
    ``HARD_BLOCK_WORDS`` as a substring, or when its stripped text matches
    one of the ``BLOCK_PATTERNS`` regexes from the start.
    """
    lowered = s.lower()
    for banned in HARD_BLOCK_WORDS:
        if banned in lowered:
            return True
    candidate = s.strip()
    return any(re.match(pattern, candidate) is not None for pattern in BLOCK_PATTERNS)
|
| 74 |
|
| 75 |
+
def _score_candidates(query: str, cands: list) -> list:
|
| 76 |
+
if not cands:
|
| 77 |
+
return []
|
| 78 |
+
ce_scores = np.asarray(RERANKER.predict([(query, s) for s in cands]), dtype=np.float32) / 5.0
|
| 79 |
+
results = []
|
| 80 |
+
for i, s in enumerate(cands):
|
| 81 |
+
words = s.split()
|
| 82 |
+
brevity = 1.0 - min(1.0, abs(len(words) - 5) / 5.0)
|
| 83 |
+
marketing = 0.2*len(set(words) & MARKETING_VERBS) + 0.2*len(set(words) & BENEFIT_WORDS)
|
| 84 |
+
score = 0.6*float(ce_scores[i]) + 0.2*brevity + 0.2*marketing
|
| 85 |
+
results.append((s, float(score)))
|
| 86 |
+
return results
|
| 87 |
+
|
| 88 |
+
# ------------------ Generator ------------------
|
| 89 |
+
def generate_slogan(query_text: str, n_samples: int = 16) -> str:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
prompt = (
|
| 91 |
"You are a creative brand copywriter. Write short, original, memorable startup slogans (max 8 words).\n"
|
| 92 |
"Forbidden words: app, assistant, platform, solution, system, marketplace, AI, machine learning, augmented reality, virtual reality, decentralized, empower.\n"
|
| 93 |
+
"Focus on benefits and vivid verbs. Do not copy the description.\n\n"
|
| 94 |
+
f"Description: {query_text}\nSlogans:"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
)
|
|
|
|
|
|
|
|
|
|
| 96 |
|
| 97 |
input_ids = GEN_TOK(prompt, return_tensors="pt").input_ids.to(DEVICE)
|
| 98 |
outputs = GEN_MODEL.generate(
|
|
|
|
| 102 |
top_k=60,
|
| 103 |
top_p=0.92,
|
| 104 |
temperature=1.2,
|
| 105 |
+
num_return_sequences=n_samples
|
|
|
|
| 106 |
)
|
|
|
|
| 107 |
|
| 108 |
+
raw_cands = [GEN_TOK.decode(o, skip_special_tokens=True) for o in outputs]
|
| 109 |
+
|
| 110 |
+
cand_set = set()
|
| 111 |
+
for txt in raw_cands:
|
| 112 |
for line in txt.split("\n"):
|
| 113 |
+
s = _clean_slogan(line)
|
| 114 |
+
if not s: continue
|
| 115 |
+
if len(s.split()) < 2 or len(s.split()) > 8: continue
|
| 116 |
+
if _is_blocked_slogan(s): continue
|
| 117 |
+
cand_set.add(s.capitalize())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
|
| 119 |
+
if not cand_set:
|
| 120 |
+
return "Fresh Ideas, Built To Scale"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
|
| 122 |
+
scored = _score_candidates(query_text, sorted(cand_set))
|
| 123 |
+
scored.sort(key=lambda x: x[1], reverse=True)
|
| 124 |
+
return scored[0][0] if scored else "Fresh Ideas, Built To Scale"
|
|
|
|
|
|
|
| 125 |
|
| 126 |
+
# ------------------ Pipeline ------------------
|
| 127 |
def pipeline(user_input):
    """Build the table shown in the UI for one startup description.

    Fetches the three nearest startups via ``recommend``, renumbers the rows
    from zero, and appends a final row holding the slogan produced by
    ``generate_slogan`` (with ``NaN`` as its score).
    """
    neighbours = recommend(user_input, top_k=3)
    generated = generate_slogan(user_input)
    table = neighbours.reset_index(drop=True)
    extra_row = ["Generated Slogan", generated, user_input, np.nan]
    table.loc[len(table)] = extra_row
    return table
|
| 133 |
|
| 134 |
+
# ------------------ Gradio UI ------------------
|
| 135 |
examples = [
|
| 136 |
"AI coach for improving public speaking skills",
|
| 137 |
"Augmented reality app for interactive museum tours",
|
|
|
|
| 143 |
demo = gr.Interface(
|
| 144 |
fn=pipeline,
|
| 145 |
inputs=gr.Textbox(label="Enter a startup description"),
|
| 146 |
+
outputs=gr.Dataframe(headers=["Name", "Tagline", "Description", "Score"]),
|
| 147 |
examples=examples,
|
| 148 |
title="SloganAI – Startup Recommendation & Slogan Generator",
|
| 149 |
description="Enter a startup idea and get top-3 similar startups + 1 generated slogan."
|