|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os |
|
|
import re |
|
|
import json |
|
|
import time |
|
|
import random |
|
|
from typing import List, Dict, Optional |
|
|
|
|
|
import requests |
|
|
import pandas as pd |
|
|
import streamlit as st |
|
|
from bs4 import BeautifulSoup |
|
|
|
|
|
try: |
|
|
|
|
|
from huggingface_hub import InferenceClient |
|
|
except Exception: |
|
|
InferenceClient = None |
|
|
|
|
|
# App-wide constants.
APP_TITLE = "About→Taglines: LLM-Powered Tagline Generator"

# Pre-filled About page URL shown in the sidebar input.
DEFAULT_URL = "https://www.codestratlabs.com/#about"

# Default Hugging Face Inference model id; user can override in the sidebar.
DEFAULT_MODEL = "mistralai/Mistral-7B-Instruct-v0.3"

# Must be the first Streamlit call in the script (Streamlit requirement).
st.set_page_config(page_title=APP_TITLE, page_icon="🪄", layout="wide")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_hf_token() -> Optional[str]: |
|
|
|
|
|
for k in [ |
|
|
"HUGGINGFACE_API_TOKEN", |
|
|
"HF_TOKEN", |
|
|
"HUGGINGFACEHUB_API_TOKEN", |
|
|
"HF_API_TOKEN", |
|
|
]: |
|
|
try: |
|
|
if k in st.secrets and st.secrets[k]: |
|
|
return st.secrets[k] |
|
|
except Exception: |
|
|
pass |
|
|
if os.getenv(k): |
|
|
return os.getenv(k) |
|
|
return None |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Desktop-browser User-Agent so sites don't serve bot-blocked or minimal pages.
USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/115.0 Safari/537.36"
)

# Tags whose text counts as readable About content when scraping a page.
ALLOWED_TAGS = {
    "p", "h1", "h2", "h3", "h4", "li", "blockquote", "em", "strong", "span"
}
|
|
|
|
|
|
|
|
def fetch_about_text(url: str, timeout: int = 15) -> str:
    """Fetch the page, extract readable About text, and lightly clean it.

    Heuristics, in order:
      1. Prefer elements whose id/class mentions "about" (case-insensitive).
      2. Fall back to <main>, then <body>.
      3. Collect text from common content tags, collapse whitespace, strip
         trailing copyright boilerplate, and cap the result at 4000 chars.

    Not bulletproof but good enough for most marketing About pages.

    Raises:
        requests.HTTPError: if the server returns a non-2xx status.
    """
    headers = {"User-Agent": USER_AGENT}
    r = requests.get(url, headers=headers, timeout=timeout)
    r.raise_for_status()

    # "lxml" is faster but an optional third-party dependency; fall back to
    # the stdlib parser so the app still works when lxml isn't installed.
    try:
        soup = BeautifulSoup(r.text, "lxml")
    except Exception:
        soup = BeautifulSoup(r.text, "html.parser")

    candidates = []

    # CSS4 "i" flag: case-insensitive substring match on id/class.
    about_like = soup.select('[id*="about" i], [class*="about" i]')
    if about_like:
        candidates.extend(about_like)

    if not candidates:
        main = soup.find("main") or soup.body
        if main:
            candidates.append(main)

    # Nested "about" containers would yield the same text repeatedly;
    # dedupe chunks while preserving first-seen order.
    chunks = []
    seen = set()
    for node in candidates:
        for tag in node.find_all(ALLOWED_TAGS):
            text = tag.get_text(" ", strip=True)
            if text and text not in seen:
                seen.add(text)
                chunks.append(text)

    text = "\n".join(chunks)

    # Collapse all whitespace, then drop copyright/footer boilerplate.
    text = re.sub(r"\s+", " ", text)
    text = re.sub(r"(©|Copyright).*?\d{4}.*", "", text, flags=re.I)
    text = text.strip()

    # Keep the prompt small; 4000 chars is plenty of About context.
    return text[:4000]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# System message sent with every generation request; instructs the model to
# act as a copywriter and to reply with JSON only (parsed downstream).
SYSTEM_PROMPT = (
    "You are a world-class brand copywriter. Given a company 'About Us' "
    "description and some creative directions, craft concise, memorable, and "
    "distinctive marketing taglines that would perform well on landing pages, "
    "social headers, and ads. Always return valid JSON."
)
|
|
|
|
|
# User-message template filled via str.format().
#
# Two escaping rules matter here:
#   * The triple quotes around {about} are backslash-escaped so they do not
#     terminate the enclosing triple-quoted string literal (the original
#     unescaped `"""{about}"""` made the module a SyntaxError).
#   * Literal braces in the JSON schema are doubled ({{ }}) so .format()
#     treats them as text instead of placeholders.
USER_PROMPT_TEMPLATE = """Write {n} creative marketing taglines for the brand described below.
Constraints:
- Each tagline max {max_words} words.
- Tone(s): {tones}.
- Target audience: {audience}.
- Brand traits to emphasize: {traits}.
- Language: {language}.
- Avoid clichés. Avoid generic buzzwords. Prefer clarity over fluff.
- Make each line unique; avoid repeating structures.
- {style_rule}

Company About (verbatim, possibly trimmed):
\"\"\"{about}\"\"\"

Return JSON with this exact schema:
{{
  "taglines": [
    {{
      "line": string,
      "explanation": string
    }}
  ]
}}
"""
|
|
|
|
|
# Extra instruction appended to the prompt for each "Style bias" choice.
# Keys must match the selectbox options defined in the sidebar.
STYLE_RULES = {
    "One-liners": "Only produce single-line taglines; do not add subheads.",
    "Slogan + Subhead": (
        "Produce single-line slogan candidates; keep explanations focused on the angle."
    ),
    "Alliterative": "Favor gentle alliteration (not forced).",
    "Bold & Punchy": "Favor short, high-impact phrasing.",
}
|
|
|
|
|
|
|
|
def call_hf_inference(
    model: str,
    messages: List[Dict[str, str]],
    temperature: float = 0.7,
    max_new_tokens: int = 512,
    top_p: float = 0.9,
    seed: Optional[int] = None,
) -> str:
    """Send a chat request to the Hugging Face Inference API and return the raw text.

    Prefers ``huggingface_hub.InferenceClient``; when that package is not
    importable, falls back to a raw HTTP POST against the Inference API.

    Raises:
        RuntimeError: when no API token is configured.
    """
    token = get_hf_token()
    if not token:
        raise RuntimeError(
            "No Hugging Face API token found. Set HUGGINGFACE_API_TOKEN (or HF_TOKEN) as a Space secret."
        )

    if InferenceClient is None:
        # Minimal fallback via raw HTTP to Inference API
        params: Dict = {
            "temperature": temperature,
            "top_p": top_p,
            "max_new_tokens": max_new_tokens,
        }
        if seed is not None:
            params["seed"] = seed
        payload = {
            "inputs": messages,
            "parameters": params,
            "task": "conversational",
        }
        api_url = f"https://api-inference.huggingface.co/models/{model}"
        headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
        resp = requests.post(api_url, headers=headers, data=json.dumps(payload), timeout=60)
        resp.raise_for_status()
        data = resp.json()
        # Best-effort extraction
        try:
            return data[0]["generated_text"]
        except Exception:
            return json.dumps(data)

    client = InferenceClient(model=model, token=token)

    # Build a chat-style input for instruct models
    # Many HF chat models accept a list of dicts with role/content
    generated = client.chat.completions.create(
        messages=messages,
        temperature=temperature,
        top_p=top_p,
        max_tokens=max_new_tokens,
        seed=seed,
    )
    return generated.choices[0].message.content
|
|
|
|
|
|
|
|
# -----------------------------
# UI
# -----------------------------

# Page header and a collapsed usage guide.
st.title("🪄 Tagline Generator from About Us (LLM)")
with st.expander("How it works", expanded=False):
    st.markdown(
        "1. Paste an About page URL (or raw text).\n"
        "2. Choose tone, style, and constraints.\n"
        "3. Click **Generate** to get multiple tagline options.\n"
        "4. Copy, edit, or download as CSV."
    )
|
|
|
|
|
with st.sidebar:
    # --- Input source: URL or pasted text (pasted text wins) ---
    st.header("Input Source")
    url = st.text_input("About page URL", value=DEFAULT_URL)
    st.caption("Tip: Works best with dedicated About/Company pages.")
    st.divider()
    raw_text = st.text_area(
        "…or paste About text (overrides URL if provided)",
        height=160,
        placeholder="Paste company description here…"
    )

    # --- Creative controls fed into the prompt template ---
    st.header("Creative Controls")
    n = st.slider("# of taglines", min_value=3, max_value=30, value=12)
    max_words = st.slider("Max words per tagline", min_value=3, max_value=12, value=7)

    tone_options = [
        "Bold & Punchy",
        "Credible & Trustworthy",
        "Visionary & Innovative",
        "Friendly & Helpful",
        "Premium & Sophisticated",
        "Playful & Witty",
        "Tech-forward & Precise",
    ]
    # Fall back to a neutral tone if the user deselects everything.
    tones = st.multiselect("Tone(s)", tone_options, default=["Bold & Punchy", "Tech-forward & Precise"]) or ["Clear & Confident"]

    # Options must match the keys of STYLE_RULES.
    style_choice = st.selectbox("Style bias", ["One-liners", "Slogan + Subhead", "Alliterative", "Bold & Punchy"], index=0)

    audience = st.text_input("Target audience", value="B2B founders, product & growth leaders")
    traits = st.text_input("Brand traits to highlight", value="AI-native, reliable delivery, measurable impact")

    language = st.text_input("Language (e.g., English, Hindi)", value="English")

    # --- Sampling parameters passed straight to the model call ---
    temperature = st.slider("Creativity (temperature)", 0.0, 1.5, 0.8, 0.1)
    top_p = st.slider("Nucleus sampling (top_p)", 0.1, 1.0, 0.9, 0.05)
    seed_toggle = st.checkbox("Use seed for reproducibility", value=False)
    seed_val = st.number_input("Seed", min_value=0, max_value=10_000_000, value=42, step=1, disabled=not seed_toggle)

    # --- Model selection ---
    st.divider()
    st.subheader("Model")
    model = st.text_input("HF Inference model id", value=DEFAULT_MODEL)
    st.caption("Use any instruct/chat model available on the Inference API.")
|
|
|
|
|
col1, col2 = st.columns([2, 1])

with col1:
    st.subheader("1) Fetch About content")
    # Streamlit reruns the whole script on every interaction and st.button is
    # only True on the rerun triggered by its own click. Without persistence,
    # URL-fetched text was lost by the time "Generate" was clicked, so the
    # fetch result is stored in st.session_state and re-read on every rerun.
    about_text = None
    if raw_text.strip():
        # Pasted text always takes precedence over any earlier fetch.
        about_text = raw_text.strip()
        st.success("Using pasted About text.")
    else:
        if st.button("Fetch from URL", use_container_width=True):
            try:
                with st.spinner("Fetching & parsing About page..."):
                    fetched = fetch_about_text(url)
                if fetched:
                    st.session_state["fetched_about_text"] = fetched
                    st.success(f"Fetched ~{len(fetched)} chars of About content.")
                else:
                    # An empty fetch invalidates any stale previous result.
                    st.session_state.pop("fetched_about_text", None)
                    st.warning("Couldn't extract meaningful About text—try pasting it manually.")
            except Exception as e:
                st.error(f"Fetch failed: {e}")
        # Reuse the fetch result from this or any earlier rerun.
        about_text = st.session_state.get("fetched_about_text")

    about_holder = st.empty()
    if about_text:
        with about_holder.container():
            st.text_area("About content used for generation", about_text, height=220)

with col2:
    st.subheader("2) Generate Taglines")
    can_generate = st.button("🪄 Generate", use_container_width=True)
|
|
|
|
|
st.markdown("---")

# Build the prompt, call the model, and parse the response into rows.
results_df = None
if can_generate:
    if not (raw_text.strip() or about_text):
        st.warning("Please paste About text or click 'Fetch from URL' first.")
    else:
        # Build messages for chat API
        style_rule = STYLE_RULES.get(style_choice, "")
        user_prompt = USER_PROMPT_TEMPLATE.format(
            n=n,
            max_words=max_words,
            tones=", ".join(tones),
            audience=audience,
            traits=traits,
            language=language,
            style_rule=style_rule,
            about=about_text or raw_text.strip(),
        )

        messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": user_prompt},
        ]

        with st.spinner("Asking the model for ideas…"):
            try:
                # Seed only when the reproducibility toggle is on.
                seed_used = seed_val if seed_toggle else None
                output_text = call_hf_inference(
                    model=model,
                    messages=messages,
                    temperature=temperature,
                    max_new_tokens=768,
                    top_p=top_p,
                    seed=seed_used,
                )
            except Exception as e:
                st.error(f"Generation failed: {e}")
                # Empty output routes the code below to the "nothing parsed" path.
                output_text = ""

        # Try parsing JSON reliably
        parsed = None
        if output_text:
            # Find the first JSON object in the string
            # (models often wrap JSON in prose or code fences).
            m = re.search(r"\{[\s\S]*\}", output_text)
            if m:
                try:
                    parsed = json.loads(m.group(0))
                except Exception:
                    parsed = None

        taglines: List[Dict[str, str]] = []
        if parsed and isinstance(parsed, dict) and "taglines" in parsed:
            # Happy path: model followed the requested schema.
            for item in parsed["taglines"]:
                line = (item.get("line") or "").strip()
                expl = (item.get("explanation") or "").strip()
                if line:
                    taglines.append({"Tagline": line, "Why it works": expl})
        else:
            # Fallback: heuristic split by lines/bullets
            candidates = re.split(r"[\n•\-\d\)]\s+", output_text)
            for c in candidates:
                c = c.strip().strip('"').strip()
                # Keep only plausible tagline-length fragments.
                if 0 < len(c.split()) <= max_words and 3 <= len(c) <= 90:
                    taglines.append({"Tagline": c, "Why it works": ""})

        if not taglines:
            st.warning("No taglines parsed. Try increasing max tokens or adjust constraints.")
        else:
            results_df = pd.DataFrame(taglines)
|
|
# Render results as a table with a CSV export button.
if results_df is not None and not results_df.empty:
    st.subheader("Results")
    st.dataframe(results_df, use_container_width=True, hide_index=True)

    csv_bytes = results_df.to_csv(index=False).encode("utf-8")
    st.download_button(
        "Download CSV",
        data=csv_bytes,
        file_name="taglines.csv",
        mime="text/csv",
        use_container_width=True,
    )

st.markdown(":sparkles: Tip: Click *Generate* again for fresh variations; toggle a seed for reproducibility.")

st.markdown("---")

# Static help section for common failure modes.
with st.expander("Troubleshooting"):
    st.markdown(
        "- **401/403 errors** → Ensure your Space has a valid `HUGGINGFACE_API_TOKEN` secret with access to the selected model.\n"
        "- **Empty results** → Paste the About text manually, increase `max words`, or try a different model.\n"
        "- **Slow output** → Reduce `# of taglines` or switch to a lighter model.\n"
        "- **Different language** → Change the *Language* field; the model will write in that language."
    )
|
|
|