# Streamlit Tagline Generator for "About Us" pages (Hugging Face Space)
# ---------------------------------------------------------------
# Deploy this file as `app.py` in a Hugging Face Space (Streamlit SDK).
#
# 🔧 Setup (on Hugging Face):
# 1) Create a new Space → SDK: Streamlit → Python.
# 2) Add this file as `app.py`.
# 3) In **Settings → Variables & secrets**, add a secret named one of:
#    - HUGGINGFACE_API_TOKEN (preferred)
#    - HF_TOKEN (fallback)
# 4) (Optional) In `README.md`, paste these Requirements (or keep them here):
#
# Requirements (auto-installed if you add a `requirements.txt`):
#   streamlit>=1.36.0
#   requests>=2.31.0
#   beautifulsoup4>=4.12.2
#   lxml>=5.2.2
#   huggingface_hub>=0.23.0
#   pandas>=2.2.2
#
# If you don't want a separate `requirements.txt`, the Space will still install common libs,
# but it's best practice to include it.
# ---------------------------------------------------------------

import os
import re
import json
import time
import random
from typing import List, Dict, Optional

import requests
import pandas as pd
import streamlit as st
from bs4 import BeautifulSoup

try:
    # Lightweight client for Inference API
    from huggingface_hub import InferenceClient
except Exception:
    InferenceClient = None  # We'll gracefully handle if missing

APP_TITLE = "About→Taglines: LLM-Powered Tagline Generator"
DEFAULT_URL = "https://www.codestratlabs.com/#about"
# Sensible, widely available open-instruct model on HF Inference API.
# You may change this to any chat/instruct model you have access to.
DEFAULT_MODEL = "mistralai/Mistral-7B-Instruct-v0.3"

# Must be the first Streamlit command executed by the script.
st.set_page_config(page_title=APP_TITLE, page_icon="🪄", layout="wide")

# st.session_state keys. Streamlit re-runs the whole script on every widget
# interaction, so anything that must survive a button click lives here.
_ABOUT_TEXT_KEY = "fetched_about_text"
_ABOUT_URL_KEY = "fetched_about_url"
_RESULTS_KEY = "tagline_results"


# -----------------------------
# Helper: find HF token
# -----------------------------
_TOKEN_KEYS = (
    "HUGGINGFACE_API_TOKEN",
    "HF_TOKEN",
    "HUGGINGFACEHUB_API_TOKEN",
    "HF_API_TOKEN",
)


def get_hf_token() -> Optional[str]:
    """Return the first Hugging Face API token found, or None.

    Names in ``_TOKEN_KEYS`` are tried in order; for each name the Streamlit
    secret is checked first, then the environment variable of the same name.
    """
    for key in _TOKEN_KEYS:
        try:
            secret = st.secrets[key] if key in st.secrets else None
        except Exception:
            # Deliberate best-effort: accessing st.secrets can raise when no
            # secrets are configured; fall through to the environment.
            secret = None
        if secret:
            return secret
        env_value = os.getenv(key)
        if env_value:
            return env_value
    return None


# -----------------------------
# Web scraping utilities
# -----------------------------
USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/115.0 Safari/537.36"
)

ALLOWED_TAGS = {
    "p", "h1", "h2", "h3", "h4", "li", "blockquote", "em", "strong", "span"
}

_WHITESPACE_RE = re.compile(r"\s+")
_COPYRIGHT_RE = re.compile(r"(©|Copyright).*?\d{4}.*", flags=re.I)


def _inside_allowed_tag(tag, root) -> bool:
    """Return True if *tag* has an ALLOWED_TAGS ancestor strictly below *root*.

    Such a tag's text is already part of that ancestor's extracted text, so
    emitting it again would duplicate content.
    """
    for parent in tag.parents:
        if parent is root:
            return False
        if parent.name in ALLOWED_TAGS:
            return True
    return False


def fetch_about_text(url: str, timeout: int = 15, max_chars: int = 4000) -> str:
    """Fetch a page and return its readable "About" text on a single line.

    Elements whose id or class contains "about" are preferred; if none exist,
    ``<main>`` (or ``<body>``) is used. Not bulletproof but good enough for
    most marketing About pages.

    Args:
        url: Page to download.
        timeout: Request timeout in seconds.
        max_chars: Upper bound on the returned text length, to keep the
            prompt at a sane size.

    Returns:
        Whitespace-normalised text, at most ``max_chars`` characters long
        (possibly empty).

    Raises:
        requests.RequestException: On network errors or non-2xx responses.
    """
    headers = {"User-Agent": USER_AGENT}
    r = requests.get(url, headers=headers, timeout=timeout)
    r.raise_for_status()
    soup = BeautifulSoup(r.text, "lxml")

    # Try common About selectors first: id or class containing 'about'.
    candidates = list(soup.select('[id*="about" i], [class*="about" i]'))

    # Fallback: main content
    if not candidates:
        main = soup.find("main") or soup.body
        if main:
            candidates.append(main)

    allowed = sorted(ALLOWED_TAGS)
    seen_tags = set()
    chunks = []
    for node in candidates:
        for tag in node.find_all(allowed):
            # "about" containers are frequently nested, so the same tag can be
            # reached through several candidates; emit it only once.
            if id(tag) in seen_tags:
                continue
            seen_tags.add(id(tag))
            if _inside_allowed_tag(tag, node):
                continue
            chunk = _WHITESPACE_RE.sub(" ", tag.get_text(" ", strip=True))
            # Strip copyright notices per chunk. Doing this on the joined
            # single-line text would delete everything after the notice.
            chunk = _COPYRIGHT_RE.sub("", chunk).strip()
            if chunk:
                chunks.append(chunk)

    text = " ".join(chunks)
    # Limit to a sane context length for prompting
    return text[:max_chars]


# -----------------------------
# Prompting & generation
# -----------------------------
SYSTEM_PROMPT = (
    "You are a world-class brand copywriter. Given a company 'About Us' "
    "description and some creative directions, craft concise, memorable, and "
    "distinctive marketing taglines that would perform well on landing pages, "
    "social headers, and ads. Always return valid JSON."
)

# Filled with str.format(): literal JSON braces are doubled so they are not
# parsed as replacement fields. Single-quote delimiters let the template wrap
# the About text in triple double-quotes.
USER_PROMPT_TEMPLATE = '''Write {n} creative marketing taglines for the brand described below.

Constraints:
- Each tagline max {max_words} words.
- Tone(s): {tones}.
- Target audience: {audience}.
- Brand traits to emphasize: {traits}.
- Language: {language}.
- Avoid clichés. Avoid generic buzzwords. Prefer clarity over fluff.
- Make each line unique; avoid repeating structures.
- {style_rule}

Company About (verbatim, possibly trimmed):
"""{about}"""

Return JSON with this exact schema:
{{
  "taglines": [
    {{ "line": string, "explanation": string }}
  ]
}}
'''

STYLE_RULES = {
    "One-liners": "Only produce single-line taglines; do not add subheads.",
    "Slogan + Subhead": (
        "Produce single-line slogan candidates; keep explanations focused on the angle."
    ),
    "Alliterative": "Favor gentle alliteration (not forced).",
    "Bold & Punchy": "Favor short, high-impact phrasing.",
}


def _call_raw_inference_api(
    model: str,
    token: str,
    messages: List[Dict[str, str]],
    temperature: float,
    max_new_tokens: int,
    top_p: float,
    seed: Optional[int],
) -> str:
    """Minimal raw-HTTP fallback used when huggingface_hub is not installed."""
    api_url = f"https://api-inference.huggingface.co/models/{model}"
    headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
    parameters = {
        "temperature": temperature,
        "top_p": top_p,
        "max_new_tokens": max_new_tokens,
    }
    if seed is not None:
        parameters["seed"] = seed
    # NOTE(review): payload shape is best-effort and kept from the original
    # implementation; confirm against the target model's API.
    payload = {
        "inputs": messages,
        "parameters": parameters,
        "task": "conversational",
    }
    resp = requests.post(api_url, headers=headers, data=json.dumps(payload), timeout=60)
    resp.raise_for_status()
    data = resp.json()
    # Best-effort extraction: if the response has an unexpected shape, return
    # it serialised so the caller's parser can still inspect it.
    try:
        return data[0]["generated_text"]
    except (KeyError, IndexError, TypeError):
        return json.dumps(data)


def call_hf_inference(model: str, messages: List[Dict[str, str]], temperature: float = 0.7,
                      max_new_tokens: int = 512, top_p: float = 0.9,
                      seed: Optional[int] = None) -> str:
    """Send chat *messages* to a Hugging Face model and return the reply text.

    Args:
        model: HF model id.
        messages: Chat messages as ``{"role": ..., "content": ...}`` dicts.
        temperature: Sampling temperature.
        max_new_tokens: Maximum number of tokens to generate.
        top_p: Nucleus-sampling threshold.
        seed: Optional sampling seed.

    Returns:
        The generated text (empty string if the model returned no content).

    Raises:
        RuntimeError: If no API token is configured.
    """
    token = get_hf_token()
    if not token:
        raise RuntimeError(
            "No Hugging Face API token found. Set HUGGINGFACE_API_TOKEN (or HF_TOKEN) as a Space secret."
        )

    if InferenceClient is None:
        return _call_raw_inference_api(
            model, token, messages, temperature, max_new_tokens, top_p, seed
        )

    client = InferenceClient(model=model, token=token)
    # Many HF chat models accept a list of dicts with role/content.
    generated = client.chat.completions.create(
        messages=messages,
        temperature=temperature,
        top_p=top_p,
        max_tokens=max_new_tokens,
        seed=seed,
    )
    return generated.choices[0].message.content or ""


# -----------------------------
# Output parsing
# -----------------------------
def _extract_taglines_object(output_text: str) -> Optional[dict]:
    """Return the first JSON object in *output_text* that has a "taglines" key.

    Decoding is attempted at every "{" so that prose before or after the
    JSON (including stray braces) does not break parsing.
    """
    decoder = json.JSONDecoder()
    for match in re.finditer(r"\{", output_text):
        try:
            obj, _end = decoder.raw_decode(output_text[match.start():])
        except json.JSONDecodeError:
            continue
        if isinstance(obj, dict) and "taglines" in obj:
            return obj
    return None


def _rows_from_items(items) -> List[Dict[str, str]]:
    """Convert the model's "taglines" value into display rows."""
    rows: List[Dict[str, str]] = []
    if not isinstance(items, list):
        return rows
    for item in items:
        if isinstance(item, dict):
            line = str(item.get("line") or "").strip()
            expl = str(item.get("explanation") or "").strip()
        elif isinstance(item, str):
            # Some models return a bare list of strings instead of objects.
            line, expl = item.strip(), ""
        else:
            continue
        if line:
            rows.append({"Tagline": line, "Why it works": expl})
    return rows


def parse_taglines(output_text: str, max_words: int) -> List[Dict[str, str]]:
    """Turn raw model output into ``{"Tagline", "Why it works"}`` rows.

    JSON matching the requested schema is preferred. When none is found, the
    text is split heuristically on newlines/bullets and only fragments that
    look like taglines (at most *max_words* words, 3-90 characters) are kept.
    """
    if not output_text:
        return []

    parsed = _extract_taglines_object(output_text)
    if parsed is not None:
        return _rows_from_items(parsed["taglines"])

    # Fallback: heuristic split by lines/bullets
    rows: List[Dict[str, str]] = []
    for fragment in re.split(r"[\n•\-\d\)]\s+", output_text):
        fragment = fragment.strip().strip('"').strip()
        if 0 < len(fragment.split()) <= max_words and 3 <= len(fragment) <= 90:
            rows.append({"Tagline": fragment, "Why it works": ""})
    return rows


# -----------------------------
# UI
# -----------------------------
st.title("🪄 Tagline Generator from About Us (LLM)")

with st.expander("How it works", expanded=False):
    st.markdown(
        "1. Paste an About page URL (or raw text).\n"
        "2. Choose tone, style, and constraints.\n"
        "3. Click **Generate** to get multiple tagline options.\n"
        "4. Copy, edit, or download as CSV."
    )

with st.sidebar:
    st.header("Input Source")
    url = st.text_input("About page URL", value=DEFAULT_URL)
    st.caption("Tip: Works best with dedicated About/Company pages.")
    st.divider()
    raw_text = st.text_area(
        "…or paste About text (overrides URL if provided)",
        height=160,
        placeholder="Paste company description here…"
    )

    st.header("Creative Controls")
    n = st.slider("# of taglines", min_value=3, max_value=30, value=12)
    max_words = st.slider("Max words per tagline", min_value=3, max_value=12, value=7)
    tone_options = [
        "Bold & Punchy",
        "Credible & Trustworthy",
        "Visionary & Innovative",
        "Friendly & Helpful",
        "Premium & Sophisticated",
        "Playful & Witty",
        "Tech-forward & Precise",
    ]
    tones = st.multiselect("Tone(s)", tone_options, default=["Bold & Punchy", "Tech-forward & Precise"]) or ["Clear & Confident"]
    style_choice = st.selectbox("Style bias", ["One-liners", "Slogan + Subhead", "Alliterative", "Bold & Punchy"], index=0)
    audience = st.text_input("Target audience", value="B2B founders, product & growth leaders")
    traits = st.text_input("Brand traits to highlight", value="AI-native, reliable delivery, measurable impact")
    language = st.text_input("Language (e.g., English, Hindi)", value="English")
    temperature = st.slider("Creativity (temperature)", 0.0, 1.5, 0.8, 0.1)
    top_p = st.slider("Nucleus sampling (top_p)", 0.1, 1.0, 0.9, 0.05)
    seed_toggle = st.checkbox("Use seed for reproducibility", value=False)
    seed_val = st.number_input("Seed", min_value=0, max_value=10_000_000, value=42, step=1, disabled=not seed_toggle)

    st.divider()
    st.subheader("Model")
    model = st.text_input("HF Inference model id", value=DEFAULT_MODEL)
    st.caption("Use any instruct/chat model available on the Inference API.")

col1, col2 = st.columns([2, 1])

with col1:
    st.subheader("1) Fetch About content")
    about_text: Optional[str] = None
    pasted_text = raw_text.strip()
    if pasted_text:
        about_text = pasted_text
        st.success("Using pasted About text.")
    else:
        if st.button("Fetch from URL", use_container_width=True):
            try:
                with st.spinner("Fetching & parsing About page..."):
                    fetched = fetch_about_text(url)
            except Exception as e:  # UI boundary: surface any fetch/parse failure
                st.session_state.pop(_ABOUT_TEXT_KEY, None)
                st.session_state.pop(_ABOUT_URL_KEY, None)
                st.error(f"Fetch failed: {e}")
            else:
                st.session_state[_ABOUT_TEXT_KEY] = fetched
                st.session_state[_ABOUT_URL_KEY] = url
                if fetched:
                    st.success(f"Fetched ~{len(fetched)} chars of About content.")
                else:
                    st.warning("Couldn't extract meaningful About text—try pasting it manually.")
        # The button is only True on the run where it was clicked, so the
        # fetched text is restored from session state on later reruns (e.g.
        # when "Generate" is clicked), and only while the URL is unchanged.
        if st.session_state.get(_ABOUT_URL_KEY) == url:
            about_text = st.session_state.get(_ABOUT_TEXT_KEY) or None

    about_holder = st.empty()
    if about_text:
        with about_holder.container():
            st.text_area("About content used for generation", about_text, height=220)

with col2:
    st.subheader("2) Generate Taglines")
    can_generate = st.button("🪄 Generate", use_container_width=True)

st.markdown("---")

if can_generate:
    # A new attempt replaces whatever was shown before.
    st.session_state.pop(_RESULTS_KEY, None)
    if not about_text:
        st.warning("Please paste About text or click 'Fetch from URL' first.")
    else:
        # Build messages for chat API
        style_rule = STYLE_RULES.get(style_choice, "")
        user_prompt = USER_PROMPT_TEMPLATE.format(
            n=n,
            max_words=max_words,
            tones=", ".join(tones),
            audience=audience,
            traits=traits,
            language=language,
            style_rule=style_rule,
            about=about_text,
        )
        messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": user_prompt},
        ]

        with st.spinner("Asking the model for ideas…"):
            try:
                seed_used = seed_val if seed_toggle else None
                output_text = call_hf_inference(
                    model=model,
                    messages=messages,
                    temperature=temperature,
                    max_new_tokens=768,
                    top_p=top_p,
                    seed=seed_used,
                )
            except Exception as e:  # UI boundary: show the failure, keep the app alive
                st.error(f"Generation failed: {e}")
            else:
                taglines = parse_taglines(output_text, max_words)
                if taglines:
                    st.session_state[_RESULTS_KEY] = pd.DataFrame(taglines)
                else:
                    st.warning("No taglines parsed. Try increasing max tokens or adjust constraints.")

# Read results from session state so they survive the rerun triggered by the
# download button (or any other widget interaction).
results_df = st.session_state.get(_RESULTS_KEY)

if results_df is not None and not results_df.empty:
    st.subheader("Results")
    st.dataframe(results_df, use_container_width=True, hide_index=True)

    csv_bytes = results_df.to_csv(index=False).encode("utf-8")
    st.download_button(
        "Download CSV",
        data=csv_bytes,
        file_name="taglines.csv",
        mime="text/csv",
        use_container_width=True,
    )

    st.markdown(":sparkles: Tip: Click *Generate* again for fresh variations; toggle a seed for reproducibility.")

st.markdown("---")
with st.expander("Troubleshooting"):
    st.markdown(
        "- **401/403 errors** → Ensure your Space has a valid `HUGGINGFACE_API_TOKEN` secret with access to the selected model.\n"
        "- **Empty results** → Paste the About text manually, increase `max words`, or try a different model.\n"
        "- **Slow output** → Reduce `# of taglines` or switch to a lighter model.\n"
        "- **Different language** → Change the *Language* field; the model will write in that language."
    )