|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os |
|
|
import re |
|
|
import json |
|
|
import time |
|
|
import random |
|
|
from typing import List, Dict, Optional |
|
|
|
|
|
import requests |
|
|
import pandas as pd |
|
|
import streamlit as st |
|
|
from bs4 import BeautifulSoup |
|
|
|
|
|
try: |
|
|
|
|
|
from huggingface_hub import InferenceClient |
|
|
except Exception: |
|
|
InferenceClient = None |
|
|
|
|
|
# App-wide constants.
APP_TITLE = "About→Taglines: LLM-Powered Tagline Generator"

# Pre-filled About page URL shown in the sidebar input.
DEFAULT_URL = "https://www.codestratlabs.com/#about"

# Default Hugging Face Inference model id; user can override in the sidebar.
DEFAULT_MODEL = "mistralai/Mistral-7B-Instruct-v0.3"

# Must be the first Streamlit call in the script (Streamlit requirement).
st.set_page_config(page_title=APP_TITLE, page_icon="🪄", layout="wide")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_hf_token() -> Optional[str]: |
|
|
|
|
|
for k in [ |
|
|
"HUGGINGFACE_API_TOKEN", |
|
|
"HF_TOKEN", |
|
|
"HUGGINGFACEHUB_API_TOKEN", |
|
|
"HF_API_TOKEN", |
|
|
]: |
|
|
try: |
|
|
if k in st.secrets and st.secrets[k]: |
|
|
return st.secrets[k] |
|
|
except Exception: |
|
|
pass |
|
|
if os.getenv(k): |
|
|
return os.getenv(k) |
|
|
return None |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Desktop-browser User-Agent so sites don't serve bot-blocked or minimal pages.
USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/115.0 Safari/537.36"
)

# Tags whose text counts as readable About content when scraping a page.
ALLOWED_TAGS = {
    "p", "h1", "h2", "h3", "h4", "li", "blockquote", "em", "strong", "span"
}
|
|
|
|
|
|
|
|
def fetch_about_text(url: str, timeout: int = 15) -> str:
    """Fetch the page, extract readable About text, and lightly clean it.

    Heuristics, in order:
      1. Prefer elements whose id/class mentions "about" (case-insensitive).
      2. Fall back to <main>, then <body>.
      3. Collect text from common content tags, collapse whitespace, strip
         trailing copyright boilerplate, and cap the result at 4000 chars.

    Not bulletproof but good enough for most marketing About pages.

    Raises:
        requests.HTTPError: if the server returns a non-2xx status.
    """
    headers = {"User-Agent": USER_AGENT}
    r = requests.get(url, headers=headers, timeout=timeout)
    r.raise_for_status()

    # "lxml" is faster but an optional third-party dependency; fall back to
    # the stdlib parser so the app still works when lxml isn't installed.
    try:
        soup = BeautifulSoup(r.text, "lxml")
    except Exception:
        soup = BeautifulSoup(r.text, "html.parser")

    candidates = []

    # CSS4 "i" flag: case-insensitive substring match on id/class.
    about_like = soup.select('[id*="about" i], [class*="about" i]')
    if about_like:
        candidates.extend(about_like)

    if not candidates:
        main = soup.find("main") or soup.body
        if main:
            candidates.append(main)

    # Nested "about" containers would yield the same text repeatedly;
    # dedupe chunks while preserving first-seen order.
    chunks = []
    seen = set()
    for node in candidates:
        for tag in node.find_all(ALLOWED_TAGS):
            text = tag.get_text(" ", strip=True)
            if text and text not in seen:
                seen.add(text)
                chunks.append(text)

    text = "\n".join(chunks)

    # Collapse all whitespace, then drop copyright/footer boilerplate.
    text = re.sub(r"\s+", " ", text)
    text = re.sub(r"(©|Copyright).*?\d{4}.*", "", text, flags=re.I)
    text = text.strip()

    # Keep the prompt small; 4000 chars is plenty of About context.
    return text[:4000]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# System message sent with every generation request; instructs the model to
# act as a copywriter and to reply with JSON only (parsed downstream).
SYSTEM_PROMPT = (
    "You are a world-class brand copywriter. Given a company 'About Us' "
    "description and some creative directions, craft concise, memorable, and "
    "distinctive marketing taglines that would perform well on landing pages, "
    "social headers, and ads. Always return valid JSON."
)
|
|
|
|
|
# User-message template filled via str.format().
#
# Two escaping rules matter here:
#   * The triple quotes around {about} are backslash-escaped so they do not
#     terminate the enclosing triple-quoted string literal (the original
#     unescaped `"""{about}"""` made the module a SyntaxError).
#   * Literal braces in the JSON schema are doubled ({{ }}) so .format()
#     treats them as text instead of placeholders.
USER_PROMPT_TEMPLATE = """Write {n} creative marketing taglines for the brand described below.
Constraints:
- Each tagline max {max_words} words.
- Tone(s): {tones}.
- Target audience: {audience}.
- Brand traits to emphasize: {traits}.
- Language: {language}.
- Avoid clichés. Avoid generic buzzwords. Prefer clarity over fluff.
- Make each line unique; avoid repeating structures.
- {style_rule}

Company About (verbatim, possibly trimmed):
\"\"\"{about}\"\"\"

Return JSON with this exact schema:
{{
  "taglines": [
    {{
      "line": string,
      "explanation": string
    }}
  ]
}}
"""
|
|
|
|
|
# Extra instruction appended to the prompt for each "Style bias" choice.
# Keys must match the selectbox options defined in the sidebar.
STYLE_RULES = {
    "One-liners": "Only produce single-line taglines; do not add subheads.",
    "Slogan + Subhead": (
        "Produce single-line slogan candidates; keep explanations focused on the angle."
    ),
    "Alliterative": "Favor gentle alliteration (not forced).",
    "Bold & Punchy": "Favor short, high-impact phrasing.",
}
|
|
|
|
|
|
|
|
def call_hf_inference(
    model: str,
    messages: List[Dict[str, str]],
    temperature: float = 0.7,
    max_new_tokens: int = 512,
    top_p: float = 0.9,
    seed: Optional[int] = None,
) -> str:
    """Send a chat request to the Hugging Face Inference API and return the raw text.

    Prefers ``huggingface_hub.InferenceClient``; when that package is not
    importable, falls back to a raw HTTP POST against the Inference API.

    Raises:
        RuntimeError: when no API token is configured.
    """
    token = get_hf_token()
    if not token:
        raise RuntimeError(
            "No Hugging Face API token found. Set HUGGINGFACE_API_TOKEN (or HF_TOKEN) as a Space secret."
        )

    if InferenceClient is None:
        # Minimal fallback via raw HTTP to Inference API
        params: Dict = {
            "temperature": temperature,
            "top_p": top_p,
            "max_new_tokens": max_new_tokens,
        }
        if seed is not None:
            params["seed"] = seed
        payload = {
            "inputs": messages,
            "parameters": params,
            "task": "conversational",
        }
        api_url = f"https://api-inference.huggingface.co/models/{model}"
        headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
        resp = requests.post(api_url, headers=headers, data=json.dumps(payload), timeout=60)
        resp.raise_for_status()
        data = resp.json()
        # Best-effort extraction
        try:
            return data[0]["generated_text"]
        except Exception:
            return json.dumps(data)

    client = InferenceClient(model=model, token=token)

    # Build a chat-style input for instruct models
    # Many HF chat models accept a list of dicts with role/content
    generated = client.chat.completions.create(
        messages=messages,
        temperature=temperature,
        top_p=top_p,
        max_tokens=max_new_tokens,
        seed=seed,
    )
    return generated.choices[0].message.content
|
|
|
|
|
|
|
|
# -----------------------------
# UI
# -----------------------------

# Page header and a collapsed usage guide.
st.title("🪄 Tagline Generator from About Us (LLM)")
with st.expander("How it works", expanded=False):
    st.markdown(
        "1. Paste an About page URL (or raw text).\n"
        "2. Choose tone, style, and constraints.\n"
        "3. Click **Generate** to get multiple tagline options.\n"
        "4. Copy, edit, or download as CSV."
    )
|
|
|
|
|
with st.sidebar:
    # --- Input source: URL or pasted text (pasted text wins) ---
    st.header("Input Source")
    url = st.text_input("About page URL", value=DEFAULT_URL)
    st.caption("Tip: Works best with dedicated About/Company pages.")
    st.divider()
    raw_text = st.text_area(
        "…or paste About text (overrides URL if provided)",
        height=160,
        placeholder="Paste company description here…"
    )

    # --- Creative controls fed into the prompt template ---
    st.header("Creative Controls")
    n = st.slider("# of taglines", min_value=3, max_value=30, value=12)
    max_words = st.slider("Max words per tagline", min_value=3, max_value=12, value=7)

    tone_options = [
        "Bold & Punchy",
        "Credible & Trustworthy",
        "Visionary & Innovative",
        "Friendly & Helpful",
        "Premium & Sophisticated",
        "Playful & Witty",
        "Tech-forward & Precise",
    ]
    # Fall back to a neutral tone if the user deselects everything.
    tones = st.multiselect("Tone(s)", tone_options, default=["Bold & Punchy", "Tech-forward & Precise"]) or ["Clear & Confident"]

    # Options must match the keys of STYLE_RULES.
    style_choice = st.selectbox("Style bias", ["One-liners", "Slogan + Subhead", "Alliterative", "Bold & Punchy"], index=0)

    audience = st.text_input("Target audience", value="B2B founders, product & growth leaders")
    traits = st.text_input("Brand traits to highlight", value="AI-native, reliable delivery, measurable impact")

    language = st.text_input("Language (e.g., English, Hindi)", value="English")

    # --- Sampling parameters passed straight to the model call ---
    temperature = st.slider("Creativity (temperature)", 0.0, 1.5, 0.8, 0.1)
    top_p = st.slider("Nucleus sampling (top_p)", 0.1, 1.0, 0.9, 0.05)
    seed_toggle = st.checkbox("Use seed for reproducibility", value=False)
    seed_val = st.number_input("Seed", min_value=0, max_value=10_000_000, value=42, step=1, disabled=not seed_toggle)

    # --- Model selection ---
    st.divider()
    st.subheader("Model")
    model = st.text_input("HF Inference model id", value=DEFAULT_MODEL)
    st.caption("Use any instruct/chat model available on the Inference API.")
|
|
|
|
|
col1, col2 = st.columns([2, 1])

with col1:
    st.subheader("1) Fetch About content")
    # Streamlit reruns the whole script on every interaction and st.button is
    # only True on the rerun triggered by its own click. Without persistence,
    # URL-fetched text was lost by the time "Generate" was clicked, so the
    # fetch result is stored in st.session_state and re-read on every rerun.
    about_text = None
    if raw_text.strip():
        # Pasted text always takes precedence over any earlier fetch.
        about_text = raw_text.strip()
        st.success("Using pasted About text.")
    else:
        if st.button("Fetch from URL", use_container_width=True):
            try:
                with st.spinner("Fetching & parsing About page..."):
                    fetched = fetch_about_text(url)
                if fetched:
                    st.session_state["fetched_about_text"] = fetched
                    st.success(f"Fetched ~{len(fetched)} chars of About content.")
                else:
                    # An empty fetch invalidates any stale previous result.
                    st.session_state.pop("fetched_about_text", None)
                    st.warning("Couldn't extract meaningful About text—try pasting it manually.")
            except Exception as e:
                st.error(f"Fetch failed: {e}")
        # Reuse the fetch result from this or any earlier rerun.
        about_text = st.session_state.get("fetched_about_text")

    about_holder = st.empty()
    if about_text:
        with about_holder.container():
            st.text_area("About content used for generation", about_text, height=220)

with col2:
    st.subheader("2) Generate Taglines")
    can_generate = st.button("🪄 Generate", use_container_width=True)
|
|
|
|
|
st.markdown("---")

# Build the prompt, call the model, and parse the response into rows.
results_df = None
if can_generate:
    if not (raw_text.strip() or about_text):
        st.warning("Please paste About text or click 'Fetch from URL' first.")
    else:
        # Build messages for chat API
        style_rule = STYLE_RULES.get(style_choice, "")
        user_prompt = USER_PROMPT_TEMPLATE.format(
            n=n,
            max_words=max_words,
            tones=", ".join(tones),
            audience=audience,
            traits=traits,
            language=language,
            style_rule=style_rule,
            about=about_text or raw_text.strip(),
        )

        messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": user_prompt},
        ]

        with st.spinner("Asking the model for ideas…"):
            try:
                # Seed only when the reproducibility toggle is on.
                seed_used = seed_val if seed_toggle else None
                output_text = call_hf_inference(
                    model=model,
                    messages=messages,
                    temperature=temperature,
                    max_new_tokens=768,
                    top_p=top_p,
                    seed=seed_used,
                )
            except Exception as e:
                st.error(f"Generation failed: {e}")
                # Empty output routes the code below to the "nothing parsed" path.
                output_text = ""

        # Try parsing JSON reliably
        parsed = None
        if output_text:
            # Find the first JSON object in the string
            # (models often wrap JSON in prose or code fences).
            m = re.search(r"\{[\s\S]*\}", output_text)
            if m:
                try:
                    parsed = json.loads(m.group(0))
                except Exception:
                    parsed = None

        taglines: List[Dict[str, str]] = []
        if parsed and isinstance(parsed, dict) and "taglines" in parsed:
            # Happy path: model followed the requested schema.
            for item in parsed["taglines"]:
                line = (item.get("line") or "").strip()
                expl = (item.get("explanation") or "").strip()
                if line:
                    taglines.append({"Tagline": line, "Why it works": expl})
        else:
            # Fallback: heuristic split by lines/bullets
            candidates = re.split(r"[\n•\-\d\)]\s+", output_text)
            for c in candidates:
                c = c.strip().strip('"').strip()
                # Keep only plausible tagline-length fragments.
                if 0 < len(c.split()) <= max_words and 3 <= len(c) <= 90:
                    taglines.append({"Tagline": c, "Why it works": ""})

        if not taglines:
            st.warning("No taglines parsed. Try increasing max tokens or adjust constraints.")
        else:
            results_df = pd.DataFrame(taglines)
|
|
# Render results as a table with a CSV export button.
if results_df is not None and not results_df.empty:
    st.subheader("Results")
    st.dataframe(results_df, use_container_width=True, hide_index=True)

    csv_bytes = results_df.to_csv(index=False).encode("utf-8")
    st.download_button(
        "Download CSV",
        data=csv_bytes,
        file_name="taglines.csv",
        mime="text/csv",
        use_container_width=True,
    )

st.markdown(":sparkles: Tip: Click *Generate* again for fresh variations; toggle a seed for reproducibility.")

st.markdown("---")

# Static help section for common failure modes.
with st.expander("Troubleshooting"):
    st.markdown(
        "- **401/403 errors** → Ensure your Space has a valid `HUGGINGFACE_API_TOKEN` secret with access to the selected model.\n"
        "- **Empty results** → Paste the About text manually, increase `max words`, or try a different model.\n"
        "- **Slow output** → Reduce `# of taglines` or switch to a lighter model.\n"
        "- **Different language** → Change the *Language* field; the model will write in that language."
    )
|
|
|