Spaces:

anir-1995
/

AI_Sentiment_Analyzer

Sleeping

AI_Sentiment_Analyzer / src /streamlit_app.py

anirudh-np-ds

feat: AI webpage sentiment and insight analyzer

f6c1b27 about 1 month ago

14.2 kB

	import html
	import json
	import os
	import re
	from urllib.parse import urlparse

	import requests
	import streamlit as st
	from bs4 import BeautifulSoup

	# Configure the browser tab title, icon and wide layout for the app.
	st.set_page_config(
	    page_title="AI Sentiment Analyzer",
	    page_icon="🌐",
	    layout="wide",
	)

	# Global stylesheet for the custom HTML fragments this script renders
	# (hero banner, insight cards, entity tags, theme pills, stat boxes, URL chip).
	PAGE_CSS = """
	<style>
	@import url('https://fonts.googleapis.com/css2?family=Sora:wght@300;400;500;600;700&family=JetBrains+Mono:wght@400;500&display=swap');
	html, body, [class*="css"] { font-family: 'Sora', sans-serif; }
	.main { background: #0a0a0f; }

	.hero {
	background: linear-gradient(135deg, #0d0d1a 0%, #0a0a0f 100%);
	border: 1px solid #1e1e2e; border-top: 3px solid #a78bfa;
	border-radius: 14px; padding: 28px 32px; margin-bottom: 24px;
	}
	.hero h1 { font-size: 1.8rem; font-weight: 700; color: #f1f5f9; margin: 0 0 6px 0; }
	.hero p { color: #4b5563; font-size: 0.88rem; margin: 0; }

	.insight-card {
	background: #0d0d1a; border: 1px solid #1e1e2e;
	border-radius: 12px; padding: 20px 24px; margin: 10px 0;
	}

	/* Sentiment meter */
	.sentiment-positive { color: #4ade80; }
	.sentiment-negative { color: #f87171; }
	.sentiment-neutral { color: #94a3b8; }
	.sentiment-mixed { color: #fbbf24; }

	.big-sentiment {
	font-size: 3rem; font-weight: 700; text-align: center;
	padding: 20px; letter-spacing: -0.02em;
	}
	.sentiment-score-label {
	text-align: center; font-size: 0.82rem; color: #4b5563;
	font-family: 'JetBrains Mono', monospace;
	}

	.entity-tag {
	display: inline-block; border-radius: 6px;
	padding: 4px 10px; font-size: 0.78rem; margin: 3px;
	font-family: 'JetBrains Mono', monospace;
	}
	.entity-person { background: rgba(167,139,250,0.12); color: #a78bfa; border: 1px solid rgba(167,139,250,0.25); }
	.entity-org { background: rgba(59,130,246,0.1); color: #60a5fa; border: 1px solid rgba(59,130,246,0.25); }
	.entity-location { background: rgba(34,197,94,0.1); color: #4ade80; border: 1px solid rgba(34,197,94,0.25); }
	.entity-topic { background: rgba(251,191,36,0.1); color: #fbbf24; border: 1px solid rgba(251,191,36,0.25); }
	.entity-product { background: rgba(248,113,113,0.1); color: #f87171; border: 1px solid rgba(248,113,113,0.25); }

	.theme-pill {
	display: inline-block; background: #1e1e2e; border: 1px solid #2d2d3e;
	border-radius: 20px; padding: 5px 14px; margin: 4px;
	font-size: 0.8rem; color: #94a3b8;
	}

	.section-label {
	font-size: 0.68rem; text-transform: uppercase; letter-spacing: 0.1em;
	color: #2d2d3e; font-weight: 600; margin: 18px 0 8px 0;
	}

	.stat-row { display: flex; gap: 10px; margin: 16px 0; }
	.stat-box {
	flex: 1; background: #0d0d1a; border: 1px solid #1e1e2e;
	border-radius: 10px; padding: 14px; text-align: center;
	}
	.stat-val { font-size: 1.3rem; font-weight: 700; color: #f1f5f9; }
	.stat-lbl { font-size: 0.68rem; color: #4b5563; margin-top: 2px; }

	.url-chip {
	background: #0d0d1a; border: 1px solid #1e1e2e; border-radius: 8px;
	padding: 10px 14px; font-family: 'JetBrains Mono', monospace;
	font-size: 0.78rem; color: #4b5563; word-break: break-all;
	margin-bottom: 16px;
	}

	section[data-testid="stSidebar"] { background: #060609; border-right: 1px solid #1e1e2e; }
	</style>
	"""
	st.markdown(PAGE_CSS, unsafe_allow_html=True)


	def fetch_url_text(url: str, max_chars: int = 4000) -> tuple[str, str]:
	    """Download *url* and return ``(text, title)`` for the page.

	    The response body is parsed with ``html.parser``. ``script``, ``style``,
	    ``nav``, ``footer``, ``header``, ``aside`` and ``form`` elements are
	    removed before text extraction, whitespace runs are collapsed to single
	    spaces, and the text is cut to ``max_chars`` characters.

	    Args:
	        url: Address of the page to fetch.
	        max_chars: Maximum number of characters of page text to return.

	    Returns:
	        A ``(text, title)`` tuple. ``title`` falls back to the URL's network
	        location when the page has no usable ``<title>`` text.

	    Raises:
	        requests.exceptions.RequestException: If the request fails, times out
	            (15 s) or ``raise_for_status`` rejects the HTTP status.
	    """
	    headers = {"User-Agent": "Mozilla/5.0 (compatible; InsightBot/1.0)"}
	    r = requests.get(url, headers=headers, timeout=15)
	    r.raise_for_status()
	    soup = BeautifulSoup(r.text, "html.parser")

	    # ``soup.title.string`` is None when <title> is empty or contains nested
	    # markup, so guard before stripping instead of raising AttributeError.
	    title_text = soup.title.string if soup.title else None
	    title = (title_text or "").strip() or urlparse(url).netloc

	    # Drop non-content elements so they do not pollute the extracted text.
	    for tag in soup(["script", "style", "nav", "footer", "header", "aside", "form"]):
	        tag.decompose()
	    text = soup.get_text(separator=" ", strip=True)
	    text = re.sub(r"\s+", " ", text).strip()
	    return text[:max_chars], title


	def analyze_content(text: str, url: str, title: str, api_key: str) -> dict:
	    """Ask the Groq chat-completions API for a structured analysis of a page.

	    Builds a prompt containing the URL, title and page text, posts it to the
	    ``llama-3.3-70b-versatile`` model and parses the reply as JSON.

	    Args:
	        text: Page text to analyze.
	        url: Source URL, included in the prompt for context.
	        title: Page title, included in the prompt for context.
	        api_key: Groq API key sent as a bearer token.

	    Returns:
	        The decoded JSON value from the model reply (the prompt requests an
	        object with sentiment, themes, entities, tone, and related fields).

	    Raises:
	        requests.exceptions.RequestException: If the API call fails, times out
	            (30 s) or ``raise_for_status`` rejects the HTTP status.
	        json.JSONDecodeError: If the reply contains no parseable JSON.
	    """
	    # Alternatives are separated with a plain "|": a "\|" inside a non-raw
	    # f-string is an invalid escape and would send stray backslashes to the model.
	    prompt = f"""You are an expert content analyst. Analyze the following webpage content and extract deep insights.

	Source URL: {url}
	Page Title: {title}

	Content:
	{text}

	Respond ONLY with a valid JSON object in exactly this format:
	{{
	"sentiment": "<one of: Positive | Negative | Neutral | Mixed>",
	"sentiment_score": <float between -1.0 (very negative) and 1.0 (very positive)>,
	"sentiment_explanation": "<1-2 sentences explaining the sentiment>",
	"one_line_summary": "<single sentence capturing the entire content>",
	"key_themes": ["<theme 1>", "<theme 2>", "<theme 3>", "<theme 4>", "<theme 5>"],
	"named_entities": {{
	"persons": ["<name>"],
	"organizations": ["<org>"],
	"locations": ["<location>"],
	"products": ["<product>"]
	}},
	"content_type": "<one of: News Article | Product Page | Review | Blog Post | Research | Social Media | Other>",
	"target_audience": "<who this content is written for>",
	"key_insights": ["<insight 1>", "<insight 2>", "<insight 3>"],
	"tone": "<one of: Informative | Promotional | Critical | Analytical | Emotional | Persuasive | Neutral>",
	"credibility_signals": ["<signal 1>", "<signal 2>"],
	"word_count_estimate": <integer>
	}}"""

	    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
	    payload = {
	        "model": "llama-3.3-70b-versatile",
	        "messages": [{"role": "user", "content": prompt}],
	        "max_tokens": 1000,
	        "temperature": 0.1,
	    }
	    r = requests.post(
	        "https://api.groq.com/openai/v1/chat/completions",
	        headers=headers,
	        json=payload,
	        timeout=30,
	    )
	    r.raise_for_status()
	    raw = r.json()["choices"][0]["message"]["content"]

	    # Strip Markdown code fences (``` or ```json). The pipe must be unescaped
	    # to act as alternation; escaped, it only matches a literal "|".
	    cleaned = re.sub(r"```(?:json)?", "", raw, flags=re.IGNORECASE).strip()

	    # Tolerate prose around the payload by keeping only the outermost {...}.
	    start, end = cleaned.find("{"), cleaned.rfind("}")
	    if start != -1 and end > start:
	        cleaned = cleaned[start:end + 1]
	    return json.loads(cleaned)


	# ─── Sidebar ──────────────────────────────────────────────────────────────────
	# Static sidebar panels, kept as named fragments so the layout code below
	# reads as a sequence of render calls.
	sidebar_extracts_panel = """
	<div style='font-size:0.78rem;color:#2d2d3e;line-height:2'>
	<b style='color:#4b5563'>Extracts</b><br>
	🎭 Sentiment & Score<br>
	🏷️ Key Themes<br>
	👤 Named Entities<br>
	💡 Key Insights<br>
	🎯 Target Audience<br>
	🗣️ Content Tone<br>
	📰 Content Type
	</div>"""
	sidebar_examples_panel = """
	<div style='font-size:0.78rem;color:#2d2d3e;line-height:2'>
	<b style='color:#4b5563'>Try these URLs</b><br>
	• Any news article<br>
	• Amazon product page<br>
	• Wikipedia article<br>
	• Company blog post<br>
	• G2 / Trustpilot review
	</div>"""

	with st.sidebar:
	    st.markdown("## 🌐 Sentiment Analyzer")
	    st.markdown(
	        "<div style='color:#2d2d3e;font-size:0.8rem'>AI-powered content intelligence</div>",
	        unsafe_allow_html=True,
	    )
	    st.markdown("---")

	    # A key from the GROQ_API_KEY environment variable takes precedence; the
	    # password input is only rendered when that variable is unset or empty.
	    env_key = os.environ.get("GROQ_API_KEY", "")
	    api_key = env_key or st.text_input("🔑 Groq API Key", type="password", placeholder="gsk_...")
	    if not (env_key or api_key):
	        st.caption("Free key → [console.groq.com](https://console.groq.com)")

	    st.markdown("---")
	    st.markdown(sidebar_extracts_panel, unsafe_allow_html=True)
	    st.markdown("---")
	    st.markdown(sidebar_examples_panel, unsafe_allow_html=True)


	# ─── Main UI ──────────────────────────────────────────────────────────────────
	# Hero banner shown at the top of the main column.
	hero_banner = """
	<div class='hero'>
	<h1>🌐 AI Webpage Sentiment & Insight Analyzer</h1>
	<p>Paste any URL — AI extracts sentiment, themes, entities, tone, and key insights in seconds</p>
	</div>
	"""
	st.markdown(hero_banner, unsafe_allow_html=True)

	# Nothing below can work without a key, so halt this script run here.
	if not api_key:
	    st.warning("👈 Add your Groq API key in the sidebar.")
	    st.stop()

	st.markdown("<div class='section-label'>Paste a URL to analyze</div>", unsafe_allow_html=True)
	col_input, col_btn = st.columns([5, 1])
	with col_input:
	    url_input = st.text_input("URL", placeholder="https://...", label_visibility="collapsed")
	with col_btn:
	    analyze_btn = st.button("Analyze ➤", type="primary", use_container_width=True)

	# Example URLs: one button per example, labelled with the host name.
	st.markdown("<div class='section-label'>Quick examples</div>", unsafe_allow_html=True)
	examples = [
	    "https://en.wikipedia.org/wiki/Artificial_intelligence",
	    "https://techcrunch.com",
	    "https://www.bbc.com/news",
	]
	clicked_url = None
	example_columns = st.columns(len(examples))
	for position, (column, example) in enumerate(zip(example_columns, examples)):
	    host = urlparse(example).netloc
	    pressed = column.button(f"🔗 {host}", key=f"ex_{position}", use_container_width=True)
	    if pressed:
	        clicked_url = example

	# An example click wins; otherwise the typed URL is used only when the
	# Analyze button was pressed on this run.
	if clicked_url:
	    final_url = clicked_url
	elif analyze_btn:
	    final_url = url_input
	else:
	    final_url = None

	# ─── Analysis & results ───────────────────────────────────────────────────────
	# Everything rendered below goes through ``unsafe_allow_html=True``, and the
	# values come from the user-supplied URL and from a model reading arbitrary
	# web pages, so every interpolated value is HTML-escaped first.
	def _esc(value) -> str:
	    """Return *value* as HTML-escaped text; ``None`` becomes an empty string."""
	    return "" if value is None else html.escape(str(value), quote=True)


	def _as_list(value) -> list:
	    """Return *value* if it is a list, otherwise an empty list."""
	    return value if isinstance(value, list) else []


	def _entity_tags(values, css_class: str, icon: str, limit: int) -> str:
	    """Render up to *limit* entity names as escaped ``entity-tag`` spans."""
	    return "".join(
	        f"<span class='entity-tag {css_class}'>{icon} {_esc(v)}</span>"
	        for v in _as_list(values)[:limit]
	    )


	if final_url:
	    with st.spinner(f"Fetching and analyzing {final_url}..."):
	        try:
	            content_text, page_title = fetch_url_text(final_url)
	            result = analyze_content(content_text, final_url, page_title, api_key)
	            if not isinstance(result, dict):
	                raise ValueError("AI response was not a JSON object")

	            sentiment = str(result.get("sentiment") or "Neutral")

	            # The model may return the score as a string, null, NaN or a value
	            # outside [-1, 1]; coerce and clamp so formatting and the meter
	            # width below cannot fail or overflow.
	            try:
	                score = float(result.get("sentiment_score", 0))
	            except (TypeError, ValueError):
	                score = 0.0
	            if score != score:  # NaN is the only value unequal to itself
	                score = 0.0
	            score = max(-1.0, min(1.0, score))

	            sentiment_color = {
	                "Positive": "#4ade80",
	                "Negative": "#f87171",
	                "Mixed": "#fbbf24",
	            }.get(sentiment, "#94a3b8")
	            sentiment_emoji = {
	                "Positive": "😊",
	                "Negative": "😟",
	                "Neutral": "😐",
	            }.get(sentiment, "🤔")
	            # Map the [-1, 1] score onto a 0-100 % bar width.
	            score_pct = int((score + 1) / 2 * 100)

	            st.markdown(f"<div class='url-chip'>🔗 {_esc(final_url)}</div>", unsafe_allow_html=True)
	            st.markdown(f"### 📄 {page_title}")

	            # Top row
	            col_sent, col_summary = st.columns([1, 2])

	            with col_sent:
	                sentiment_label = _esc(sentiment)
	                explanation = _esc(result.get("sentiment_explanation", ""))
	                st.markdown(f"""
	<div class='insight-card' style='text-align:center'>
	<div style='font-size:0.72rem;text-transform:uppercase;letter-spacing:0.1em;color:#4b5563;margin-bottom:8px'>Sentiment</div>
	<div style='font-size:3.5rem'>{sentiment_emoji}</div>
	<div style='font-size:1.6rem;font-weight:700;color:{sentiment_color};margin:4px 0'>{sentiment_label}</div>
	<div style='font-family:JetBrains Mono,monospace;font-size:0.8rem;color:#4b5563'>score: {score:+.2f}</div>
	<div style='background:#1e1e2e;border-radius:4px;height:6px;margin:10px 0'>
	<div style='height:6px;border-radius:4px;width:{score_pct}%;background:{sentiment_color}'></div>
	</div>
	<div style='font-size:0.78rem;color:#4b5563;margin-top:8px'>{explanation}</div>
	</div>
	""", unsafe_allow_html=True)

	            with col_summary:
	                entities = result.get("named_entities")
	                if not isinstance(entities, dict):
	                    entities = {}
	                persons_html = _entity_tags(entities.get("persons"), "entity-person", "👤", 4)
	                orgs_html = _entity_tags(entities.get("organizations"), "entity-org", "🏢", 4)
	                locations_html = _entity_tags(entities.get("locations"), "entity-location", "📍", 3)
	                products_html = _entity_tags(entities.get("products"), "entity-product", "📦", 3)
	                entities_html = (
	                    persons_html + orgs_html + locations_html + products_html
	                    or "<span style='color:#4b5563;font-size:0.82rem'>None detected</span>"
	                )

	                summary = _esc(result.get("one_line_summary", ""))
	                content_type = _esc(result.get("content_type", ""))
	                tone = _esc(result.get("tone", ""))
	                audience = _esc(result.get("target_audience", ""))
	                st.markdown(f"""
	<div class='insight-card'>
	<div style='font-size:0.72rem;text-transform:uppercase;letter-spacing:0.1em;color:#4b5563;margin-bottom:10px'>One-Line Summary</div>
	<div style='font-size:1rem;color:#f1f5f9;font-weight:500;line-height:1.6;margin-bottom:16px'>"{summary}"</div>
	<div style='display:flex;gap:16px;margin-bottom:14px'>
	<div><span style='font-size:0.72rem;color:#4b5563'>Content Type</span><br><span style='color:#a78bfa;font-weight:600;font-size:0.88rem'>{content_type}</span></div>
	<div><span style='font-size:0.72rem;color:#4b5563'>Tone</span><br><span style='color:#60a5fa;font-weight:600;font-size:0.88rem'>{tone}</span></div>
	<div><span style='font-size:0.72rem;color:#4b5563'>Audience</span><br><span style='color:#4ade80;font-weight:600;font-size:0.88rem'>{audience}</span></div>
	</div>
	<div style='font-size:0.72rem;text-transform:uppercase;letter-spacing:0.1em;color:#4b5563;margin-bottom:8px'>Named Entities</div>
	{entities_html}
	</div>
	""", unsafe_allow_html=True)

	            # Themes + Insights
	            col_themes, col_insights = st.columns(2)

	            with col_themes:
	                themes_html = "".join(
	                    f"<div class='theme-pill'>#{_esc(t)}</div>"
	                    for t in _as_list(result.get("key_themes"))
	                )
	                st.markdown(f"""
	<div class='insight-card'>
	<div style='font-size:0.72rem;text-transform:uppercase;letter-spacing:0.1em;color:#4b5563;margin-bottom:12px'>🏷️ Key Themes</div>
	{themes_html}
	</div>""", unsafe_allow_html=True)

	            with col_insights:
	                insights_html = "".join(
	                    f"<div style='padding:8px 0;border-bottom:1px solid #1e1e2e;font-size:0.87rem;color:#94a3b8;line-height:1.6'>→ {_esc(ins)}</div>"
	                    for ins in _as_list(result.get("key_insights"))
	                )
	                st.markdown(f"""
	<div class='insight-card'>
	<div style='font-size:0.72rem;text-transform:uppercase;letter-spacing:0.1em;color:#4b5563;margin-bottom:12px'>💡 Key Insights</div>
	{insights_html}
	</div>""", unsafe_allow_html=True)

	            # Credibility (card is omitted entirely when no signals were returned)
	            cred = _as_list(result.get("credibility_signals"))
	            if cred:
	                cred_html = "".join(
	                    f"<span style='background:rgba(74,222,128,0.08);border:1px solid rgba(74,222,128,0.2);border-radius:6px;padding:4px 12px;margin:3px;display:inline-block;font-size:0.8rem;color:#4ade80'>✓ {_esc(c)}</span>"
	                    for c in cred
	                )
	                st.markdown(f"""
	<div class='insight-card'>
	<div style='font-size:0.72rem;text-transform:uppercase;letter-spacing:0.1em;color:#4b5563;margin-bottom:10px'>🛡️ Credibility Signals</div>
	{cred_html}
	</div>""", unsafe_allow_html=True)

	        except requests.exceptions.ConnectionError:
	            st.error("❌ Could not reach that URL. Make sure it's publicly accessible.")
	        except json.JSONDecodeError:
	            st.error("❌ AI returned unexpected output. Try again.")
	        except Exception as e:
	            # Top-level boundary of the script: surface anything else to the user.
	            st.error(f"❌ Error: {str(e)}")