import json
import os
import re
from urllib.parse import urlparse

import requests
import streamlit as st
from bs4 import BeautifulSoup

st.set_page_config(page_title="AI Sentiment Analyzer", page_icon="🌐", layout="wide")

# NOTE(review): the original embedded a <style> block here that appears to have
# been stripped during extraction — restore the app CSS inside this call.
st.markdown("""
""", unsafe_allow_html=True)


def fetch_url_text(url: str) -> tuple[str, str]:
    """Fetch *url* and return ``(visible_text, title)``.

    The text is the page's human-readable content with boilerplate elements
    removed, whitespace-normalized, and truncated to 4000 characters (a rough
    budget for the LLM prompt).

    Raises:
        requests.HTTPError: on non-2xx responses.
        requests.exceptions.ConnectionError / Timeout: on network failures.
    """
    headers = {"User-Agent": "Mozilla/5.0 (compatible; InsightBot/1.0)"}
    r = requests.get(url, headers=headers, timeout=15)
    r.raise_for_status()
    soup = BeautifulSoup(r.text, "html.parser")
    # <title> may be missing OR present-but-empty (.string is None); fall back
    # to the hostname in both cases instead of crashing on None.strip().
    if soup.title and soup.title.string:
        title = soup.title.string.strip()
    else:
        title = urlparse(url).netloc
    # Strip scripts, styles, and structural boilerplate before extracting text.
    for tag in soup(["script", "style", "nav", "footer", "header", "aside", "form"]):
        tag.decompose()
    text = soup.get_text(separator=" ", strip=True)
    text = re.sub(r"\s+", " ", text).strip()
    return text[:4000], title


def analyze_content(text: str, url: str, title: str, api_key: str) -> dict:
    """Analyze webpage *text* via Groq's chat-completions API.

    Returns the model's analysis as a dict (sentiment, themes, entities, ...).

    Raises:
        requests.HTTPError: if the API call fails.
        json.JSONDecodeError: if the model's reply is not parseable JSON.
    """
    prompt = f"""You are an expert content analyst. Analyze the following webpage content and extract deep insights.

Source URL: {url}
Page Title: {title}

Content:
{text}

Respond ONLY with a valid JSON object in exactly this format:
{{
  "sentiment": "<Positive|Negative|Neutral|Mixed>",
  "sentiment_score": <float from -1.0 to 1.0>,
  "sentiment_explanation": "<1-2 sentences explaining the sentiment>",
  "one_line_summary": "<one sentence summary>",
  "key_themes": ["<theme>", "<theme>", "<theme>", "<theme>", "<theme>"],
  "named_entities": {{
    "persons": ["<name>"],
    "organizations": ["<name>"],
    "locations": ["<name>"],
    "products": ["<name>"]
  }},
  "content_type": "<e.g. news article, product page, blog post>",
  "target_audience": "<who this content is for>",
  "key_insights": ["<insight>", "<insight>", "<insight>"],
  "tone": "<e.g. formal, conversational, promotional>",
  "credibility_signals": ["<signal>", "<signal>"],
  "word_count_estimate": <integer>
}}"""
    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
    payload = {
        "model": "llama-3.3-70b-versatile",
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 1000,
        "temperature": 0.1,  # low temperature: we want deterministic, schema-shaped output
    }
    r = requests.post(
        "https://api.groq.com/openai/v1/chat/completions",
        headers=headers,
        json=payload,
        timeout=30,
    )
    r.raise_for_status()
    raw = r.json()["choices"][0]["message"]["content"]
    # Models often wrap JSON in ``` fences or surrounding prose; strip fences,
    # then keep only the outermost {...} span before parsing.
    raw = re.sub(r"```json|```", "", raw).strip()
    start, end = raw.find("{"), raw.rfind("}")
    if start != -1 and end > start:
        raw = raw[start:end + 1]
    return json.loads(raw)
# ─── Sidebar ──────────────────────────────────────────────────────────────────
# NOTE(review): the original sidebar/main markup used raw HTML that was
# stripped during extraction; it is reconstructed here with plain markdown
# and Streamlit-native widgets. All logic and runtime messages are preserved.
with st.sidebar:
    st.markdown("## 🌐 Sentiment Analyzer")
    st.markdown("AI-powered content intelligence")
    st.markdown("---")
    # Prefer a key from the environment; only prompt the user when absent.
    env_key = os.environ.get("GROQ_API_KEY", "")
    api_key = env_key if env_key else st.text_input(
        "🔑 Groq API Key", type="password", placeholder="gsk_..."
    )
    if not env_key and not api_key:
        st.caption("Free key → [console.groq.com](https://console.groq.com)")
    st.markdown("---")
    st.markdown(
        "**Extracts**\n\n"
        "🎭 Sentiment & Score\n\n"
        "🏷️ Key Themes\n\n"
        "👤 Named Entities\n\n"
        "💡 Key Insights\n\n"
        "🎯 Target Audience\n\n"
        "🗣️ Content Tone\n\n"
        "📰 Content Type"
    )
    st.markdown("---")
    st.markdown(
        "**Try these URLs**\n\n"
        "• Any news article\n\n"
        "• Amazon product page\n\n"
        "• Wikipedia article\n\n"
        "• Company blog post\n\n"
        "• G2 / Trustpilot review"
    )

# ─── Main UI ──────────────────────────────────────────────────────────────────
st.markdown("# 🌐 AI Webpage Sentiment & Insight Analyzer")
st.markdown(
    "Paste any URL — AI extracts sentiment, themes, entities, tone, "
    "and key insights in seconds"
)

# Gate the rest of the app on having a key (env var or sidebar input).
if not api_key:
    st.warning("👈 Add your Groq API key in the sidebar.")
    st.stop()

col_input, col_btn = st.columns([5, 1])
with col_input:
    url_input = st.text_input("URL", placeholder="https://...", label_visibility="collapsed")
with col_btn:
    analyze_btn = st.button("Analyze ➤", type="primary", use_container_width=True)

# Example URLs — clicking one triggers an analysis immediately.
examples = [
    "https://en.wikipedia.org/wiki/Artificial_intelligence",
    "https://techcrunch.com",
    "https://www.bbc.com/news",
]
cols = st.columns(len(examples))
clicked_url = None
for i, ex in enumerate(examples):
    label = urlparse(ex).netloc
    if cols[i].button(f"🔗 {label}", key=f"ex_{i}", use_container_width=True):
        clicked_url = ex

# An example click wins; the typed URL counts only when "Analyze" was pressed.
final_url = clicked_url or (url_input if analyze_btn else None)

if final_url:
    with st.spinner(f"Fetching and analyzing {final_url}..."):
        try:
            content_text, page_title = fetch_url_text(final_url)
            result = analyze_content(content_text, final_url, page_title, api_key)

            sentiment = result.get("sentiment", "Neutral")
            score = result.get("sentiment_score", 0)
            sentiment_emoji = (
                "😊" if sentiment == "Positive"
                else "😟" if sentiment == "Negative"
                else "😐" if sentiment == "Neutral"
                else "🤔"  # "Mixed" or any unexpected label
            )
            # Map the model's [-1, 1] score onto a 0–100 gauge.
            score_pct = int((score + 1) / 2 * 100)

            st.markdown(f"🔗 {final_url}")
            st.markdown(f"### 📄 {page_title}")

            # Top row: sentiment card next to the summary card.
            col_sent, col_summary = st.columns([1, 2])
            with col_sent:
                st.markdown("**Sentiment**")
                st.markdown(f"## {sentiment_emoji} {sentiment}")
                st.caption(f"score: {score:+.2f}")
                st.progress(score_pct)
                st.markdown(result.get("sentiment_explanation", ""))
            with col_summary:
                entities = result.get("named_entities", {})
                # Cap each entity category so the card stays compact.
                chips = (
                    [f"👤 {e}" for e in entities.get("persons", [])[:4]]
                    + [f"🏢 {e}" for e in entities.get("organizations", [])[:4]]
                    + [f"📍 {e}" for e in entities.get("locations", [])[:3]]
                    + [f"📦 {e}" for e in entities.get("products", [])[:3]]
                )
                entities_text = " · ".join(chips) or "None detected"
                st.markdown("**One-Line Summary**")
                st.markdown(f"\"{result.get('one_line_summary', '')}\"")
                st.markdown(f"**Content Type**: {result.get('content_type', '')}")
                st.markdown(f"**Tone**: {result.get('tone', '')}")
                st.markdown(f"**Audience**: {result.get('target_audience', '')}")
                st.markdown(f"**Named Entities**: {entities_text}")

            # Themes + Insights side by side.
            col_themes, col_insights = st.columns(2)
            with col_themes:
                st.markdown("#### 🏷️ Key Themes")
                st.markdown(" ".join(f"`#{t}`" for t in result.get("key_themes", [])))
            with col_insights:
                st.markdown("#### 💡 Key Insights")
                for ins in result.get("key_insights", []):
                    st.markdown(f"→ {ins}")

            # Credibility signals (shown only when the model reported any).
            cred = result.get("credibility_signals", [])
            if cred:
                st.markdown("#### 🛡️ Credibility Signals")
                for c in cred:
                    st.markdown(f"✓ {c}")

        except requests.exceptions.ConnectionError:
            st.error("❌ Could not reach that URL. Make sure it's publicly accessible.")
        except json.JSONDecodeError:
            st.error("❌ AI returned unexpected output. Try again.")
        except Exception as e:
            st.error(f"❌ Error: {str(e)}")