"""Streamlit front end for multi-engine brand-mention monitoring.

The script wires five pages (Analysis, Dashboard, Co-Mention Network,
Scheduled Monitoring, History) to the project's search, analysis and
persistence helpers.

NOTE(review): several emoji literals in the UI strings below look mis-encoded
(UTF-8 bytes decoded with a different code page).  They are kept exactly as
found; confirm the file's encoding and re-save as UTF-8 if they render badly.
"""
import streamlit as st
import os
from datetime import datetime, timedelta
import pandas as pd
from web_scraper import scrape_article_content
from brand_analyzer import BrandAnalyzer
from search_engines import multi_engine_search, batch_analyze_brands
from db_operations import (
    save_analysis_to_db,
    get_historical_analyses,
    get_all_mentions,
    save_co_mentions,
    get_co_mention_network,
    create_scheduled_job,
    get_scheduled_jobs
)
import plotly.express as px
import plotly.graph_objects as go
import networkx as nx
from collections import Counter, defaultdict
from scheduler import get_scheduler
from database import init_database
from dotenv import load_dotenv

# Load environment variables and initialize database
load_dotenv()
init_database()

# Page configuration (must stay the first Streamlit call in the script)
st.set_page_config(
    page_title="Brand Monitoring Dashboard",
    page_icon="πŸ”",
    layout="wide"
)

# Initialize scheduler.  A failure is surfaced as a warning instead of
# aborting the app; `scheduler` is still bound so later references cannot
# raise NameError.
try:
    scheduler = get_scheduler()
except Exception as e:
    scheduler = None
    st.warning(f"Scheduler initialization warning: {e}")

# Initialize session state
if 'batch_results' not in st.session_state:
    st.session_state.batch_results = {}
if 'current_page' not in st.session_state:
    st.session_state.current_page = 'Analysis'
if 'selected_analysis_ids' not in st.session_state:
    st.session_state.selected_analysis_ids = []

# Sidebar navigation
st.sidebar.title("πŸ” Brand Monitor Pro")
page = st.sidebar.radio(
    "Navigation",
    ["Analysis", "Dashboard", "Co-Mention Network", "Scheduled Monitoring", "History"]
)

# (analysis key, CSV "Mention Type" label, key holding the mention text)
_MENTION_KINDS = (
    ('explicit_mentions', 'Explicit', 'mention'),
    ('indirect_mentions', 'Indirect', 'reference'),
)

_SENTIMENT_EMOJI = {"positive": "😊", "negative": "😞", "neutral": "😐"}

# Dashboard "Time Range" options mapped to the look-back window they select
# (None means no time restriction).  Insertion order is the display order.
_TIME_RANGE_WINDOWS = {
    "Last 24 hours": timedelta(hours=24),
    "Last 7 days": timedelta(days=7),
    "Last 30 days": timedelta(days=30),
    "All time": None,
}

# Mention sort options mapped to (attribute name, reverse flag).
_MENTION_SORTS = {
    "Date (Newest)": ('created_at', True),
    "Date (Oldest)": ('created_at', False),
    "Confidence (High to Low)": ('confidence', True),
    "Confidence (Low to High)": ('confidence', False),
}


def _parse_brand_names(raw_text: str) -> list:
    """Split a one-brand-per-line text blob into stripped, non-empty names."""
    return [line.strip() for line in raw_text.splitlines() if line.strip()]


def _is_within_window(created_at, window) -> bool:
    """Return True when *created_at* is no older than *window* (None = always)."""
    if window is None:
        return True
    # Use the timestamp's own tzinfo so naive and aware values both compare
    # without raising.
    # NOTE(review): naive timestamps are compared against local time; confirm
    # which timezone the database stores.
    return datetime.now(tz=created_at.tzinfo) - created_at <= window


def create_csv_export(results: dict) -> str:
    """Create CSV content from batch analysis results.

    Args:
        results: Mapping of brand name to a list of per-article result dicts.
            Each result may carry an ``analysis`` dict with
            ``explicit_mentions`` and ``indirect_mentions`` lists.

    Returns:
        CSV text with one row per mention, or the literal string
        ``"No data to export"`` when no mention was found.
    """
    # One timestamp for the whole export so every row of a report agrees.
    exported_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    rows = []
    for brand_name, analysis_results in results.items():
        for result in analysis_results:
            # `or {}` also covers an explicit None stored under 'analysis'.
            analysis = result.get('analysis') or {}
            for mentions_key, type_label, text_key in _MENTION_KINDS:
                for mention in analysis.get(mentions_key, []):
                    rows.append({
                        'Brand': brand_name,
                        'URL': result.get('url', ''),
                        'Article Title': result.get('title', ''),
                        'Source': result.get('source', 'unknown'),
                        'Mention Type': type_label,
                        'Mention Text': mention.get(text_key, ''),
                        'Context': mention.get('context', ''),
                        'Sentiment': mention.get('sentiment', ''),
                        'Explanation': mention.get('explanation', ''),
                        'Timestamp': exported_at
                    })

    if not rows:
        return "No data to export"
    return pd.DataFrame(rows).to_csv(index=False)


def _persist_batch_results(search_query, brand_names, search_engines, batch_results):
    """Save every non-empty brand result set through `save_analysis_to_db`."""
    for brand_name, results in batch_results.items():
        if not results:
            continue

        # NOTE(review): the full result list is saved once per selected
        # engine, regardless of each article's own 'source'; confirm this is
        # the intended attribution.
        for engine in search_engines:
            save_analysis_to_db(search_query, brand_name, engine, results)

        # Track co-mentions if multiple brands
        if len(brand_names) <= 1:
            continue
        for result in results:
            analysis = result.get('analysis') or {}
            if not (analysis.get('explicit_mentions') or analysis.get('indirect_mentions')):
                continue
            article_content = result.get('content', '').lower()
            mentioned_brands = [
                other_brand for other_brand in brand_names
                if other_brand != brand_name and other_brand.lower() in article_content
            ]
            if mentioned_brands:
                mentioned_brands.append(brand_name)
                # TODO: co-mentions are computed but not persisted yet; the
                # original code defers saving until an article id is available.


def _render_batch_results(batch_results):
    """Show summary metrics and a per-brand expander for the batch results."""
    st.markdown("---")
    st.subheader("πŸ“Š Analysis Results")

    # Summary metrics
    total_brands = len(batch_results)
    total_articles = sum(len(results) for results in batch_results.values())
    total_mentions = sum(
        r.get('total_mentions', 0)
        for results in batch_results.values()
        for r in results
    )

    col1, col2, col3 = st.columns(3)
    with col1:
        st.metric("Brands Analyzed", total_brands)
    with col2:
        st.metric("Total Articles", total_articles)
    with col3:
        st.metric("Total Mentions", total_mentions)

    # Display results by brand
    for brand_name, results in batch_results.items():
        with st.expander(f"**{brand_name}** - {len(results)} articles"):
            if not results:
                st.info("No results found")
                continue

            # Brand-specific metrics
            mentions_count = sum(r.get('total_mentions', 0) for r in results)
            articles_with_mentions = sum(1 for r in results if r.get('total_mentions', 0) > 0)

            col1, col2 = st.columns(2)
            with col1:
                st.metric("Articles with Mentions", articles_with_mentions)
            with col2:
                st.metric("Total Mentions", mentions_count)

            # Only the first five articles are shown, two explicit mentions each.
            for result in results[:5]:
                analysis = result.get('analysis') or {}
                if not (analysis.get('explicit_mentions') or analysis.get('indirect_mentions')):
                    continue
                st.markdown(f"**πŸ“„ {result['title'][:80]}...**")
                st.caption(f"πŸ”— {result['url']} | Source: {result.get('source', 'unknown')}")
                for mention in analysis.get('explicit_mentions', [])[:2]:
                    sentiment_emoji = _SENTIMENT_EMOJI.get(mention.get('sentiment'), "😐")
                    st.markdown(f"- {sentiment_emoji} *{mention.get('mention', '')}*")


def render_analysis_page():
    """Render the main analysis page with batch processing.

    Collects the query, brands, engines and options from the sidebar, runs
    `batch_analyze_brands` when the button is pressed, stores the outcome in
    ``st.session_state.batch_results`` and the database, then displays it.
    """
    st.title("πŸ” Brand Mention Analysis")
    st.markdown("Analyze multiple brands across different search engines simultaneously")

    # Configuration sidebar
    with st.sidebar:
        st.header("πŸ”§ Configuration")

        search_query = st.text_input(
            "Search Query",
            placeholder="e.g., AI startups 2024",
            help="Base search query to find relevant articles"
        )

        brand_names_input = st.text_area(
            "Brand Names (one per line)",
            placeholder="OpenAI\nAnthropic\nGoogle AI",
            help="Enter brand names to monitor, one per line"
        )

        # Search engine selection
        st.subheader("🌐 Search Engines")
        use_google = st.checkbox("Google (SerpAPI)", value=True)
        use_bing = st.checkbox("Bing")
        use_duckduckgo = st.checkbox("DuckDuckGo")

        search_engines = [
            engine for engine, enabled in (
                ('google', use_google),
                ('bing', use_bing),
                ('duckduckgo', use_duckduckgo),
            ) if enabled
        ]

        num_results = st.slider(
            "Results per engine",
            min_value=5,
            max_value=15,
            value=10
        )

        custom_prompt = st.text_area(
            "Custom Analysis Prompt (Optional)",
            placeholder="Leave empty for default analysis...",
            height=100
        )

        analyze_button = st.button("πŸš€ Start Batch Analysis", type="primary", use_container_width=True)

        # Export section
        if st.session_state.batch_results:
            st.markdown("---")
            st.subheader("πŸ“₯ Export Results")

            csv_content = create_csv_export(st.session_state.batch_results)
            timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')

            st.download_button(
                label="πŸ’Ύ Download CSV Report",
                data=csv_content,
                file_name=f"batch_brand_analysis_{timestamp}.csv",
                mime="text/csv",
                use_container_width=True
            )

    # Main content
    if analyze_button:
        if not search_query:
            st.error("⚠️ Please enter a search query")
            return

        if not brand_names_input.strip():
            st.error("⚠️ Please enter at least one brand name")
            return

        if not search_engines:
            st.error("⚠️ Please select at least one search engine")
            return

        brand_names = _parse_brand_names(brand_names_input)

        st.info(f"πŸ“Š Analyzing {len(brand_names)} brand(s) across {len(search_engines)} search engine(s)")

        # Batch analysis
        analyzer = BrandAnalyzer()
        batch_results = batch_analyze_brands(
            search_query,
            brand_names,
            search_engines,
            num_results,
            custom_prompt,
            analyzer,
            scrape_article_content
        )

        # Save to session state first so results survive the rerun below.
        st.session_state.batch_results = batch_results
        _persist_batch_results(search_query, brand_names, search_engines, batch_results)

        st.success("βœ… Batch analysis complete and saved to database!")
        st.rerun()

    # Display results
    if st.session_state.batch_results:
        _render_batch_results(st.session_state.batch_results)
    else:
        st.info("πŸ‘ˆ Configure your analysis in the sidebar and click 'Start Batch Analysis' to begin")


def render_dashboard():
    """Render the analytics dashboard.

    Loads up to 100 historical analyses, narrows them by the sidebar brand
    and time-range filters, and shows summary metrics, sentiment charts, a
    per-day mention trend and a filterable list of individual mentions.
    """
    st.title("πŸ“Š Brand Analytics Dashboard")

    # Get historical data
    analyses = get_historical_analyses(limit=100)

    if not analyses:
        st.info("No historical data available. Run some analyses first!")
        return

    # Filter controls
    st.sidebar.subheader("πŸ“Š Dashboard Filters")

    # Sorted so the option order (and the default selection) is stable
    # between runs instead of following set iteration order.
    all_brands = sorted({a.brand_name for a in analyses})
    selected_brands = st.sidebar.multiselect(
        "Filter by Brand",
        all_brands,
        default=all_brands[:5]
    )

    # Time filter
    time_range = st.sidebar.selectbox(
        "Time Range",
        list(_TIME_RANGE_WINDOWS)
    )

    # Apply both filters; the time range was previously collected but ignored.
    window = _TIME_RANGE_WINDOWS[time_range]
    chosen_brands = set(selected_brands)
    filtered_analyses = [
        a for a in analyses
        if a.brand_name in chosen_brands and _is_within_window(a.created_at, window)
    ]

    if not filtered_analyses:
        st.info("No analyses match the selected filters.")
        return

    # Summary metrics
    col1, col2, col3, col4 = st.columns(4)

    total_analyses = len(filtered_analyses)
    total_mentions = sum(a.total_mentions for a in filtered_analyses)
    # max(..., 1) guards the division when no mentions were recorded.
    avg_sentiment = sum(a.positive_count for a in filtered_analyses) / max(total_mentions, 1)

    with col1:
        st.metric("Total Analyses", total_analyses)
    with col2:
        st.metric("Total Mentions", total_mentions)
    with col3:
        st.metric("Avg Positive %", f"{avg_sentiment*100:.1f}%")
    with col4:
        active_brands = len({a.brand_name for a in filtered_analyses})
        st.metric("Active Brands", active_brands)

    # Sentiment Distribution Chart
    st.subheader("πŸ“ˆ Sentiment Distribution")

    total_positive = sum(a.positive_count for a in filtered_analyses)
    total_negative = sum(a.negative_count for a in filtered_analyses)
    total_neutral = sum(a.neutral_count for a in filtered_analyses)

    col1, col2 = st.columns(2)

    with col1:
        # Pie chart
        fig_pie = go.Figure(data=[go.Pie(
            labels=['Positive', 'Negative', 'Neutral'],
            values=[total_positive, total_negative, total_neutral],
            marker=dict(colors=['#00D26A', '#FF5C5C', '#FFD700'])
        )])
        fig_pie.update_layout(title="Overall Sentiment Distribution")
        st.plotly_chart(fig_pie, use_container_width=True)

    with col2:
        # Bar chart by brand
        brand_sentiment = defaultdict(lambda: {'positive': 0, 'negative': 0, 'neutral': 0})

        for analysis in filtered_analyses:
            totals = brand_sentiment[analysis.brand_name]
            totals['positive'] += analysis.positive_count
            totals['negative'] += analysis.negative_count
            totals['neutral'] += analysis.neutral_count

        brands = list(brand_sentiment.keys())
        positive_vals = [brand_sentiment[b]['positive'] for b in brands]
        negative_vals = [brand_sentiment[b]['negative'] for b in brands]
        neutral_vals = [brand_sentiment[b]['neutral'] for b in brands]

        fig_bar = go.Figure(data=[
            go.Bar(name='Positive', x=brands, y=positive_vals, marker_color='#00D26A'),
            go.Bar(name='Negative', x=brands, y=negative_vals, marker_color='#FF5C5C'),
            go.Bar(name='Neutral', x=brands, y=neutral_vals, marker_color='#FFD700')
        ])
        fig_bar.update_layout(
            title="Sentiment by Brand",
            barmode='stack',
            xaxis_title="Brand",
            yaxis_title="Mentions"
        )
        st.plotly_chart(fig_bar, use_container_width=True)

    # Trend over time
    st.subheader("πŸ“… Mention Trends Over Time")

    trend_data = [
        {
            'Date': analysis.created_at.date(),
            'Brand': analysis.brand_name,
            'Mentions': analysis.total_mentions
        }
        for analysis in filtered_analyses
    ]

    # Collapse several analyses of one brand on one day into a single point
    # and order by date; otherwise the line doubles back on itself.
    df_trend = (
        pd.DataFrame(trend_data)
        .groupby(['Date', 'Brand'], as_index=False)['Mentions']
        .sum()
        .sort_values('Date')
    )
    fig_trend = px.line(
        df_trend,
        x='Date',
        y='Mentions',
        color='Brand',
        title="Brand Mentions Over Time"
    )
    st.plotly_chart(fig_trend, use_container_width=True)

    # Detailed mentions table with filtering
    st.subheader("πŸ” Detailed Mentions")

    # Get all mentions for filtered analyses
    all_mentions = []
    for analysis in filtered_analyses:
        all_mentions.extend(get_all_mentions(analysis_id=analysis.id))

    if not all_mentions:
        return

    # Sentiment filter
    sentiment_filter = st.multiselect(
        "Filter by Sentiment",
        ["positive", "negative", "neutral"],
        default=["positive", "negative", "neutral"]
    )

    # Sort options
    sort_by = st.selectbox(
        "Sort by",
        list(_MENTION_SORTS)
    )

    # Filter mentions
    filtered_mentions = [m for m in all_mentions if m.sentiment in sentiment_filter]

    # Sort mentions
    sort_attr, descending = _MENTION_SORTS[sort_by]
    filtered_mentions.sort(key=lambda m: getattr(m, sort_attr), reverse=descending)

    # Display mentions (limit to 20)
    for mention in filtered_mentions[:20]:
        sentiment_emoji = _SENTIMENT_EMOJI.get(mention.sentiment, "😐")

        with st.expander(f"{sentiment_emoji} {mention.brand_name} - {mention.mention_type} ({mention.confidence:.0%} confidence)"):
            st.markdown(f"**Mention:** {mention.mention_text}")
            st.markdown(f"**Context:** {mention.context}")
            st.caption(f"**Explanation:** {mention.explanation}")
            st.caption(f"**Date:** {mention.created_at.strftime('%Y-%m-%d %H:%M')}")


def render_co_mention_network():
    """Render co-mention network visualization.

    Builds an undirected weighted graph from `get_co_mention_network()`,
    draws it with Plotly, and lists network statistics and the ten
    strongest brand pairs.
    """
    st.title("πŸ•ΈοΈ Brand Co-Mention Network")
    st.markdown("Visualize which brands are frequently mentioned together in articles")

    co_mentions = get_co_mention_network()

    if not co_mentions:
        st.info("No co-mention data available. Analyze multiple brands together to see relationships!")
        return

    # Build network graph
    G = nx.Graph()

    # Accumulate weights per unordered pair: the graph is undirected, so
    # (A, B) and (B, A) must land on the same key or add_edge would overwrite
    # one count with the other.
    edge_data = defaultdict(int)
    for cm in co_mentions:
        pair = tuple(sorted((cm.brand1, cm.brand2)))
        edge_data[pair] += cm.co_occurrence_count

    for (brand1, brand2), count in edge_data.items():
        G.add_edge(brand1, brand2, weight=count)

    # Fixed seed keeps node positions stable across Streamlit reruns.
    pos = nx.spring_layout(G, k=2, iterations=50, seed=42)

    # Create edge traces (one per edge so each can have its own width)
    edge_traces = []
    for source, target in G.edges():
        x0, y0 = pos[source]
        x1, y1 = pos[target]
        weight = G[source][target]['weight']

        edge_traces.append(go.Scatter(
            x=[x0, x1, None],
            y=[y0, y1, None],
            mode='lines',
            line=dict(width=weight*2, color='#888'),
            hoverinfo='text',
            text=f"{source} ↔ {target}: {weight} co-mentions",
            showlegend=False
        ))

    # Create node trace
    node_x = []
    node_y = []
    node_text = []
    node_size = []

    for node in G.nodes():
        x, y = pos[node]
        node_x.append(x)
        node_y.append(y)

        # Node size grows with the number of connected brands.
        connections = G.degree(node)
        node_size.append(30 + connections * 10)
        # Plotly hover labels use <br> for line breaks.
        node_text.append(f"{node}<br>Connections: {connections}")

    node_trace = go.Scatter(
        x=node_x,
        y=node_y,
        mode='markers+text',
        text=list(G.nodes()),
        textposition="top center",
        hovertext=node_text,
        hoverinfo='text',
        marker=dict(
            size=node_size,
            color='#1f77b4',
            line=dict(width=2, color='white')
        ),
        showlegend=False
    )

    # Create figure
    fig = go.Figure(data=edge_traces + [node_trace])
    fig.update_layout(
        title="Brand Co-Mention Network",
        title_font_size=20,
        showlegend=False,
        hovermode='closest',
        margin=dict(b=0, l=0, r=0, t=40),
        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        height=600
    )

    st.plotly_chart(fig, use_container_width=True)

    # Network statistics
    st.subheader("πŸ“Š Network Statistics")
    col1, col2, col3 = st.columns(3)

    with col1:
        st.metric("Total Brands", len(G.nodes()))
    with col2:
        st.metric("Total Relationships", len(G.edges()))
    with col3:
        density = nx.density(G)
        st.metric("Network Density", f"{density:.2%}")

    # Top co-mentions
    st.subheader("πŸ” Top Co-Mentions")
    top_pairs = sorted(edge_data.items(), key=lambda item: item[1], reverse=True)[:10]

    for (brand1, brand2), count in top_pairs:
        st.write(f"**{brand1}** ↔ **{brand2}**: {count} co-mentions")


def render_scheduled_monitoring():
    """Render scheduled monitoring page.

    Offers a form that creates a recurring job via `create_scheduled_job`
    and lists the active jobs returned by `get_scheduled_jobs`.
    """
    st.title("⏰ Scheduled Brand Monitoring")
    st.markdown("Set up recurring brand analyses")

    # Create new schedule
    with st.expander("βž• Create New Schedule", expanded=True):
        col1, col2 = st.columns(2)

        with col1:
            schedule_query = st.text_input("Search Query", placeholder="AI technology news")
            schedule_brands = st.text_area(
                "Brand Names (one per line)",
                placeholder="OpenAI\nGoogle\nMicrosoft"
            )

        with col2:
            schedule_engines = st.multiselect(
                "Search Engines",
                ["google", "bing", "duckduckgo"],
                default=["google"]
            )
            schedule_frequency = st.selectbox(
                "Frequency",
                ["daily", "weekly", "monthly"]
            )

        if st.button("Create Schedule"):
            brands = _parse_brand_names(schedule_brands)
            # Validate the parsed values: whitespace-only input or an empty
            # engine selection must not create a job.
            if schedule_query.strip() and brands and schedule_engines:
                job_id = create_scheduled_job(
                    schedule_query,
                    brands,
                    schedule_engines,
                    schedule_frequency
                )
                if job_id:
                    st.success(f"βœ… Schedule created successfully! (ID: {job_id})")
                    st.rerun()
                else:
                    st.error("Failed to create schedule")
            else:
                st.error("Please fill in all fields")

    # List existing schedules
    st.subheader("πŸ“… Active Schedules")
    jobs = get_scheduled_jobs(active_only=True)

    if not jobs:
        st.info("No active schedules. Create one above!")
        return

    for job in jobs:
        with st.expander(f"πŸ”” {job.search_query} - {job.schedule_type}"):
            st.write(f"**Brands:** {job.brand_names}")
            st.write(f"**Engines:** {job.search_engines}")
            st.write(f"**Frequency:** {job.schedule_type}")
            if job.last_run:
                st.write(f"**Last Run:** {job.last_run.strftime('%Y-%m-%d %H:%M')}")
            if job.next_run:
                st.write(f"**Next Run:** {job.next_run.strftime('%Y-%m-%d %H:%M')}")
            st.caption(f"Created: {job.created_at.strftime('%Y-%m-%d')}")


def render_history():
    """Render analysis history as a table with an optional brand filter."""
    st.title("πŸ“š Analysis History")

    analyses = get_historical_analyses(limit=50)

    if not analyses:
        st.info("No historical analyses available")
        return

    # Create DataFrame
    df_history = pd.DataFrame([
        {
            'Date': a.created_at.strftime('%Y-%m-%d %H:%M'),
            'Brand': a.brand_name,
            'Query': a.search_query,
            'Engine': a.search_engine,
            'Articles': a.total_articles,
            'Mentions': a.total_mentions,
            'Positive': a.positive_count,
            'Negative': a.negative_count,
            'Neutral': a.neutral_count
        }
        for a in analyses
    ])

    # Display with filtering; an empty selection shows every brand.
    brand_filter = st.multiselect(
        "Filter by Brand",
        df_history['Brand'].unique(),
        default=None
    )

    if brand_filter:
        df_history = df_history[df_history['Brand'].isin(brand_filter)]

    st.dataframe(df_history, use_container_width=True)


# Main routing: map the selected navigation label to its page renderer.
_PAGE_RENDERERS = {
    "Analysis": render_analysis_page,
    "Dashboard": render_dashboard,
    "Co-Mention Network": render_co_mention_network,
    "Scheduled Monitoring": render_scheduled_monitoring,
    "History": render_history,
}

_render_page = _PAGE_RENDERERS.get(page)
if _render_page is not None:
    _render_page()