Spaces:
Sleeping
Sleeping
import os
from collections import Counter, defaultdict
from datetime import datetime, timedelta

import networkx as nx
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st
from dotenv import load_dotenv

from brand_analyzer import BrandAnalyzer
from database import init_database
from db_operations import (
    save_analysis_to_db, get_historical_analyses, get_all_mentions,
    save_co_mentions, get_co_mention_network, create_scheduled_job,
    get_scheduled_jobs
)
from scheduler import get_scheduler
from search_engines import multi_engine_search, batch_analyze_brands
from web_scraper import scrape_article_content
# Load environment variables and initialize database
load_dotenv()
init_database()

# Page configuration
st.set_page_config(
    page_title="Brand Monitoring Dashboard",
    page_icon="π",
    layout="wide"
)

# Initialize scheduler. A failure is reported as a warning so the rest of the
# page still renders.
try:
    scheduler = get_scheduler()
except Exception as e:
    # Keep the name bound: any later reference sees None instead of raising
    # NameError.
    scheduler = None
    st.warning(f"Scheduler initialization warning: {e}")

# Initialize session state. Only keys that are missing are written, so values
# from earlier runs of the script are preserved.
_session_defaults = {
    'batch_results': {},
    'current_page': 'Analysis',
    'selected_analysis_ids': [],
}
for _key, _default in _session_defaults.items():
    if _key not in st.session_state:
        st.session_state[_key] = _default

# Sidebar navigation
st.sidebar.title("π Brand Monitor Pro")
page = st.sidebar.radio(
    "Navigation",
    ["Analysis", "Dashboard", "Co-Mention Network", "Scheduled Monitoring", "History"]
)
def create_csv_export(results: dict) -> str:
    """Create CSV content from batch analysis results.

    Args:
        results: Mapping of brand name to a list of per-article result dicts.
            Each result is read for ``url``, ``title``, ``source`` and an
            ``analysis`` dict holding ``explicit_mentions`` and
            ``indirect_mentions`` lists of mention dicts.

    Returns:
        CSV text with a header and one row per mention, or the literal string
        ``"No data to export"`` when there is no mention at all.
    """
    # (label written to the CSV, list key in the analysis, key of the mention text)
    mention_kinds = (
        ('Explicit', 'explicit_mentions', 'mention'),
        ('Indirect', 'indirect_mentions', 'reference'),
    )
    # Taken once so every row of a report carries the same timestamp.
    exported_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    csv_data = []
    for brand_name, analysis_results in results.items():
        for result in analysis_results or []:
            # A missing or None analysis simply contributes no rows.
            analysis = result.get('analysis') or {}
            for label, list_key, text_key in mention_kinds:
                for mention in analysis.get(list_key) or []:
                    csv_data.append({
                        'Brand': brand_name,
                        'URL': result.get('url', ''),
                        'Article Title': result.get('title', ''),
                        'Source': result.get('source', 'unknown'),
                        'Mention Type': label,
                        'Mention Text': mention.get(text_key, ''),
                        'Context': mention.get('context', ''),
                        'Sentiment': mention.get('sentiment', ''),
                        'Explanation': mention.get('explanation', ''),
                        'Timestamp': exported_at,
                    })

    if not csv_data:
        return "No data to export"
    return pd.DataFrame(csv_data).to_csv(index=False)
def render_analysis_page():
    """Render the main analysis page with batch processing.

    The sidebar collects the search query, the brand list, the search
    engines, the number of results per engine and an optional custom prompt.
    Pressing the start button validates the input, runs
    ``batch_analyze_brands``, stores the outcome in
    ``st.session_state.batch_results``, saves it with ``save_analysis_to_db``
    and reruns the script. Stored results are summarised in the main area and
    offered as a CSV download in the sidebar.
    """
    st.title("π Brand Mention Analysis")
    st.markdown("Analyze multiple brands across different search engines simultaneously")

    # Configuration sidebar
    with st.sidebar:
        st.header("π§ Configuration")
        search_query = st.text_input(
            "Search Query",
            placeholder="e.g., AI startups 2024",
            help="Base search query to find relevant articles"
        )
        brand_names_input = st.text_area(
            "Brand Names (one per line)",
            placeholder="OpenAI\nAnthropic\nGoogle AI",
            help="Enter brand names to monitor, one per line"
        )

        # Search engine selection
        st.subheader("π Search Engines")
        use_google = st.checkbox("Google (SerpAPI)", value=True)
        use_bing = st.checkbox("Bing")
        use_duckduckgo = st.checkbox("DuckDuckGo")
        engine_choices = (
            ('google', use_google),
            ('bing', use_bing),
            ('duckduckgo', use_duckduckgo),
        )
        search_engines = [name for name, enabled in engine_choices if enabled]

        num_results = st.slider(
            "Results per engine",
            min_value=5,
            max_value=15,
            value=10
        )
        custom_prompt = st.text_area(
            "Custom Analysis Prompt (Optional)",
            placeholder="Leave empty for default analysis...",
            height=100
        )
        analyze_button = st.button("π Start Batch Analysis", type="primary", use_container_width=True)

        # Export section
        if st.session_state.batch_results:
            st.markdown("---")
            st.subheader("π₯ Export Results")
            csv_content = create_csv_export(st.session_state.batch_results)
            timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
            st.download_button(
                label="πΎ Download CSV Report",
                data=csv_content,
                file_name=f"batch_brand_analysis_{timestamp}.csv",
                mime="text/csv",
                use_container_width=True
            )

    # Main content
    if analyze_button:
        # Parse brand names first so whitespace-only input is rejected below.
        brand_names = [b.strip() for b in brand_names_input.split('\n') if b.strip()]

        if not search_query.strip():
            st.error("β οΈ Please enter a search query")
            return
        if not brand_names:
            st.error("β οΈ Please enter at least one brand name")
            return
        if not search_engines:
            st.error("β οΈ Please select at least one search engine")
            return

        st.info(f"π Analyzing {len(brand_names)} brand(s) across {len(search_engines)} search engine(s)")

        # Batch analysis
        analyzer = BrandAnalyzer()
        batch_results = batch_analyze_brands(
            search_query,
            brand_names,
            search_engines,
            num_results,
            custom_prompt,
            analyzer,
            scrape_article_content
        )

        # Save to database and session state
        st.session_state.batch_results = batch_results
        for brand_name, results in batch_results.items():
            if not results:
                continue
            # NOTE(review): the same result list is saved once per selected
            # engine. If it already combines all engines, this stores each
            # article several times — confirm against save_analysis_to_db.
            for engine in search_engines:
                save_analysis_to_db(search_query, brand_name, engine, results)

        # TODO: persist co-mentions. The previous code collected, per article,
        # the other requested brands whose name occurs in the article content,
        # then discarded the list because no article id was available for
        # save_co_mentions.

        # st.rerun() stops this run immediately, so a message rendered here
        # would never be seen. Leave a flag and show it on the next run.
        st.session_state.analysis_saved_notice = True
        st.rerun()

    if st.session_state.get('analysis_saved_notice'):
        st.session_state.analysis_saved_notice = False
        st.success("β Batch analysis complete and saved to database!")

    # Display results
    if st.session_state.batch_results:
        st.markdown("---")
        st.subheader("π Analysis Results")

        # Summary metrics
        stored_results = st.session_state.batch_results
        total_brands = len(stored_results)
        total_articles = sum(len(results) for results in stored_results.values())
        total_mentions = sum(
            r.get('total_mentions', 0)
            for results in stored_results.values()
            for r in results
        )
        col1, col2, col3 = st.columns(3)
        with col1:
            st.metric("Brands Analyzed", total_brands)
        with col2:
            st.metric("Total Articles", total_articles)
        with col3:
            st.metric("Total Mentions", total_mentions)

        sentiment_emojis = {"positive": "π", "negative": "π", "neutral": "π"}

        # Display results by brand
        for brand_name, results in stored_results.items():
            with st.expander(f"**{brand_name}** - {len(results)} articles"):
                if not results:
                    st.info("No results found")
                    continue

                # Brand-specific metrics
                mentions_count = sum(r.get('total_mentions', 0) for r in results)
                articles_with_mentions = sum(1 for r in results if r.get('total_mentions', 0) > 0)
                col1, col2 = st.columns(2)
                with col1:
                    st.metric("Articles with Mentions", articles_with_mentions)
                with col2:
                    st.metric("Total Mentions", mentions_count)

                # Show top mentions (first 5 articles, up to 2 explicit mentions each)
                for result in results[:5]:
                    analysis = result.get('analysis', {})
                    if not (analysis.get('explicit_mentions') or analysis.get('indirect_mentions')):
                        continue
                    title = result.get('title', '')
                    # Only add an ellipsis when the title was actually cut.
                    shown_title = f"{title[:80]}..." if len(title) > 80 else title
                    st.markdown(f"**π {shown_title}**")
                    st.caption(f"π {result.get('url', '')} | Source: {result.get('source', 'unknown')}")
                    for mention in analysis.get('explicit_mentions', [])[:2]:
                        sentiment_emoji = sentiment_emojis.get(mention.get('sentiment'), "π")
                        st.markdown(f"- {sentiment_emoji} *{mention.get('mention', '')}*")
    else:
        st.info("π Configure your analysis in the sidebar and click 'Start Batch Analysis' to begin")
# Look-back window for each "Time Range" choice. "All time" has no entry,
# which means no limit.
_DASHBOARD_TIME_WINDOWS = {
    "Last 24 hours": timedelta(hours=24),
    "Last 7 days": timedelta(days=7),
    "Last 30 days": timedelta(days=30),
}


def _is_within_window(created_at, window):
    """Return True when *created_at* is no older than *window* (None = no limit)."""
    if window is None:
        return True
    if created_at is None:
        return False
    # Compare in the timestamp's own timezone so aware values do not raise.
    # NOTE(review): naive timestamps are compared with naive local time —
    # confirm how created_at is stored (UTC storage would shift the window).
    now = datetime.now(tz=created_at.tzinfo)
    return now - created_at <= window


def render_dashboard():
    """Render the analytics dashboard.

    Loads up to 100 analyses via ``get_historical_analyses``, narrows them by
    the brand and time-range filters in the sidebar, and then shows summary
    metrics, sentiment charts, a mentions-over-time chart and a filterable,
    sortable list of at most 20 individual mentions.
    """
    st.title("π Brand Analytics Dashboard")

    # Get historical data
    analyses = get_historical_analyses(limit=100)
    if not analyses:
        st.info("No historical data available. Run some analyses first!")
        return

    # Filter controls
    st.sidebar.subheader("π Dashboard Filters")

    # Brand filter. Sorted so option order and defaults are stable across reruns.
    all_brands = sorted({a.brand_name for a in analyses})
    selected_brands = st.sidebar.multiselect(
        "Filter by Brand",
        all_brands,
        default=all_brands[:5]
    )

    # Time filter
    time_range = st.sidebar.selectbox(
        "Time Range",
        ["Last 24 hours", "Last 7 days", "Last 30 days", "All time"]
    )
    window = _DASHBOARD_TIME_WINDOWS.get(time_range)

    # Filter analyses by brand and by age
    filtered_analyses = [
        a for a in analyses
        if a.brand_name in selected_brands and _is_within_window(a.created_at, window)
    ]

    # Summary metrics
    col1, col2, col3, col4 = st.columns(4)
    total_analyses = len(filtered_analyses)
    total_mentions = sum(a.total_mentions for a in filtered_analyses)
    # Share of positive mentions; max(..., 1) avoids division by zero.
    avg_sentiment = sum(a.positive_count for a in filtered_analyses) / max(total_mentions, 1)
    with col1:
        st.metric("Total Analyses", total_analyses)
    with col2:
        st.metric("Total Mentions", total_mentions)
    with col3:
        st.metric("Avg Positive %", f"{avg_sentiment*100:.1f}%")
    with col4:
        active_brands = len({a.brand_name for a in filtered_analyses})
        st.metric("Active Brands", active_brands)

    # Sentiment Distribution Chart
    st.subheader("π Sentiment Distribution")
    if filtered_analyses:
        total_positive = sum(a.positive_count for a in filtered_analyses)
        total_negative = sum(a.negative_count for a in filtered_analyses)
        total_neutral = sum(a.neutral_count for a in filtered_analyses)

        col1, col2 = st.columns(2)
        with col1:
            # Pie chart
            fig_pie = go.Figure(data=[go.Pie(
                labels=['Positive', 'Negative', 'Neutral'],
                values=[total_positive, total_negative, total_neutral],
                marker=dict(colors=['#00D26A', '#FF5C5C', '#FFD700'])
            )])
            fig_pie.update_layout(title="Overall Sentiment Distribution")
            st.plotly_chart(fig_pie, use_container_width=True)
        with col2:
            # Bar chart by brand
            brand_sentiment = defaultdict(lambda: {'positive': 0, 'negative': 0, 'neutral': 0})
            for analysis in filtered_analyses:
                counts = brand_sentiment[analysis.brand_name]
                counts['positive'] += analysis.positive_count
                counts['negative'] += analysis.negative_count
                counts['neutral'] += analysis.neutral_count
            brands = list(brand_sentiment)
            fig_bar = go.Figure(data=[
                go.Bar(name='Positive', x=brands,
                       y=[brand_sentiment[b]['positive'] for b in brands], marker_color='#00D26A'),
                go.Bar(name='Negative', x=brands,
                       y=[brand_sentiment[b]['negative'] for b in brands], marker_color='#FF5C5C'),
                go.Bar(name='Neutral', x=brands,
                       y=[brand_sentiment[b]['neutral'] for b in brands], marker_color='#FFD700')
            ])
            fig_bar.update_layout(
                title="Sentiment by Brand",
                barmode='stack',
                xaxis_title="Brand",
                yaxis_title="Mentions"
            )
            st.plotly_chart(fig_bar, use_container_width=True)

    # Trend over time
    st.subheader("π Mention Trends Over Time")
    trend_data = [
        {
            'Date': analysis.created_at.date(),
            'Brand': analysis.brand_name,
            'Mentions': analysis.total_mentions
        }
        for analysis in filtered_analyses
    ]
    if trend_data:
        # Several analyses can share a day and brand. Sum them so the line has
        # one point per (date, brand), ordered by date.
        df_trend = (
            pd.DataFrame(trend_data)
            .groupby(['Date', 'Brand'], as_index=False)['Mentions']
            .sum()
            .sort_values('Date')
        )
        fig_trend = px.line(
            df_trend,
            x='Date',
            y='Mentions',
            color='Brand',
            title="Brand Mentions Over Time"
        )
        st.plotly_chart(fig_trend, use_container_width=True)

    # Detailed mentions table with filtering
    st.subheader("π Detailed Mentions")

    # Get all mentions for filtered analyses (one lookup per analysis)
    all_mentions = []
    for analysis in filtered_analyses:
        all_mentions.extend(get_all_mentions(analysis_id=analysis.id))

    if all_mentions:
        # Sentiment filter
        sentiment_filter = st.multiselect(
            "Filter by Sentiment",
            ["positive", "negative", "neutral"],
            default=["positive", "negative", "neutral"]
        )

        # Sort options: label -> (sort key, descending?)
        sort_options = {
            "Date (Newest)": (lambda m: m.created_at, True),
            "Date (Oldest)": (lambda m: m.created_at, False),
            "Confidence (High to Low)": (lambda m: m.confidence, True),
            "Confidence (Low to High)": (lambda m: m.confidence, False),
        }
        sort_by = st.selectbox("Sort by", list(sort_options))

        # Filter and sort mentions
        sort_key, descending = sort_options[sort_by]
        filtered_mentions = sorted(
            (m for m in all_mentions if m.sentiment in sentiment_filter),
            key=sort_key,
            reverse=descending,
        )

        # Display mentions
        sentiment_emojis = {"positive": "π", "negative": "π", "neutral": "π"}
        for mention in filtered_mentions[:20]:  # Limit to 20
            sentiment_emoji = sentiment_emojis.get(mention.sentiment, "π")
            with st.expander(f"{sentiment_emoji} {mention.brand_name} - {mention.mention_type} ({mention.confidence:.0%} confidence)"):
                st.markdown(f"**Mention:** {mention.mention_text}")
                st.markdown(f"**Context:** {mention.context}")
                st.caption(f"**Explanation:** {mention.explanation}")
                st.caption(f"**Date:** {mention.created_at.strftime('%Y-%m-%d %H:%M')}")
def render_co_mention_network():
    """Render co-mention network visualization.

    Reads the records returned by ``get_co_mention_network``, sums
    ``co_occurrence_count`` per unordered brand pair, draws the pairs as a
    weighted undirected graph with Plotly, and lists network statistics and
    the ten strongest pairs.
    """
    st.title("πΈοΈ Brand Co-Mention Network")
    st.markdown("Visualize which brands are frequently mentioned together in articles")

    co_mentions = get_co_mention_network()
    if not co_mentions:
        st.info("No co-mention data available. Analyze multiple brands together to see relationships!")
        return

    # Sum counts per unordered pair. The graph is undirected, so (A, B) and
    # (B, A) are the same edge; keeping them apart would let the later
    # add_edge overwrite the earlier weight and duplicate the pair in the
    # top list.
    edge_data = defaultdict(int)
    for cm in co_mentions:
        pair = tuple(sorted((cm.brand1, cm.brand2)))
        edge_data[pair] += cm.co_occurrence_count

    # Build network graph
    G = nx.Graph()
    for (brand1, brand2), count in edge_data.items():
        G.add_edge(brand1, brand2, weight=count)

    # Calculate layout. The fixed seed keeps node positions stable across
    # Streamlit reruns instead of reshuffling on every interaction.
    pos = nx.spring_layout(G, k=2, iterations=50, seed=42)

    # Create one trace per edge so each line can have its own width
    edge_traces = []
    for start, end, weight in G.edges(data='weight'):
        x0, y0 = pos[start]
        x1, y1 = pos[end]
        edge_traces.append(go.Scatter(
            x=[x0, x1, None],
            y=[y0, y1, None],
            mode='lines',
            line=dict(width=weight*2, color='#888'),
            hoverinfo='text',
            text=f"{start} β {end}: {weight} co-mentions",
            showlegend=False
        ))

    # Create node trace
    node_names = list(G.nodes())
    node_x = []
    node_y = []
    node_text = []
    node_size = []
    for node in node_names:
        x, y = pos[node]
        node_x.append(x)
        node_y.append(y)
        # Node size grows with the number of connected brands
        connections = G.degree(node)
        node_size.append(30 + connections * 10)
        node_text.append(f"{node}<br>Connections: {connections}")

    node_trace = go.Scatter(
        x=node_x,
        y=node_y,
        mode='markers+text',
        text=node_names,
        textposition="top center",
        hovertext=node_text,
        hoverinfo='text',
        marker=dict(
            size=node_size,
            color='#1f77b4',
            line=dict(width=2, color='white')
        ),
        showlegend=False
    )

    # Create figure
    fig = go.Figure(data=edge_traces + [node_trace])
    fig.update_layout(
        title="Brand Co-Mention Network",
        title_font_size=20,
        showlegend=False,
        hovermode='closest',
        margin=dict(b=0, l=0, r=0, t=40),
        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        height=600
    )
    st.plotly_chart(fig, use_container_width=True)

    # Network statistics
    st.subheader("π Network Statistics")
    col1, col2, col3 = st.columns(3)
    with col1:
        st.metric("Total Brands", len(G.nodes()))
    with col2:
        st.metric("Total Relationships", len(G.edges()))
    with col3:
        density = nx.density(G)
        st.metric("Network Density", f"{density:.2%}")

    # Top co-mentions
    st.subheader("π Top Co-Mentions")
    top_pairs = sorted(edge_data.items(), key=lambda item: item[1], reverse=True)[:10]
    for (brand1, brand2), count in top_pairs:
        st.write(f"**{brand1}** β **{brand2}**: {count} co-mentions")
def render_scheduled_monitoring():
    """Render scheduled monitoring page.

    Offers a form that creates a recurring analysis through
    ``create_scheduled_job`` and lists the jobs returned by
    ``get_scheduled_jobs(active_only=True)``.
    """
    st.title("β° Scheduled Brand Monitoring")
    st.markdown("Set up recurring brand analyses")

    # Create new schedule
    with st.expander("β Create New Schedule", expanded=True):
        col1, col2 = st.columns(2)
        with col1:
            schedule_query = st.text_input("Search Query", placeholder="AI technology news")
            schedule_brands = st.text_area(
                "Brand Names (one per line)",
                placeholder="OpenAI\nGoogle\nMicrosoft"
            )
        with col2:
            schedule_engines = st.multiselect(
                "Search Engines",
                ["google", "bing", "duckduckgo"],
                default=["google"]
            )
            schedule_frequency = st.selectbox(
                "Frequency",
                ["daily", "weekly", "monthly"]
            )

        if st.button("Create Schedule"):
            # Validate the parsed values, so whitespace-only input or an empty
            # engine selection cannot create a job that has nothing to run.
            brands = [b.strip() for b in schedule_brands.split('\n') if b.strip()]
            if schedule_query.strip() and brands and schedule_engines:
                job_id = create_scheduled_job(
                    schedule_query,
                    brands,
                    schedule_engines,
                    schedule_frequency
                )
                if job_id:
                    # No st.rerun() here: it would wipe this message before it
                    # can be read, and the job list below is queried after
                    # this point in the same run anyway.
                    st.success(f"β Schedule created successfully! (ID: {job_id})")
                else:
                    st.error("Schedule could not be created")
            else:
                st.error("Please fill in all fields")

    # List existing schedules
    st.subheader("π Active Schedules")
    jobs = get_scheduled_jobs(active_only=True)
    if not jobs:
        st.info("No active schedules. Create one above!")
        return

    for job in jobs:
        with st.expander(f"π {job.search_query} - {job.schedule_type}"):
            st.write(f"**Brands:** {job.brand_names}")
            st.write(f"**Engines:** {job.search_engines}")
            st.write(f"**Frequency:** {job.schedule_type}")
            # Run timestamps are only shown when they are set
            if job.last_run:
                st.write(f"**Last Run:** {job.last_run.strftime('%Y-%m-%d %H:%M')}")
            if job.next_run:
                st.write(f"**Next Run:** {job.next_run.strftime('%Y-%m-%d %H:%M')}")
            st.caption(f"Created: {job.created_at.strftime('%Y-%m-%d')}")
def render_history():
    """Show past analyses as a table that can be narrowed down by brand."""
    st.title("π Analysis History")

    past_runs = get_historical_analyses(limit=50)
    if not past_runs:
        st.info("No historical analyses available")
        return

    # One table row per stored analysis
    table = pd.DataFrame([
        {
            'Date': run.created_at.strftime('%Y-%m-%d %H:%M'),
            'Brand': run.brand_name,
            'Query': run.search_query,
            'Engine': run.search_engine,
            'Articles': run.total_articles,
            'Mentions': run.total_mentions,
            'Positive': run.positive_count,
            'Negative': run.negative_count,
            'Neutral': run.neutral_count
        }
        for run in past_runs
    ])

    # Optional brand filter; an empty selection keeps every row
    chosen_brands = st.multiselect(
        "Filter by Brand",
        table['Brand'].unique(),
        default=None
    )
    if chosen_brands:
        keep_rows = table['Brand'].isin(chosen_brands)
        table = table[keep_rows]

    st.dataframe(table, use_container_width=True)
# Main routing: map the sidebar selection to the function that draws that page
_page_renderers = {
    "Analysis": render_analysis_page,
    "Dashboard": render_dashboard,
    "Co-Mention Network": render_co_mention_network,
    "Scheduled Monitoring": render_scheduled_monitoring,
    "History": render_history,
}
_render_selected_page = _page_renderers.get(page)
# An unknown selection renders nothing, as before
if _render_selected_page is not None:
    _render_selected_page()