# BrandScanAI / app.py
# Arun21102003
# Fix: Add database initialization on startup
# 8641f78
import streamlit as st
import os
from datetime import datetime
import pandas as pd
from web_scraper import scrape_article_content
from brand_analyzer import BrandAnalyzer
from search_engines import multi_engine_search, batch_analyze_brands
from db_operations import (
save_analysis_to_db, get_historical_analyses, get_all_mentions,
save_co_mentions, get_co_mention_network, create_scheduled_job,
get_scheduled_jobs
)
import plotly.express as px
import plotly.graph_objects as go
import networkx as nx
from collections import Counter, defaultdict
from scheduler import get_scheduler
from database import init_database
from dotenv import load_dotenv
# Startup: load_dotenv() runs before init_database().
# NOTE(review): presumably the database layer reads its settings from the
# environment, which is why the order matters - confirm against database.py.
load_dotenv()
init_database()

# Page configuration
st.set_page_config(
    page_title="Brand Monitoring Dashboard",
    page_icon="πŸ”",
    layout="wide",
)

# Scheduler start-up is best-effort: a failure is reported as a warning and
# the rest of the script keeps running (``scheduler`` stays unbound then).
try:
    scheduler = get_scheduler()
except Exception as e:
    st.warning(f"Scheduler initialization warning: {e}")

# Seed session-state keys that are absent; factories give every session its
# own fresh container objects.
for _state_key, _make_default in (
    ('batch_results', dict),
    ('current_page', lambda: 'Analysis'),
    ('selected_analysis_ids', list),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _make_default()

# Sidebar navigation: ``page`` holds the selected label and drives routing.
st.sidebar.title("πŸ” Brand Monitor Pro")
_NAV_PAGES = ["Analysis", "Dashboard", "Co-Mention Network", "Scheduled Monitoring", "History"]
page = st.sidebar.radio("Navigation", _NAV_PAGES)
def create_csv_export(results: dict) -> str:
    """Create CSV content from batch analysis results.

    Args:
        results: Mapping of brand name to a list of per-article result dicts.
            Each result is read for ``url``, ``title``, ``source`` and an
            ``analysis`` dict holding ``explicit_mentions`` and
            ``indirect_mentions`` lists of mention dicts.

    Returns:
        CSV text with one row per mention (explicit mentions first, then
        indirect ones, for each article), or the literal string
        ``"No data to export"`` when there are no mentions at all.
    """
    # One timestamp per export so every row of a single report agrees.
    exported_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    # (key inside ``analysis``, label written to the CSV, key holding the text)
    mention_kinds = (
        ('explicit_mentions', 'Explicit', 'mention'),
        ('indirect_mentions', 'Indirect', 'reference'),
    )

    rows = []
    for brand_name, analysis_results in results.items():
        for result in analysis_results or []:
            # ``or {}`` also covers an explicit ``analysis: None`` entry.
            analysis = result.get('analysis') or {}
            for list_key, label, text_key in mention_kinds:
                for mention in analysis.get(list_key) or []:
                    rows.append({
                        'Brand': brand_name,
                        'URL': result.get('url', ''),
                        'Article Title': result.get('title', ''),
                        'Source': result.get('source', 'unknown'),
                        'Mention Type': label,
                        'Mention Text': mention.get(text_key, ''),
                        'Context': mention.get('context', ''),
                        'Sentiment': mention.get('sentiment', ''),
                        'Explanation': mention.get('explanation', ''),
                        'Timestamp': exported_at,
                    })

    if not rows:
        return "No data to export"
    return pd.DataFrame(rows).to_csv(index=False)
def _truncate_title(title, limit=80):
    """Return ``title`` cut to ``limit`` characters.

    The trailing ellipsis is added only when characters were actually dropped.
    """
    title = title or ""
    if len(title) <= limit:
        return title
    return f"{title[:limit]}..."


def _render_batch_results(batch_results):
    """Show summary metrics and a per-brand breakdown of ``batch_results``.

    Args:
        batch_results: Mapping of brand name to a list of result dicts. Each
            result is read for ``total_mentions``, ``analysis``, ``title``,
            ``url`` and ``source``.
    """
    sentiment_icons = {"positive": "😊", "negative": "😞", "neutral": "😐"}

    st.markdown("---")
    st.subheader("πŸ“Š Analysis Results")

    # Summary metrics across every brand
    total_brands = len(batch_results)
    total_articles = sum(len(results) for results in batch_results.values())
    total_mentions = sum(
        r.get('total_mentions', 0)
        for results in batch_results.values()
        for r in results
    )
    col1, col2, col3 = st.columns(3)
    with col1:
        st.metric("Brands Analyzed", total_brands)
    with col2:
        st.metric("Total Articles", total_articles)
    with col3:
        st.metric("Total Mentions", total_mentions)

    # Per-brand details
    for brand_name, results in batch_results.items():
        with st.expander(f"**{brand_name}** - {len(results)} articles"):
            if not results:
                st.info("No results found")
                continue

            mentions_count = sum(r.get('total_mentions', 0) for r in results)
            articles_with_mentions = sum(1 for r in results if r.get('total_mentions', 0) > 0)
            col1, col2 = st.columns(2)
            with col1:
                st.metric("Articles with Mentions", articles_with_mentions)
            with col2:
                st.metric("Total Mentions", mentions_count)

            # Only the first five articles are inspected, and only those that
            # carry at least one mention are printed.
            for result in results[:5]:
                analysis = result.get('analysis') or {}
                if not (analysis.get('explicit_mentions') or analysis.get('indirect_mentions')):
                    continue
                st.markdown(f"**πŸ“„ {_truncate_title(result.get('title'))}**")
                st.caption(f"πŸ”— {result.get('url', '')} | Source: {result.get('source', 'unknown')}")
                for mention in (analysis.get('explicit_mentions') or [])[:2]:
                    sentiment_emoji = sentiment_icons.get(mention.get('sentiment'), "😐")
                    st.markdown(f"- {sentiment_emoji} *{mention.get('mention', '')}*")


def render_analysis_page():
    """Render the main analysis page with batch processing.

    The sidebar collects the search query, brand names, search engines,
    result count and an optional custom prompt, and offers a CSV download of
    the results stored in ``st.session_state.batch_results``. Pressing the
    start button validates the inputs, runs ``batch_analyze_brands``, stores
    the outcome in session state, saves it with ``save_analysis_to_db`` and
    reruns the script so the sidebar export picks up the new results.

    The completion message is parked in ``st.session_state`` under
    ``batch_analysis_notice`` and displayed after the rerun; a message
    written right before ``st.rerun()`` would be discarded with that run.
    """
    st.title("πŸ” Brand Mention Analysis")
    st.markdown("Analyze multiple brands across different search engines simultaneously")

    # Configuration sidebar
    with st.sidebar:
        st.header("πŸ”§ Configuration")
        search_query = st.text_input(
            "Search Query",
            placeholder="e.g., AI startups 2024",
            help="Base search query to find relevant articles"
        )
        brand_names_input = st.text_area(
            "Brand Names (one per line)",
            placeholder="OpenAI\nAnthropic\nGoogle AI",
            help="Enter brand names to monitor, one per line"
        )

        # Search engine selection
        st.subheader("🌐 Search Engines")
        use_google = st.checkbox("Google (SerpAPI)", value=True)
        use_bing = st.checkbox("Bing")
        use_duckduckgo = st.checkbox("DuckDuckGo")
        engine_flags = (('google', use_google), ('bing', use_bing), ('duckduckgo', use_duckduckgo))
        search_engines = [engine for engine, enabled in engine_flags if enabled]

        num_results = st.slider(
            "Results per engine",
            min_value=5,
            max_value=15,
            value=10
        )
        custom_prompt = st.text_area(
            "Custom Analysis Prompt (Optional)",
            placeholder="Leave empty for default analysis...",
            height=100
        )
        analyze_button = st.button("πŸš€ Start Batch Analysis", type="primary", use_container_width=True)

        # Export section
        if st.session_state.batch_results:
            st.markdown("---")
            st.subheader("πŸ“₯ Export Results")
            csv_content = create_csv_export(st.session_state.batch_results)
            timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
            st.download_button(
                label="πŸ’Ύ Download CSV Report",
                data=csv_content,
                file_name=f"batch_brand_analysis_{timestamp}.csv",
                mime="text/csv",
                use_container_width=True
            )

    # Main content
    # Show the completion notice left behind by the run that triggered the rerun.
    if 'batch_analysis_notice' in st.session_state:
        st.success(st.session_state['batch_analysis_notice'])
        del st.session_state['batch_analysis_notice']

    if analyze_button:
        search_query = (search_query or "").strip()
        # dict.fromkeys drops repeated names while keeping the typed order.
        brand_names = list(dict.fromkeys(
            line.strip() for line in (brand_names_input or "").split('\n') if line.strip()
        ))

        if not search_query:
            st.error("⚠️ Please enter a search query")
            return
        if not brand_names:
            st.error("⚠️ Please enter at least one brand name")
            return
        if not search_engines:
            st.error("⚠️ Please select at least one search engine")
            return

        st.info(f"πŸ“Š Analyzing {len(brand_names)} brand(s) across {len(search_engines)} search engine(s)")

        # Batch analysis
        analyzer = BrandAnalyzer()
        batch_results = batch_analyze_brands(
            search_query,
            brand_names,
            search_engines,
            num_results,
            custom_prompt,
            analyzer,
            scrape_article_content
        )

        # Save to database and session state
        st.session_state.batch_results = batch_results
        for brand_name, results in batch_results.items():
            if not results:
                continue
            # NOTE(review): the complete result list is saved once per selected
            # engine, so with several engines the same articles are stored
            # under each of them. If each result's 'source' holds the engine
            # name, filter by it here - confirm against search_engines.py.
            for engine in search_engines:
                save_analysis_to_db(search_query, brand_name, engine, results)
            # TODO: co-mentions are not persisted yet. The previous code only
            # collected the other brand names found in result['content'] and
            # then discarded them, noting that saving needs an article id
            # that is not available at this point.

        st.session_state['batch_analysis_notice'] = "βœ… Batch analysis complete and saved to database!"
        st.rerun()

    # Display results
    if st.session_state.batch_results:
        _render_batch_results(st.session_state.batch_results)
    else:
        st.info("πŸ‘ˆ Configure your analysis in the sidebar and click 'Start Batch Analysis' to begin")
def _within_time_range(created_at, time_range):
    """Return True when ``created_at`` falls inside the named time range.

    Labels other than the three bounded windows (for example "All time")
    place no limit and always return True.
    """
    # Local import: the file-level import only brings in ``datetime``.
    from datetime import timedelta

    windows = {
        "Last 24 hours": timedelta(hours=24),
        "Last 7 days": timedelta(days=7),
        "Last 30 days": timedelta(days=30),
    }
    window = windows.get(time_range)
    if window is None:
        return True
    # Build "now" with the same tz-awareness as the stored value so the
    # subtraction never mixes naive and aware datetimes.
    # NOTE(review): naive timestamps are compared against local time here;
    # if the database stores naive UTC values the window shifts by the UTC
    # offset - confirm how created_at is written.
    now = datetime.now(created_at.tzinfo)
    return now - created_at <= window


def render_dashboard():
    """Render the analytics dashboard.

    Loads up to 100 historical analyses via ``get_historical_analyses`` and,
    after applying the sidebar brand and time-range filters, shows summary
    metrics, sentiment charts, a mentions-over-time line chart and a
    filterable, sortable list of individual mentions (first 20 shown).

    The time-range selector starts on "All time", so the default view shows
    every loaded analysis for the selected brands.
    """
    st.title("πŸ“Š Brand Analytics Dashboard")

    # Get historical data
    analyses = get_historical_analyses(limit=100)
    if not analyses:
        st.info("No historical data available. Run some analyses first!")
        return

    # Filter controls
    st.sidebar.subheader("πŸ“Š Dashboard Filters")

    # Sorted so the options (and the five preselected brands) are stable
    # instead of following arbitrary set order.
    all_brands = sorted({a.brand_name for a in analyses}, key=str)
    selected_brands = st.sidebar.multiselect(
        "Filter by Brand",
        all_brands,
        default=all_brands[:5]
    )

    time_options = ["Last 24 hours", "Last 7 days", "Last 30 days", "All time"]
    time_range = st.sidebar.selectbox(
        "Time Range",
        time_options,
        index=len(time_options) - 1
    )

    # Apply both filters
    filtered_analyses = [
        a for a in analyses
        if a.brand_name in selected_brands and _within_time_range(a.created_at, time_range)
    ]

    # Summary metrics
    col1, col2, col3, col4 = st.columns(4)
    total_analyses = len(filtered_analyses)
    total_mentions = sum(a.total_mentions for a in filtered_analyses)
    # max(..., 1) avoids dividing by zero when nothing matched.
    avg_sentiment = sum(a.positive_count for a in filtered_analyses) / max(total_mentions, 1)
    with col1:
        st.metric("Total Analyses", total_analyses)
    with col2:
        st.metric("Total Mentions", total_mentions)
    with col3:
        st.metric("Avg Positive %", f"{avg_sentiment*100:.1f}%")
    with col4:
        active_brands = len({a.brand_name for a in filtered_analyses})
        st.metric("Active Brands", active_brands)

    # Sentiment Distribution Chart
    st.subheader("πŸ“ˆ Sentiment Distribution")
    if filtered_analyses:
        total_positive = sum(a.positive_count for a in filtered_analyses)
        total_negative = sum(a.negative_count for a in filtered_analyses)
        total_neutral = sum(a.neutral_count for a in filtered_analyses)

        col1, col2 = st.columns(2)
        with col1:
            # Pie chart
            fig_pie = go.Figure(data=[go.Pie(
                labels=['Positive', 'Negative', 'Neutral'],
                values=[total_positive, total_negative, total_neutral],
                marker=dict(colors=['#00D26A', '#FF5C5C', '#FFD700'])
            )])
            fig_pie.update_layout(title="Overall Sentiment Distribution")
            st.plotly_chart(fig_pie, use_container_width=True)
        with col2:
            # Bar chart by brand
            brand_sentiment = defaultdict(lambda: {'positive': 0, 'negative': 0, 'neutral': 0})
            for analysis in filtered_analyses:
                counts = brand_sentiment[analysis.brand_name]
                counts['positive'] += analysis.positive_count
                counts['negative'] += analysis.negative_count
                counts['neutral'] += analysis.neutral_count
            brands = list(brand_sentiment)
            positive_vals = [brand_sentiment[b]['positive'] for b in brands]
            negative_vals = [brand_sentiment[b]['negative'] for b in brands]
            neutral_vals = [brand_sentiment[b]['neutral'] for b in brands]
            fig_bar = go.Figure(data=[
                go.Bar(name='Positive', x=brands, y=positive_vals, marker_color='#00D26A'),
                go.Bar(name='Negative', x=brands, y=negative_vals, marker_color='#FF5C5C'),
                go.Bar(name='Neutral', x=brands, y=neutral_vals, marker_color='#FFD700')
            ])
            fig_bar.update_layout(
                title="Sentiment by Brand",
                barmode='stack',
                xaxis_title="Brand",
                yaxis_title="Mentions"
            )
            st.plotly_chart(fig_bar, use_container_width=True)

    # Trend over time
    st.subheader("πŸ“… Mention Trends Over Time")
    if filtered_analyses:
        df_trend = pd.DataFrame([
            {
                'Date': analysis.created_at.date(),
                'Brand': analysis.brand_name,
                'Mentions': analysis.total_mentions
            }
            for analysis in filtered_analyses
        ])
        # One point per brand per day, in date order: the line chart joins
        # points in row order, so unsorted or repeated dates make it zigzag.
        df_trend = (
            df_trend.groupby(['Date', 'Brand'], as_index=False)['Mentions']
            .sum()
            .sort_values('Date')
        )
        fig_trend = px.line(
            df_trend,
            x='Date',
            y='Mentions',
            color='Brand',
            title="Brand Mentions Over Time"
        )
        st.plotly_chart(fig_trend, use_container_width=True)

    # Detailed mentions table with filtering
    st.subheader("πŸ” Detailed Mentions")

    # NOTE(review): one get_all_mentions call per analysis; consider a bulk
    # lookup if db_operations offers one.
    all_mentions = []
    for analysis in filtered_analyses:
        all_mentions.extend(get_all_mentions(analysis_id=analysis.id))

    if all_mentions:
        # Sentiment filter
        sentiment_filter = st.multiselect(
            "Filter by Sentiment",
            ["positive", "negative", "neutral"],
            default=["positive", "negative", "neutral"]
        )
        # Sort options
        sort_by = st.selectbox(
            "Sort by",
            ["Date (Newest)", "Date (Oldest)", "Confidence (High to Low)", "Confidence (Low to High)"]
        )

        filtered_mentions = [m for m in all_mentions if m.sentiment in sentiment_filter]

        # Each label maps to (sort attribute, descending?).
        sort_modes = {
            "Date (Newest)": ('created_at', True),
            "Date (Oldest)": ('created_at', False),
            "Confidence (High to Low)": ('confidence', True),
            "Confidence (Low to High)": ('confidence', False),
        }
        sort_attr, descending = sort_modes[sort_by]
        filtered_mentions.sort(key=lambda m: getattr(m, sort_attr), reverse=descending)

        # Display mentions
        sentiment_icons = {"positive": "😊", "negative": "😞", "neutral": "😐"}
        for mention in filtered_mentions[:20]:  # Limit to 20
            sentiment_emoji = sentiment_icons.get(mention.sentiment, "😐")
            with st.expander(f"{sentiment_emoji} {mention.brand_name} - {mention.mention_type} ({mention.confidence:.0%} confidence)"):
                st.markdown(f"**Mention:** {mention.mention_text}")
                st.markdown(f"**Context:** {mention.context}")
                st.caption(f"**Explanation:** {mention.explanation}")
                st.caption(f"**Date:** {mention.created_at.strftime('%Y-%m-%d %H:%M')}")
def render_co_mention_network():
    """Render co-mention network visualization.

    Reads co-mention records from ``get_co_mention_network`` (each is read
    for ``brand1``, ``brand2`` and ``co_occurrence_count``), builds an
    undirected weighted graph, draws it with Plotly and lists network
    statistics plus the ten heaviest brand pairs.

    Pairs are accumulated under an order-independent key, so records stored
    as (A, B) and (B, A) add up to a single edge weight.
    """
    st.title("πŸ•ΈοΈ Brand Co-Mention Network")
    st.markdown("Visualize which brands are frequently mentioned together in articles")

    co_mentions = get_co_mention_network()
    if not co_mentions:
        st.info("No co-mention data available. Analyze multiple brands together to see relationships!")
        return

    # Sum counts per unordered pair. Keying on the raw (brand1, brand2) tuple
    # would split reversed records into two entries, and the later add_edge
    # on the undirected graph would overwrite the first weight.
    edge_data = defaultdict(int)
    for cm in co_mentions:
        pair = tuple(sorted((cm.brand1, cm.brand2), key=str))
        edge_data[pair] += cm.co_occurrence_count

    # Build network graph
    G = nx.Graph()
    for (brand1, brand2), count in edge_data.items():
        G.add_edge(brand1, brand2, weight=count)

    # A fixed seed keeps node positions stable between script reruns.
    pos = nx.spring_layout(G, k=2, iterations=50, seed=42)

    # Create edge traces (one per edge so each can carry its own width)
    max_edge_width = 20  # cap so very frequent pairs do not swamp the plot
    edge_traces = []
    for node_a, node_b, weight in G.edges(data='weight'):
        x0, y0 = pos[node_a]
        x1, y1 = pos[node_b]
        edge_traces.append(go.Scatter(
            x=[x0, x1, None],
            y=[y0, y1, None],
            mode='lines',
            line=dict(width=min(weight * 2, max_edge_width), color='#888'),
            hoverinfo='text',
            text=f"{node_a} ↔ {node_b}: {weight} co-mentions",
            showlegend=False
        ))

    # Create node trace
    node_names = list(G.nodes())
    node_x = []
    node_y = []
    node_text = []
    node_size = []
    for node in node_names:
        x, y = pos[node]
        node_x.append(x)
        node_y.append(y)
        # Node size grows with the number of connected brands
        connections = G.degree(node)
        node_size.append(30 + connections * 10)
        node_text.append(f"{node}<br>Connections: {connections}")

    node_trace = go.Scatter(
        x=node_x,
        y=node_y,
        mode='markers+text',
        text=node_names,
        textposition="top center",
        hovertext=node_text,
        hoverinfo='text',
        marker=dict(
            size=node_size,
            color='#1f77b4',
            line=dict(width=2, color='white')
        ),
        showlegend=False
    )

    # Create figure
    fig = go.Figure(data=edge_traces + [node_trace])
    fig.update_layout(
        title="Brand Co-Mention Network",
        title_font_size=20,
        showlegend=False,
        hovermode='closest',
        margin=dict(b=0, l=0, r=0, t=40),
        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        height=600
    )
    st.plotly_chart(fig, use_container_width=True)

    # Network statistics
    st.subheader("πŸ“Š Network Statistics")
    col1, col2, col3 = st.columns(3)
    with col1:
        st.metric("Total Brands", len(G.nodes()))
    with col2:
        st.metric("Total Relationships", len(G.edges()))
    with col3:
        density = nx.density(G)
        st.metric("Network Density", f"{density:.2%}")

    # Top co-mentions
    st.subheader("πŸ” Top Co-Mentions")
    top_pairs = sorted(edge_data.items(), key=lambda item: item[1], reverse=True)[:10]
    for (brand1, brand2), count in top_pairs:
        st.write(f"**{brand1}** ↔ **{brand2}**: {count} co-mentions")
def render_scheduled_monitoring():
    """Render scheduled monitoring page.

    The top expander collects a search query, brand names, search engines
    and a frequency and creates a job through ``create_scheduled_job``.
    Below it, the active jobs returned by ``get_scheduled_jobs`` are listed.

    A successful creation stores its confirmation text in
    ``st.session_state`` under ``schedule_notice`` before calling
    ``st.rerun()``; the message is shown on the following run, because
    output written right before a rerun is discarded with that run.
    """
    st.title("⏰ Scheduled Brand Monitoring")
    st.markdown("Set up recurring brand analyses")

    # Confirmation carried over from the run that created a schedule.
    if 'schedule_notice' in st.session_state:
        st.success(st.session_state['schedule_notice'])
        del st.session_state['schedule_notice']

    # Create new schedule
    with st.expander("βž• Create New Schedule", expanded=True):
        col1, col2 = st.columns(2)
        with col1:
            schedule_query = st.text_input("Search Query", placeholder="AI technology news")
            schedule_brands = st.text_area(
                "Brand Names (one per line)",
                placeholder="OpenAI\nGoogle\nMicrosoft"
            )
        with col2:
            schedule_engines = st.multiselect(
                "Search Engines",
                ["google", "bing", "duckduckgo"],
                default=["google"]
            )
            schedule_frequency = st.selectbox(
                "Frequency",
                ["daily", "weekly", "monthly"]
            )

        if st.button("Create Schedule"):
            query = (schedule_query or "").strip()
            brands = [b.strip() for b in (schedule_brands or "").split('\n') if b.strip()]
            # Whitespace-only text and an emptied engine list count as missing.
            if not (query and brands and schedule_engines):
                st.error("Please fill in all fields")
            else:
                job_id = create_scheduled_job(
                    query,
                    brands,
                    schedule_engines,
                    schedule_frequency
                )
                if job_id:
                    st.session_state['schedule_notice'] = f"βœ… Schedule created successfully! (ID: {job_id})"
                    st.rerun()
                else:
                    # A falsy id previously left the user with no feedback.
                    st.error("Schedule could not be created. Please try again.")

    # List existing schedules
    st.subheader("πŸ“… Active Schedules")
    jobs = get_scheduled_jobs(active_only=True)
    if not jobs:
        st.info("No active schedules. Create one above!")
        return

    for job in jobs:
        with st.expander(f"πŸ”” {job.search_query} - {job.schedule_type}"):
            st.write(f"**Brands:** {job.brand_names}")
            st.write(f"**Engines:** {job.search_engines}")
            st.write(f"**Frequency:** {job.schedule_type}")
            if job.last_run:
                st.write(f"**Last Run:** {job.last_run.strftime('%Y-%m-%d %H:%M')}")
            if job.next_run:
                st.write(f"**Next Run:** {job.next_run.strftime('%Y-%m-%d %H:%M')}")
            st.caption(f"Created: {job.created_at.strftime('%Y-%m-%d')}")
def render_history():
    """Render analysis history.

    Fetches up to 50 analyses via ``get_historical_analyses`` and shows them
    as a table, optionally narrowed to the brands picked in the multiselect.
    An info message is shown instead when nothing is returned.
    """
    st.title("πŸ“š Analysis History")

    past_runs = get_historical_analyses(limit=50)
    if not past_runs:
        st.info("No historical analyses available")
        return

    # One table row per stored analysis
    table = pd.DataFrame([
        {
            'Date': run.created_at.strftime('%Y-%m-%d %H:%M'),
            'Brand': run.brand_name,
            'Query': run.search_query,
            'Engine': run.search_engine,
            'Articles': run.total_articles,
            'Mentions': run.total_mentions,
            'Positive': run.positive_count,
            'Negative': run.negative_count,
            'Neutral': run.neutral_count
        }
        for run in past_runs
    ])

    # An empty selection means "show everything"
    chosen_brands = st.multiselect(
        "Filter by Brand",
        table['Brand'].unique(),
        default=None
    )
    if chosen_brands:
        table = table[table['Brand'].isin(chosen_brands)]

    st.dataframe(table, use_container_width=True)
# Main routing: map the sidebar selection to its page renderer. A label with
# no entry renders nothing.
_PAGE_RENDERERS = {
    "Analysis": render_analysis_page,
    "Dashboard": render_dashboard,
    "Co-Mention Network": render_co_mention_network,
    "Scheduled Monitoring": render_scheduled_monitoring,
    "History": render_history,
}
_render_selected_page = _PAGE_RENDERERS.get(page)
if _render_selected_page is not None:
    _render_selected_page()