Spaces:

spamultrapromax
/

BrandScanAI

Sleeping

Arun21102003

Fix: Add database initialization on startup

8641f78 13 days ago

24.4 kB

	import streamlit as st
	import os
	from datetime import datetime
	import pandas as pd
	from web_scraper import scrape_article_content
	from brand_analyzer import BrandAnalyzer
	from search_engines import multi_engine_search, batch_analyze_brands
	from db_operations import (
	save_analysis_to_db, get_historical_analyses, get_all_mentions,
	save_co_mentions, get_co_mention_network, create_scheduled_job,
	get_scheduled_jobs
	)
	import plotly.express as px
	import plotly.graph_objects as go
	import networkx as nx
	from collections import Counter, defaultdict
	from scheduler import get_scheduler
	from database import init_database
	from dotenv import load_dotenv

	# Load environment variables and initialize database
	load_dotenv()
	init_database()

	# Page configuration
	st.set_page_config(
	    page_title="Brand Monitoring Dashboard",
	    page_icon="🔍",
	    layout="wide",
	)

	# Initialize scheduler. A failure is surfaced as a warning instead of
	# stopping the app; `scheduler` is bound to None in that case so the name
	# is always defined at module level.
	try:
	    scheduler = get_scheduler()
	except Exception as e:
	    scheduler = None
	    st.warning(f"Scheduler initialization warning: {e}")

	# Initialize session state (only on the first run of a session, so values
	# written by earlier reruns are preserved).
	if 'batch_results' not in st.session_state:
	    st.session_state.batch_results = {}
	if 'current_page' not in st.session_state:
	    st.session_state.current_page = 'Analysis'
	if 'selected_analysis_ids' not in st.session_state:
	    st.session_state.selected_analysis_ids = []

	# Sidebar navigation: `page` holds the label of the selected page and is
	# consumed by the routing code at the bottom of the file.
	st.sidebar.title("🔍 Brand Monitor Pro")
	page = st.sidebar.radio(
	    "Navigation",
	    ["Analysis", "Dashboard", "Co-Mention Network", "Scheduled Monitoring", "History"],
	)

	def create_csv_export(results: dict) -> str:
	    """Create CSV content from batch analysis results.

	    Args:
	        results: Mapping of brand name to a list of per-article result
	            dicts. Each result may carry ``url``, ``title``, ``source`` and
	            an ``analysis`` dict holding ``explicit_mentions`` and
	            ``indirect_mentions`` lists of mention dicts.

	    Returns:
	        CSV text (header included) with one row per mention, or the literal
	        string ``"No data to export"`` when there is no mention at all.
	    """
	    # (label written to the CSV, key of the list in `analysis`,
	    #  key of the mention text inside each mention dict)
	    mention_kinds = (
	        ('Explicit', 'explicit_mentions', 'mention'),
	        ('Indirect', 'indirect_mentions', 'reference'),
	    )

	    # One timestamp for the whole export so every row of a report agrees.
	    exported_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

	    rows = []
	    for brand_name, analysis_results in results.items():
	        for result in analysis_results or []:
	            # `or {}` also covers an explicit ``'analysis': None``.
	            analysis = result.get('analysis') or {}

	            for type_label, list_key, text_key in mention_kinds:
	                for mention in analysis.get(list_key) or []:
	                    rows.append({
	                        'Brand': brand_name,
	                        'URL': result.get('url', ''),
	                        'Article Title': result.get('title', ''),
	                        'Source': result.get('source', 'unknown'),
	                        'Mention Type': type_label,
	                        'Mention Text': mention.get(text_key, ''),
	                        'Context': mention.get('context', ''),
	                        'Sentiment': mention.get('sentiment', ''),
	                        'Explanation': mention.get('explanation', ''),
	                        'Timestamp': exported_at,
	                    })

	    if not rows:
	        return "No data to export"
	    return pd.DataFrame(rows).to_csv(index=False)

	def render_analysis_page():
	    """Render the main analysis page with batch processing.

	    Draws the configuration sidebar, runs a batch analysis when the start
	    button is pressed (storing the outcome in ``st.session_state`` and the
	    database, then rerunning the script), and finally displays whatever
	    batch results the session currently holds.
	    """
	    st.title("🔍 Brand Mention Analysis")
	    st.markdown("Analyze multiple brands across different search engines simultaneously")

	    settings = _render_analysis_sidebar()

	    # Main content
	    if settings['analyze_button']:
	        if not settings['search_query']:
	            st.error("⚠️ Please enter a search query")
	            return

	        if not settings['brand_names_input'].strip():
	            st.error("⚠️ Please enter at least one brand name")
	            return

	        if not settings['search_engines']:
	            st.error("⚠️ Please select at least one search engine")
	            return

	        _run_batch_analysis(settings)

	        # st.rerun() aborts this script run immediately, so a message drawn
	        # right before it would never be seen. Leave a flag and show the
	        # confirmation on the next run instead.
	        st.session_state.batch_analysis_saved = True
	        st.rerun()

	    if st.session_state.get('batch_analysis_saved'):
	        st.session_state.batch_analysis_saved = False
	        st.success("✅ Batch analysis complete and saved to database!")

	    # Display results
	    if st.session_state.batch_results:
	        _render_batch_results(st.session_state.batch_results)
	    else:
	        st.info("👈 Configure your analysis in the sidebar and click 'Start Batch Analysis' to begin")


	def _render_analysis_sidebar():
	    """Draw the sidebar widgets and return the user's choices as a dict.

	    Keys: ``search_query``, ``brand_names_input``, ``search_engines`` (list
	    of engine ids), ``num_results``, ``custom_prompt``, ``analyze_button``.
	    """
	    with st.sidebar:
	        st.header("🔧 Configuration")

	        search_query = st.text_input(
	            "Search Query",
	            placeholder="e.g., AI startups 2024",
	            help="Base search query to find relevant articles",
	        )

	        brand_names_input = st.text_area(
	            "Brand Names (one per line)",
	            placeholder="OpenAI\nAnthropic\nGoogle AI",
	            help="Enter brand names to monitor, one per line",
	        )

	        # Search engine selection
	        st.subheader("🌐 Search Engines")
	        engine_toggles = (
	            ('google', st.checkbox("Google (SerpAPI)", value=True)),
	            ('bing', st.checkbox("Bing")),
	            ('duckduckgo', st.checkbox("DuckDuckGo")),
	        )
	        search_engines = [engine for engine, enabled in engine_toggles if enabled]

	        num_results = st.slider(
	            "Results per engine",
	            min_value=5,
	            max_value=15,
	            value=10,
	        )

	        custom_prompt = st.text_area(
	            "Custom Analysis Prompt (Optional)",
	            placeholder="Leave empty for default analysis...",
	            height=100,
	        )

	        analyze_button = st.button("🚀 Start Batch Analysis", type="primary", use_container_width=True)

	        # Export section (only once there is something to export)
	        if st.session_state.batch_results:
	            st.markdown("---")
	            st.subheader("📥 Export Results")

	            csv_content = create_csv_export(st.session_state.batch_results)
	            timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')

	            st.download_button(
	                label="💾 Download CSV Report",
	                data=csv_content,
	                file_name=f"batch_brand_analysis_{timestamp}.csv",
	                mime="text/csv",
	                use_container_width=True,
	            )

	    return {
	        'search_query': search_query,
	        'brand_names_input': brand_names_input,
	        'search_engines': search_engines,
	        'num_results': num_results,
	        'custom_prompt': custom_prompt,
	        'analyze_button': analyze_button,
	    }


	def _run_batch_analysis(settings):
	    """Run the batch analysis for validated settings and persist the results."""
	    search_query = settings['search_query']
	    search_engines = settings['search_engines']

	    # Parse brand names: one per line, blank lines ignored
	    brand_names = [
	        line.strip()
	        for line in settings['brand_names_input'].strip().split('\n')
	        if line.strip()
	    ]

	    st.info(f"📊 Analyzing {len(brand_names)} brand(s) across {len(search_engines)} search engine(s)")

	    analyzer = BrandAnalyzer()
	    batch_results = batch_analyze_brands(
	        search_query,
	        brand_names,
	        search_engines,
	        settings['num_results'],
	        settings['custom_prompt'],
	        analyzer,
	        scrape_article_content,
	    )

	    # Save to session state and database
	    st.session_state.batch_results = batch_results

	    for brand_name, results in batch_results.items():
	        if not results:
	            continue

	        # NOTE(review): every selected engine is saved with the full result
	        # list, regardless of each result's own 'source' — confirm this is
	        # what save_analysis_to_db expects.
	        for engine in search_engines:
	            save_analysis_to_db(search_query, brand_name, engine, results)

	        # Track co-mentions if multiple brands
	        if len(brand_names) > 1:
	            for result in results:
	                co_mentioned = _find_co_mentioned_brands(result, brand_name, brand_names)
	                # TODO: persist `co_mentioned` once an article id is
	                # available here; nothing is stored yet.
	                del co_mentioned


	def _find_co_mentioned_brands(result, brand_name, brand_names):
	    """Return the brands that appear together with ``brand_name`` in ``result``.

	    The list is empty when the article has no mention of ``brand_name`` or
	    no other brand's name occurs in its content; otherwise it holds the
	    other brands found (case-insensitive substring match) followed by
	    ``brand_name`` itself.
	    """
	    analysis = result.get('analysis', {})
	    if not (analysis.get('explicit_mentions') or analysis.get('indirect_mentions')):
	        return []

	    # `or ''` keeps an explicit ``'content': None`` from raising.
	    article_content = (result.get('content') or '').lower()
	    mentioned_brands = [
	        other_brand
	        for other_brand in brand_names
	        if other_brand != brand_name and other_brand.lower() in article_content
	    ]
	    if mentioned_brands:
	        mentioned_brands.append(brand_name)
	    return mentioned_brands


	def _render_batch_results(batch_results):
	    """Display summary metrics and per-brand details for ``batch_results``."""
	    sentiment_emojis = {"positive": "😊", "negative": "😞", "neutral": "😐"}

	    st.markdown("---")
	    st.subheader("📊 Analysis Results")

	    # Summary metrics
	    total_brands = len(batch_results)
	    total_articles = sum(len(results) for results in batch_results.values())
	    total_mentions = sum(
	        r.get('total_mentions', 0)
	        for results in batch_results.values()
	        for r in results
	    )

	    col1, col2, col3 = st.columns(3)
	    with col1:
	        st.metric("Brands Analyzed", total_brands)
	    with col2:
	        st.metric("Total Articles", total_articles)
	    with col3:
	        st.metric("Total Mentions", total_mentions)

	    # Display results by brand
	    for brand_name, results in batch_results.items():
	        with st.expander(f"{brand_name} - {len(results)} articles"):
	            if not results:
	                st.info("No results found")
	                continue

	            # Brand-specific metrics
	            mentions_count = sum(r.get('total_mentions', 0) for r in results)
	            articles_with_mentions = sum(1 for r in results if r.get('total_mentions', 0) > 0)

	            col1, col2 = st.columns(2)
	            with col1:
	                st.metric("Articles with Mentions", articles_with_mentions)
	            with col2:
	                st.metric("Total Mentions", mentions_count)

	            # Show mentions from the first five articles only
	            for result in results[:5]:
	                analysis = result.get('analysis', {})
	                if not (analysis.get('explicit_mentions') or analysis.get('indirect_mentions')):
	                    continue

	                st.markdown(f"📄 {result['title'][:80]}...")
	                # `\\|` emits a backslash-escaped pipe; written with a
	                # doubled backslash because a bare "\|" is an invalid
	                # string escape sequence.
	                st.caption(f"🔗 {result['url']} \\| Source: {result.get('source', 'unknown')}")

	                # At most two explicit mentions per article
	                for mention in analysis.get('explicit_mentions', [])[:2]:
	                    sentiment_emoji = sentiment_emojis.get(mention.get('sentiment'), "😐")
	                    st.markdown(f"- {sentiment_emoji} {mention.get('mention', '')}")

	def render_dashboard():
	    """Render the analytics dashboard.

	    Loads up to 100 stored analyses, narrows them with the sidebar brand
	    and time-range filters, and draws summary metrics, sentiment charts, a
	    mentions-over-time chart and a filterable, sortable list of mentions.
	    """
	    # Function-scope import: the module header only brings in `datetime`.
	    from datetime import timedelta

	    sentiment_emojis = {"positive": "😊", "negative": "😞", "neutral": "😐"}

	    st.title("📊 Brand Analytics Dashboard")

	    # Get historical data
	    analyses = get_historical_analyses(limit=100)

	    if not analyses:
	        st.info("No historical data available. Run some analyses first!")
	        return

	    # Filter controls
	    st.sidebar.subheader("📊 Dashboard Filters")

	    # Brand filter. Sorted so the option order (and therefore the five
	    # brands preselected by default) is stable instead of set-ordered.
	    all_brands = sorted({a.brand_name for a in analyses}, key=str)
	    selected_brands = st.sidebar.multiselect(
	        "Filter by Brand",
	        all_brands,
	        default=all_brands[:5],
	    )

	    # Time filter: label -> maximum age (None means no limit)
	    time_windows = {
	        "Last 24 hours": timedelta(hours=24),
	        "Last 7 days": timedelta(days=7),
	        "Last 30 days": timedelta(days=30),
	        "All time": None,
	    }
	    time_range = st.sidebar.selectbox("Time Range", list(time_windows))
	    max_age = time_windows[time_range]

	    def _within_time_range(analysis):
	        """Return True when the analysis is recent enough for the selection."""
	        if max_age is None:
	            return True
	        created_at = analysis.created_at
	        # Use "now" in the timestamp's own timezone so aware and naive
	        # values are never mixed.
	        # NOTE(review): naive timestamps are compared against local time —
	        # confirm the database does not store naive UTC on a non-UTC host.
	        return datetime.now(created_at.tzinfo) - created_at <= max_age

	    # Filter analyses
	    filtered_analyses = [
	        a for a in analyses
	        if a.brand_name in selected_brands and _within_time_range(a)
	    ]

	    # Summary metrics
	    col1, col2, col3, col4 = st.columns(4)

	    total_analyses = len(filtered_analyses)
	    total_mentions = sum(a.total_mentions for a in filtered_analyses)
	    # Share of positive mentions; max(..., 1) avoids dividing by zero.
	    avg_sentiment = sum(a.positive_count for a in filtered_analyses) / max(total_mentions, 1)

	    with col1:
	        st.metric("Total Analyses", total_analyses)
	    with col2:
	        st.metric("Total Mentions", total_mentions)
	    with col3:
	        st.metric("Avg Positive %", f"{avg_sentiment*100:.1f}%")
	    with col4:
	        active_brands = len({a.brand_name for a in filtered_analyses})
	        st.metric("Active Brands", active_brands)

	    # Sentiment Distribution Chart
	    st.subheader("📈 Sentiment Distribution")

	    if filtered_analyses:
	        total_positive = sum(a.positive_count for a in filtered_analyses)
	        total_negative = sum(a.negative_count for a in filtered_analyses)
	        total_neutral = sum(a.neutral_count for a in filtered_analyses)

	        col1, col2 = st.columns(2)

	        with col1:
	            # Pie chart
	            fig_pie = go.Figure(data=[go.Pie(
	                labels=['Positive', 'Negative', 'Neutral'],
	                values=[total_positive, total_negative, total_neutral],
	                marker=dict(colors=['#00D26A', '#FF5C5C', '#FFD700']),
	            )])
	            fig_pie.update_layout(title="Overall Sentiment Distribution")
	            st.plotly_chart(fig_pie, use_container_width=True)

	        with col2:
	            # Bar chart by brand
	            brand_sentiment = defaultdict(lambda: {'positive': 0, 'negative': 0, 'neutral': 0})
	            for analysis in filtered_analyses:
	                counts = brand_sentiment[analysis.brand_name]
	                counts['positive'] += analysis.positive_count
	                counts['negative'] += analysis.negative_count
	                counts['neutral'] += analysis.neutral_count

	            brands = list(brand_sentiment.keys())
	            positive_vals = [brand_sentiment[b]['positive'] for b in brands]
	            negative_vals = [brand_sentiment[b]['negative'] for b in brands]
	            neutral_vals = [brand_sentiment[b]['neutral'] for b in brands]

	            fig_bar = go.Figure(data=[
	                go.Bar(name='Positive', x=brands, y=positive_vals, marker_color='#00D26A'),
	                go.Bar(name='Negative', x=brands, y=negative_vals, marker_color='#FF5C5C'),
	                go.Bar(name='Neutral', x=brands, y=neutral_vals, marker_color='#FFD700'),
	            ])
	            fig_bar.update_layout(
	                title="Sentiment by Brand",
	                barmode='stack',
	                xaxis_title="Brand",
	                yaxis_title="Mentions",
	            )
	            st.plotly_chart(fig_bar, use_container_width=True)

	    # Trend over time
	    st.subheader("📅 Mention Trends Over Time")

	    if filtered_analyses:
	        df_trend = pd.DataFrame([
	            {
	                'Date': analysis.created_at.date(),
	                'Brand': analysis.brand_name,
	                'Mentions': analysis.total_mentions,
	            }
	            for analysis in filtered_analyses
	        ])
	        # Several analyses of one brand on the same day would otherwise be
	        # plotted as separate points at the same x position; sum them and
	        # order by date so each brand's line runs left to right.
	        df_trend = (
	            df_trend
	            .groupby(['Date', 'Brand'], as_index=False)['Mentions']
	            .sum()
	            .sort_values('Date')
	        )
	        fig_trend = px.line(
	            df_trend,
	            x='Date',
	            y='Mentions',
	            color='Brand',
	            title="Brand Mentions Over Time",
	        )
	        st.plotly_chart(fig_trend, use_container_width=True)

	    # Detailed mentions table with filtering
	    st.subheader("🔍 Detailed Mentions")

	    # Get all mentions for filtered analyses (one lookup per analysis)
	    all_mentions = []
	    for analysis in filtered_analyses:
	        all_mentions.extend(get_all_mentions(analysis_id=analysis.id))

	    if all_mentions:
	        def _confidence(mention):
	            """Confidence of a mention, treating a missing value as 0."""
	            return mention.confidence or 0.0

	        # Sentiment filter
	        sentiment_filter = st.multiselect(
	            "Filter by Sentiment",
	            ["positive", "negative", "neutral"],
	            default=["positive", "negative", "neutral"],
	        )

	        # Sort options: label -> (sort key, descending?)
	        sort_options = {
	            "Date (Newest)": (lambda m: m.created_at, True),
	            "Date (Oldest)": (lambda m: m.created_at, False),
	            "Confidence (High to Low)": (_confidence, True),
	            "Confidence (Low to High)": (_confidence, False),
	        }
	        sort_by = st.selectbox("Sort by", list(sort_options))
	        sort_key, descending = sort_options[sort_by]

	        # Filter and sort mentions
	        filtered_mentions = sorted(
	            (m for m in all_mentions if m.sentiment in sentiment_filter),
	            key=sort_key,
	            reverse=descending,
	        )

	        # Display mentions
	        for mention in filtered_mentions[:20]:  # Limit to 20
	            sentiment_emoji = sentiment_emojis.get(mention.sentiment, "😐")
	            with st.expander(f"{sentiment_emoji} {mention.brand_name} - {mention.mention_type} ({_confidence(mention):.0%} confidence)"):
	                st.markdown(f"Mention: {mention.mention_text}")
	                st.markdown(f"Context: {mention.context}")
	                st.caption(f"Explanation: {mention.explanation}")
	                st.caption(f"Date: {mention.created_at.strftime('%Y-%m-%d %H:%M')}")

	def render_co_mention_network():
	    """Render co-mention network visualization.

	    Builds an undirected graph whose nodes are brands and whose edge
	    weights are summed co-occurrence counts, draws it with Plotly, and
	    lists network statistics and the ten most frequent brand pairs.
	    """
	    st.title("🕸️ Brand Co-Mention Network")
	    st.markdown("Visualize which brands are frequently mentioned together in articles")

	    co_mentions = get_co_mention_network()

	    if not co_mentions:
	        st.info("No co-mention data available. Analyze multiple brands together to see relationships!")
	        return

	    # Sum counts per unordered pair. The graph is undirected, so (A, B)
	    # and (B, A) are the same edge; sorting the pair keeps them under one
	    # key instead of letting one overwrite the other in add_edge().
	    edge_data = defaultdict(int)
	    for cm in co_mentions:
	        pair = tuple(sorted((cm.brand1, cm.brand2)))
	        edge_data[pair] += cm.co_occurrence_count

	    # Build network graph
	    G = nx.Graph()
	    for (brand1, brand2), count in edge_data.items():
	        G.add_edge(brand1, brand2, weight=count)

	    # Calculate layout. A fixed seed keeps node positions stable across
	    # Streamlit reruns instead of reshuffling on every interaction.
	    pos = nx.spring_layout(G, k=2, iterations=50, seed=42)

	    # Create one line trace per edge so each can have its own width
	    edge_traces = []
	    for source, target, weight in G.edges(data='weight'):
	        x0, y0 = pos[source]
	        x1, y1 = pos[target]
	        edge_traces.append(go.Scatter(
	            x=[x0, x1, None],
	            y=[y0, y1, None],
	            mode='lines',
	            line=dict(width=weight*2, color='#888'),
	            hoverinfo='text',
	            text=f"{source} ↔ {target}: {weight} co-mentions",
	            showlegend=False,
	        ))

	    # Create node trace
	    nodes = list(G.nodes())
	    node_x = [pos[node][0] for node in nodes]
	    node_y = [pos[node][1] for node in nodes]
	    # Node size grows with the number of connections
	    node_size = [30 + G.degree(node) * 10 for node in nodes]
	    node_text = [f"{node}<br>Connections: {G.degree(node)}" for node in nodes]

	    node_trace = go.Scatter(
	        x=node_x,
	        y=node_y,
	        mode='markers+text',
	        text=nodes,
	        textposition="top center",
	        hovertext=node_text,
	        hoverinfo='text',
	        marker=dict(
	            size=node_size,
	            color='#1f77b4',
	            line=dict(width=2, color='white'),
	        ),
	        showlegend=False,
	    )

	    # Create figure
	    fig = go.Figure(data=edge_traces + [node_trace])

	    fig.update_layout(
	        title="Brand Co-Mention Network",
	        title_font_size=20,
	        showlegend=False,
	        hovermode='closest',
	        margin=dict(b=0, l=0, r=0, t=40),
	        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
	        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
	        height=600,
	    )

	    st.plotly_chart(fig, use_container_width=True)

	    # Network statistics
	    st.subheader("📊 Network Statistics")

	    col1, col2, col3 = st.columns(3)

	    with col1:
	        st.metric("Total Brands", len(G.nodes()))
	    with col2:
	        st.metric("Total Relationships", len(G.edges()))
	    with col3:
	        density = nx.density(G)
	        st.metric("Network Density", f"{density:.2%}")

	    # Top co-mentions
	    st.subheader("🔝 Top Co-Mentions")

	    top_pairs = sorted(edge_data.items(), key=lambda item: item[1], reverse=True)[:10]

	    for (brand1, brand2), count in top_pairs:
	        st.write(f"{brand1} ↔ {brand2}: {count} co-mentions")

	def render_scheduled_monitoring():
	    """Render scheduled monitoring page.

	    Shows a form for creating a recurring analysis job and, below it, the
	    list of currently active jobs.
	    """
	    st.title("⏰ Scheduled Brand Monitoring")
	    st.markdown("Set up recurring brand analyses")

	    # Create new schedule
	    with st.expander("➕ Create New Schedule", expanded=True):
	        col1, col2 = st.columns(2)

	        with col1:
	            schedule_query = st.text_input("Search Query", placeholder="AI technology news")
	            schedule_brands = st.text_area(
	                "Brand Names (one per line)",
	                placeholder="OpenAI\nGoogle\nMicrosoft",
	            )

	        with col2:
	            schedule_engines = st.multiselect(
	                "Search Engines",
	                ["google", "bing", "duckduckgo"],
	                default=["google"],
	            )
	            schedule_frequency = st.selectbox(
	                "Frequency",
	                ["daily", "weekly", "monthly"],
	            )

	        if st.button("Create Schedule"):
	            brands = [b.strip() for b in schedule_brands.split('\n') if b.strip()]

	            # Validate the parsed values: whitespace-only brand input or an
	            # emptied engine list must not produce a job with nothing to do.
	            if schedule_query and brands and schedule_engines:
	                job_id = create_scheduled_job(
	                    schedule_query,
	                    brands,
	                    schedule_engines,
	                    schedule_frequency,
	                )
	                if job_id:
	                    # st.rerun() aborts this run immediately, so keep the
	                    # confirmation in session state and show it next run.
	                    st.session_state.schedule_notice = (
	                        f"✅ Schedule created successfully! (ID: {job_id})"
	                    )
	                    st.rerun()
	                else:
	                    st.error("Schedule could not be created")
	            else:
	                st.error("Please fill in all fields")

	        # Confirmation left behind by a successful creation on the
	        # previous run; shown once, then cleared.
	        notice = st.session_state.get('schedule_notice')
	        if notice:
	            st.session_state.schedule_notice = None
	            st.success(notice)

	    # List existing schedules
	    st.subheader("📅 Active Schedules")

	    jobs = get_scheduled_jobs(active_only=True)

	    if not jobs:
	        st.info("No active schedules. Create one above!")
	        return

	    for job in jobs:
	        with st.expander(f"🔔 {job.search_query} - {job.schedule_type}"):
	            st.write(f"Brands: {job.brand_names}")
	            st.write(f"Engines: {job.search_engines}")
	            st.write(f"Frequency: {job.schedule_type}")
	            # Run times are only shown once they have a value
	            if job.last_run:
	                st.write(f"Last Run: {job.last_run.strftime('%Y-%m-%d %H:%M')}")
	            if job.next_run:
	                st.write(f"Next Run: {job.next_run.strftime('%Y-%m-%d %H:%M')}")
	            st.caption(f"Created: {job.created_at.strftime('%Y-%m-%d')}")

	def render_history():
	    """Render analysis history.

	    Shows the 50 most recently returned analyses as a table, optionally
	    narrowed to the brands picked in the multiselect above it.
	    """
	    st.title("📚 Analysis History")

	    past_runs = get_historical_analyses(limit=50)
	    if not past_runs:
	        st.info("No historical analyses available")
	        return

	    # One table row per stored analysis
	    df_history = pd.DataFrame([
	        {
	            'Date': run.created_at.strftime('%Y-%m-%d %H:%M'),
	            'Brand': run.brand_name,
	            'Query': run.search_query,
	            'Engine': run.search_engine,
	            'Articles': run.total_articles,
	            'Mentions': run.total_mentions,
	            'Positive': run.positive_count,
	            'Negative': run.negative_count,
	            'Neutral': run.neutral_count,
	        }
	        for run in past_runs
	    ])

	    # An empty selection means "show every brand"
	    chosen_brands = st.multiselect(
	        "Filter by Brand",
	        df_history['Brand'].unique(),
	        default=None,
	    )
	    visible_rows = (
	        df_history[df_history['Brand'].isin(chosen_brands)]
	        if chosen_brands
	        else df_history
	    )

	    st.dataframe(visible_rows, use_container_width=True)

	# Main routing: look up the renderer for the page chosen in the sidebar
	# and call it; an unrecognised label renders nothing.
	page_renderers = {
	    "Analysis": render_analysis_page,
	    "Dashboard": render_dashboard,
	    "Co-Mention Network": render_co_mention_network,
	    "Scheduled Monitoring": render_scheduled_monitoring,
	    "History": render_history,
	}
	selected_renderer = page_renderers.get(page)
	if selected_renderer is not None:
	    selected_renderer()