# dashboard.py
"""
SEO Keyword Research Dashboard
A Streamlit web interface for the keyword research pipeline.
Provides interactive analysis, visualization, and download capabilities.
Requirements:
pip install streamlit plotly pandas python-dotenv google-search-results openpyxl
Usage:
streamlit run dashboard.py
"""
import io
import json
import math
import os
import re
import sys
from dataclasses import dataclass
from datetime import date, datetime
from pathlib import Path
from typing import Optional, Tuple, Dict, Any

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st
from plotly.subplots import make_subplots
# Make the project root (and src/, when present) importable.
project_root = Path(__file__).parent
src_path = project_root / "src"
# Each insert lands at the front, so project_root ends up ahead of src/.
_import_roots = (src_path, project_root) if src_path.exists() else (project_root,)
for _import_root in _import_roots:
    sys.path.insert(0, str(_import_root))

# Load variables from .env; without python-dotenv the app cannot be configured.
try:
    from dotenv import load_dotenv
    load_dotenv()
except ImportError:
    st.error("Missing required package: python-dotenv. Install with: pip install python-dotenv")
    st.stop()

# Page configuration
st.set_page_config(
    page_title="SEO Keyword Research Dashboard",
    page_icon="🔍",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Custom CSS hook (currently an empty payload).
st.markdown("\n", unsafe_allow_html=True)
@dataclass
class KeywordMetrics:
    """Scores and SERP signals collected for a single keyword."""

    keyword: str
    monthly_searches: int
    competition_score: float
    opportunity_score: float
    total_results: int
    ads_count: int
    has_featured_snippet: bool
    has_people_also_ask: bool
    has_knowledge_graph: bool


class CompetitionCalculator:
    """Turn a search-result dictionary into a competition score in [0, 1]."""

    # Relative contribution of each signal; the weights sum to 1.0.
    WEIGHTS = {
        'total_results': 0.50,
        'ads': 0.25,
        'featured_snippet': 0.15,
        'people_also_ask': 0.07,
        'knowledge_graph': 0.03
    }

    @staticmethod
    def extract_total_results(search_info):
        """Return the total result count found in *search_info*, or 0.

        Looks at the ``total_results``, ``total_results_raw`` and ``total``
        keys (first truthy value wins). Integers are returned as-is, finite
        floats are truncated, and strings are reduced to their digits
        (``"About 1,230,000 results"`` -> ``1230000``).
        """
        if not search_info:
            return 0
        total = (
            search_info.get("total_results")
            or search_info.get("total_results_raw")
            or search_info.get("total")
        )
        if isinstance(total, int):
            return total
        if isinstance(total, float):
            return int(total) if math.isfinite(total) else 0
        if isinstance(total, str):
            numbers_only = re.sub(r"[^\d]", "", total)
            try:
                return int(numbers_only) if numbers_only else 0
            except ValueError:
                return 0
        return 0

    def calculate_score(self, search_results):
        """Score the competition for one search-result dictionary.

        Args:
            search_results: Mapping with optional ``search_information``,
                ``ads_results``, ``featured_snippet``/``answer_box``,
                ``related_questions``/``people_also_ask`` and
                ``knowledge_graph`` entries.

        Returns:
            ``(competition_score, breakdown)`` where the score is clamped to
            [0, 1] and ``breakdown`` holds the raw signals used.
        """
        search_info = search_results.get("search_information", {})
        total_results = self.extract_total_results(search_info)
        # log10 scaling: ten million results (or more) saturate the signal.
        normalized_results = min(math.log10(max(total_results, 0) + 1) / 7, 1.0)

        ads = search_results.get("ads_results", [])
        ads_count = len(ads) if ads else 0
        ads_score = min(ads_count / 3, 1.0)  # three or more ads saturate

        has_featured_snippet = bool(
            search_results.get("featured_snippet")
            or search_results.get("answer_box")
        )
        has_people_also_ask = bool(
            search_results.get("related_questions")
            or search_results.get("people_also_ask")
        )
        has_knowledge_graph = bool(search_results.get("knowledge_graph"))

        competition_score = (
            self.WEIGHTS['total_results'] * normalized_results
            + self.WEIGHTS['ads'] * ads_score
            + self.WEIGHTS['featured_snippet'] * has_featured_snippet
            + self.WEIGHTS['people_also_ask'] * has_people_also_ask
            + self.WEIGHTS['knowledge_graph'] * has_knowledge_graph
        )
        competition_score = max(0.0, min(1.0, competition_score))

        breakdown = {
            "total_results": total_results,
            "ads_count": ads_count,
            "has_featured_snippet": has_featured_snippet,
            "has_people_also_ask": has_people_also_ask,
            "has_knowledge_graph": has_knowledge_graph
        }
        return competition_score, breakdown


class KeywordDashboard:
    """Main dashboard class for SEO keyword research interface."""

    # Opportunity scores above this value are reported as "high opportunity".
    HIGH_OPPORTUNITY_THRESHOLD = 50

    # Intent rules, checked in order; the first matching rule wins. Signals
    # are matched on word boundaries (with a few simple inflection suffixes)
    # so that e.g. "laptop" does not count as "top" nor "canvas" as "vs".
    _INTENT_SUFFIX = r"(?:s|es|d|ed|ing)?"
    _INTENT_RULES = (
        ("informational",
         re.compile(r"\b(?:how to|what is|why|guide|tutorial)" + _INTENT_SUFFIX + r"\b")),
        ("transactional",
         re.compile(r"\b(?:buy|price|cost|apply|register)" + _INTENT_SUFFIX + r"\b")),
        ("commercial",
         re.compile(r"\b(?:best|top|compare|vs|reviews)" + _INTENT_SUFFIX + r"\b")),
    )

    _WELCOME_MARKDOWN = """
## 👋 Welcome to the SEO Keyword Research Dashboard
This dashboard helps you discover and analyze keyword opportunities using advanced SEO metrics.
### 🚀 Getting Started:
1. **Enter your seed keyword** in the sidebar (e.g., "digital marketing")
2. **Adjust analysis parameters** (candidates, results, filters)
3. **Click "Run Analysis"** to start the keyword research
4. **Explore results** through tables and interactive charts
5. **Download reports** in CSV, Excel, or JSON format
### 📊 Features:
- **Real-time keyword discovery** using SerpAPI
- **Competition analysis** based on SERP features
- **Intent classification** (informational, commercial, transactional)
- **Interactive visualizations** with Plotly charts
- **Advanced filtering** by volume and competition
- **Multi-format exports** (CSV, Excel, JSON reports)
"""

    def __init__(self):
        """Initialize the dashboard with necessary configurations."""
        self.setup_directories()
        self.check_environment()

    def setup_directories(self):
        """Create the output directories (relative to the working directory)."""
        self.output_dir = Path("output")
        self.processed_dir = self.output_dir / "processed"
        self.reports_dir = self.output_dir / "reports"
        for directory in (self.output_dir, self.processed_dir, self.reports_dir):
            directory.mkdir(parents=True, exist_ok=True)

    def check_environment(self):
        """Read SERPAPI_KEY from the environment and record whether it is set."""
        raw_key = os.getenv("SERPAPI_KEY")
        # A whitespace-only value is as useless as a missing one.
        self.api_key = raw_key.strip() if raw_key else None
        self.environment_ready = bool(self.api_key)

    def render_header(self):
        """Render the dashboard header.

        Returns:
            True when the API key is available, False otherwise.
        """
        st.markdown(
            "\n🔍 SEO Keyword Research Dashboard\n",
            unsafe_allow_html=True,
        )
        if not self.environment_ready:
            st.markdown(
                "\n⚠️ Environment Setup Required\n"
                "Please ensure your .env file contains: SERPAPI_KEY=your_key_here\n",
                unsafe_allow_html=True,
            )
            return False
        st.markdown(
            "\n✅ Environment Ready\n"
            "API key detected and ready for keyword research.\n",
            unsafe_allow_html=True,
        )
        return True

    def render_sidebar(self) -> Dict[str, Any]:
        """Render the sidebar controls and return their current values."""
        st.sidebar.markdown("## 🎯 Analysis Parameters")
        seed_keyword = st.sidebar.text_input(
            "🔍 Seed Keyword",
            value="global internship",
            help="Enter the main keyword to research"
        )
        max_candidates = st.sidebar.slider(
            "📊 Max Candidates",
            min_value=20,
            max_value=300,
            value=120,
            step=10,
            help="Maximum number of keyword candidates to analyze"
        )
        top_results = st.sidebar.slider(
            "🏆 Top Results",
            min_value=10,
            max_value=100,
            value=50,
            step=5,
            help="Number of top results to display and save"
        )

        st.sidebar.markdown("## ⚙️ Advanced Options")
        use_volume_api = st.sidebar.checkbox(
            "📈 Use Real Volume API",
            value=False,
            help="Enable when volume API is implemented",
            disabled=True  # Disabled until implemented
        )

        st.sidebar.markdown("## 🔧 Filters")
        min_search_volume = st.sidebar.number_input(
            "📈 Min Search Volume",
            min_value=0,
            max_value=10000,
            value=10,
            step=10,
            help="Minimum monthly search volume"
        )
        max_competition = st.sidebar.slider(
            "⚔️ Max Competition Score",
            min_value=0.0,
            max_value=1.0,
            value=1.0,
            step=0.1,
            help="Maximum competition score (0=easy, 1=hard)"
        )

        run_analysis = st.sidebar.button(
            "🚀 Run Analysis",
            type="primary",
            help="Start the keyword research analysis"
        )
        return {
            "seed_keyword": seed_keyword,
            "max_candidates": max_candidates,
            "top_results": top_results,
            "use_volume_api": use_volume_api,
            "min_search_volume": min_search_volume,
            "max_competition": max_competition,
            "run_analysis": run_analysis
        }

    def _search(self, search_cls, query, **extra):
        """Run one Google search through *search_cls* and return its dict."""
        search_params = {
            "engine": "google",
            "q": query,
            "api_key": self.api_key,
            "hl": "en",
            "gl": "us",
            **extra,
        }
        return search_cls(search_params).get_dict()

    @staticmethod
    def _extract_candidates(results, max_results):
        """Collect unique keyword candidates from a search-result dictionary.

        Candidates come from related searches, related questions and the
        titles of the first ten organic results, in that order. Duplicates
        are dropped while preserving discovery order, so truncating to
        *max_results* is deterministic.
        """
        sources = (
            (results.get("related_searches") or [], ("query", "suggestion")),
            (results.get("related_questions") or [], ("question", "query")),
            ((results.get("organic_results") or [])[:10], ("title",)),
        )
        ordered = {}
        for items, keys in sources:
            for item in items:
                if not isinstance(item, dict):
                    continue
                text = next((item.get(key) for key in keys if item.get(key)), None)
                if isinstance(text, str) and text.strip():
                    ordered.setdefault(text.strip(), None)
        return list(ordered)[:max_results]

    @staticmethod
    def _build_metrics(keyword, search_results, calculator):
        """Score one keyword from its search results."""
        competition_score, breakdown = calculator.calculate_score(search_results)
        # Placeholder volume estimate: longer phrases are assumed to be rarer.
        word_count = len(keyword.split())
        search_volume = max(10, 10000 // (word_count + 1))
        volume_score = math.log10(search_volume + 1)
        # The 0.01 offset keeps the ratio finite when competition is zero.
        opportunity_score = volume_score / (competition_score + 0.01)
        return KeywordMetrics(
            keyword=keyword,
            monthly_searches=search_volume,
            competition_score=round(competition_score, 4),
            opportunity_score=round(opportunity_score, 2),
            total_results=breakdown["total_results"],
            ads_count=breakdown["ads_count"],
            has_featured_snippet=breakdown["has_featured_snippet"],
            has_people_also_ask=breakdown["has_people_also_ask"],
            has_knowledge_graph=breakdown["has_knowledge_graph"]
        )

    def _find_related_keywords(self, search_cls, seed_keyword, max_results=120):
        """Search for *seed_keyword* and return up to *max_results* candidates."""
        progress_placeholder = st.empty()
        progress_placeholder.info(f"🔍 Finding related keywords for: '{seed_keyword}'...")
        try:
            results = self._search(search_cls, seed_keyword)
        except Exception as e:
            progress_placeholder.error(f"❌ Error getting related keywords: {e}")
            return []
        # NOTE(review): assumes the API reports failures (bad key, quota, no
        # results) as an "error" entry in the payload - confirm against the
        # SerpAPI client in use.
        if results.get("error"):
            progress_placeholder.error(
                f"❌ Error getting related keywords: {results['error']}"
            )
            return []
        final_keywords = self._extract_candidates(results, max_results)
        progress_placeholder.success(f"✅ Found {len(final_keywords)} keyword candidates")
        return final_keywords

    def _analyze_keywords_batch(self, search_cls, keywords):
        """Score every keyword and return the metrics, best opportunity first."""
        calculator = CompetitionCalculator()
        analyzed_keywords = []
        failed = 0
        total = len(keywords)
        progress_bar = st.progress(0)
        status_text = st.empty()
        for position, keyword in enumerate(keywords, start=1):
            progress_bar.progress(position / total)
            status_text.text(f"Analyzing keyword {position}/{total}: {keyword}")
            try:
                search_results = self._search(search_cls, keyword, num=10)
            except Exception:
                failed += 1
                continue
            # An error payload carries no SERP signals; scoring it would give
            # a competition of 0 and push the keyword to the top of the list.
            if search_results.get("error"):
                failed += 1
                continue
            analyzed_keywords.append(
                self._build_metrics(keyword, search_results, calculator)
            )
        progress_bar.empty()
        status_text.empty()
        if failed:
            st.warning(f"⚠️ Skipped {failed} keyword(s) because their search failed.")
        analyzed_keywords.sort(key=lambda m: m.opportunity_score, reverse=True)
        return analyzed_keywords

    def run_keyword_analysis(self, params: Dict[str, Any]) -> Optional[pd.DataFrame]:
        """Run the keyword analysis pipeline.

        Args:
            params: Sidebar values; reads ``seed_keyword``, ``max_candidates``,
                ``min_search_volume`` and ``max_competition``.

        Returns:
            A DataFrame sorted by opportunity score (best first) with the
            volume/competition filters applied, or None when the analysis
            could not produce any result (an error is shown in that case).
        """
        seed_keyword = str(params.get("seed_keyword") or "").strip()
        if not seed_keyword:
            st.error("❌ Please enter a seed keyword before running the analysis.")
            return None
        try:
            # Imported lazily so a missing client is reported in the UI.
            from serpapi import GoogleSearch

            with st.spinner("🔍 Discovering related keywords..."):
                related_keywords = self._find_related_keywords(
                    GoogleSearch,
                    seed_keyword,
                    params["max_candidates"]
                )
            if not related_keywords:
                st.error("❌ No keyword candidates found. Please check your API key and try again.")
                return None

            with st.spinner("📊 Analyzing keywords and calculating scores..."):
                analyzed_keywords = self._analyze_keywords_batch(
                    GoogleSearch, related_keywords
                )
            if not analyzed_keywords:
                st.error("❌ No keywords were successfully analyzed.")
                return None

            df = pd.DataFrame([
                {
                    'Keyword': metrics.keyword,
                    'Monthly Searches': metrics.monthly_searches,
                    'Competition': metrics.competition_score,
                    'Opportunity Score': metrics.opportunity_score,
                    'Total Results': metrics.total_results,
                    'Ads Count': metrics.ads_count,
                    'Featured Snippet': 'Yes' if metrics.has_featured_snippet else 'No',
                    'People Also Ask': 'Yes' if metrics.has_people_also_ask else 'No',
                    'Knowledge Graph': 'Yes' if metrics.has_knowledge_graph else 'No'
                }
                for metrics in analyzed_keywords
            ])
            keep = (
                (df['Monthly Searches'] >= params['min_search_volume'])
                & (df['Competition'] <= params['max_competition'])
            )
            return df[keep].reset_index(drop=True)
        except Exception as e:
            st.error(f"❌ Analysis failed: {str(e)}")
            return None

    def add_enhancement_columns(self, df: pd.DataFrame) -> pd.DataFrame:
        """Return a copy of *df* with ``Intent`` and ``Tail`` columns added.

        Intent is ``informational``, ``transactional`` or ``commercial``
        (checked in that order, defaulting to ``informational``). Tail is
        ``long-tail`` for four or more words, ``mid-tail`` for exactly three
        and ``short-tail`` otherwise. The input frame is left untouched.
        """
        def classify_intent(keyword):
            if not isinstance(keyword, str) or not keyword:
                return "informational"
            lowered = keyword.lower()
            for intent, pattern in KeywordDashboard._INTENT_RULES:
                if pattern.search(lowered):
                    return intent
            return "informational"

        def classify_tail(keyword):
            if not keyword:
                return "short-tail"
            word_count = len(str(keyword).split())
            if word_count >= 4:
                return "long-tail"
            if word_count == 3:
                return "mid-tail"
            return "short-tail"

        # Work on a copy: the caller may pass a filtered slice of another frame.
        enhanced = df.copy()
        enhanced['Intent'] = enhanced['Keyword'].apply(classify_intent)
        enhanced['Tail'] = enhanced['Keyword'].apply(classify_tail)
        return enhanced

    def render_summary_metrics(self, df: pd.DataFrame):
        """Render the four summary cards (count, average, high, long-tail)."""
        col1, col2, col3, col4 = st.columns(4)
        with col1:
            st.markdown(
                f"\n📊 Total Keywords\n{len(df)}\n",
                unsafe_allow_html=True,
            )
        with col2:
            avg_score = df['Opportunity Score'].mean()
            st.markdown(
                f"\n⭐ Avg Opportunity Score\n{avg_score:.2f}\n",
                unsafe_allow_html=True,
            )
        with col3:
            high_opportunity = int(
                (df['Opportunity Score'] > self.HIGH_OPPORTUNITY_THRESHOLD).sum()
            )
            st.markdown(
                f"\n🚀 High Opportunity\n{high_opportunity}\n",
                unsafe_allow_html=True,
            )
        with col4:
            long_tail = int((df['Tail'] == 'long-tail').sum())
            st.markdown(
                f"\n🎯 Long-tail Keywords\n{long_tail}\n",
                unsafe_allow_html=True,
            )

    def render_top_keywords_table(self, df: pd.DataFrame, top_n: int = 10):
        """Render the first *top_n* rows of *df* as a styled table."""
        st.markdown("## 🏆 Top Keyword Opportunities")
        if df.empty:
            st.warning("No keywords to display.")
            return

        display_df = df.head(top_n).copy()
        # Thousands separators for the large integer columns.
        for column in ('Monthly Searches', 'Total Results'):
            display_df[column] = display_df[column].apply(lambda x: f"{x:,}")

        def highlight_max_score(s):
            is_max = s == s.max()
            return ['background-color: lightgreen' if v else '' for v in is_max]

        styled_df = display_df.style.apply(
            highlight_max_score,
            subset=['Opportunity Score']
        ).format({
            'Competition': '{:.3f}',
            'Opportunity Score': '{:.2f}'
        })
        st.dataframe(styled_df, use_container_width=True)

    def render_visualizations(self, df: pd.DataFrame):
        """Render the opportunity, intent/tail and volume-vs-competition charts."""
        if df.empty:
            st.warning("No data available for visualization.")
            return

        chart_tab1, chart_tab2, chart_tab3 = st.tabs(
            ["📊 Opportunity Scores", "🎯 Intent Analysis", "💹 Volume vs Competition"]
        )
        with chart_tab1:
            st.markdown("### Top 10 Keywords by Opportunity Score")
            top_10 = df.head(10)
            fig = px.bar(
                top_10,
                x='Opportunity Score',
                y='Keyword',
                orientation='h',
                title="Top 10 Keyword Opportunities",
                color='Opportunity Score',
                color_continuous_scale='viridis'
            )
            fig.update_layout(height=500, yaxis={'categoryorder': 'total ascending'})
            st.plotly_chart(fig, use_container_width=True)

        with chart_tab2:
            st.markdown("### Intent Distribution")
            col1, col2 = st.columns(2)
            with col1:
                intent_counts = df['Intent'].value_counts()
                fig_pie = px.pie(
                    values=intent_counts.values,
                    names=intent_counts.index,
                    title="Search Intent Distribution",
                    color_discrete_sequence=['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728']
                )
                st.plotly_chart(fig_pie, use_container_width=True)
            with col2:
                tail_counts = df['Tail'].value_counts()
                fig_tail = px.pie(
                    values=tail_counts.values,
                    names=tail_counts.index,
                    title="Keyword Tail Distribution",
                    color_discrete_sequence=['#9467bd', '#8c564b', '#e377c2']
                )
                st.plotly_chart(fig_tail, use_container_width=True)

        with chart_tab3:
            st.markdown("### Search Volume vs Competition Analysis")
            fig_scatter = px.scatter(
                df.head(50),  # Limit to top 50 for readability
                x='Competition',
                y='Monthly Searches',
                size='Opportunity Score',
                color='Intent',
                hover_name='Keyword',
                title="Search Volume vs Competition (Size = Opportunity Score)",
                labels={'Competition': 'Competition Score', 'Monthly Searches': 'Est. Monthly Searches'}
            )
            fig_scatter.update_layout(height=500)
            st.plotly_chart(fig_scatter, use_container_width=True)

    def save_results(
        self, df: pd.DataFrame, params: Dict[str, Any]
    ) -> Tuple[Optional[str], Optional[str], Optional[str]]:
        """Write the CSV, Excel and JSON exports for *df*.

        Args:
            df: Result frame including the ``Intent`` and ``Tail`` columns.
            params: Analysis parameters; reads ``seed_keyword``,
                ``max_candidates``, ``top_results``, ``min_search_volume``
                and ``max_competition``.

        Returns:
            ``(csv_path, excel_path, report_path)`` as strings, or
            ``(None, None, None)`` when *df* is empty or saving failed.
        """
        if df.empty:
            return None, None, None

        today = date.today().isoformat()
        cleaned_seed = re.sub(r"[^\w\s-]", "", params['seed_keyword']).strip()
        # Fall back to a fixed token when the seed has no file-name-safe text.
        safe_seed = re.sub(r"\s+", "_", cleaned_seed)[:30] or "seed"
        base_name = f"keywords_{safe_seed}_{today}"

        csv_path = self.processed_dir / f"{base_name}.csv"
        excel_path = self.processed_dir / f"{base_name}.xlsx"
        report_path = self.reports_dir / f"{base_name}_report.json"

        high_opportunity = df['Opportunity Score'] > self.HIGH_OPPORTUNITY_THRESHOLD
        try:
            df.to_csv(csv_path, index=False)

            with pd.ExcelWriter(excel_path, engine='openpyxl') as writer:
                df.head(params['top_results']).to_excel(writer, sheet_name='Top_Results', index=False)
                df.to_excel(writer, sheet_name='All_Keywords', index=False)
                summary_data = {
                    'Metric': [
                        'Total Keywords',
                        'Average Opportunity Score',
                        'High Opportunity Keywords (>50)',
                        'Long-tail Keywords',
                        'Informational Intent',
                        'Commercial Intent',
                        'Transactional Intent'
                    ],
                    'Value': [
                        len(df),
                        round(df['Opportunity Score'].mean(), 2),
                        int(high_opportunity.sum()),
                        int((df['Tail'] == 'long-tail').sum()),
                        int((df['Intent'] == 'informational').sum()),
                        int((df['Intent'] == 'commercial').sum()),
                        int((df['Intent'] == 'transactional').sum())
                    ]
                }
                pd.DataFrame(summary_data).to_excel(writer, sheet_name='Summary', index=False)

            report_data = {
                'analysis_date': datetime.now().isoformat(),
                'seed_keyword': params['seed_keyword'],
                'parameters': {
                    'max_candidates': params['max_candidates'],
                    'top_results': params['top_results'],
                    'min_search_volume': params['min_search_volume'],
                    'max_competition': params['max_competition']
                },
                'summary': {
                    'total_keywords': len(df),
                    'average_opportunity_score': float(df['Opportunity Score'].mean()),
                    'top_keyword': df.iloc[0]['Keyword'],
                    # Plain ints: json cannot serialise numpy integer types.
                    'intent_distribution': {
                        str(k): int(v) for k, v in df['Intent'].value_counts().items()
                    },
                    'tail_distribution': {
                        str(k): int(v) for k, v in df['Tail'].value_counts().items()
                    }
                }
            }
            with open(report_path, 'w', encoding='utf-8') as f:
                json.dump(report_data, f, indent=2, ensure_ascii=False)
            return str(csv_path), str(excel_path), str(report_path)
        except Exception as e:
            st.error(f"❌ Error saving files: {e}")
            return None, None, None

    def render_download_section(
        self,
        csv_path: Optional[str],
        excel_path: Optional[str],
        report_path: Optional[str],
    ):
        """Render one download button per export file that exists on disk."""
        st.markdown("## 📥 Download Results")
        col1, col2, col3 = st.columns(3)
        downloads = (
            (col1, csv_path, "📊 Download CSV", "text/csv"),
            (col2, excel_path, "📈 Download Excel",
             "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"),
            (col3, report_path, "📋 Download Report", "application/json"),
        )
        for column, path, label, mime in downloads:
            if not (path and os.path.exists(path)):
                continue
            with column:
                with open(path, 'rb') as file:
                    st.download_button(
                        label=label,
                        data=file.read(),
                        file_name=os.path.basename(path),
                        mime=mime
                    )

    def run(self):
        """Main dashboard execution method."""
        if not self.render_header():
            st.stop()

        params = self.render_sidebar()

        if params["run_analysis"]:
            if 'analysis_complete' not in st.session_state:
                st.session_state.analysis_complete = False
            df = self.run_keyword_analysis(params)
            if df is not None and not df.empty:
                df = self.add_enhancement_columns(df)
                st.session_state.results_df = df
                st.session_state.analysis_params = params
                st.session_state.analysis_complete = True
                # Force the exports to be regenerated for the new results.
                st.session_state.export_paths = None
                st.success(f"✅ Analysis complete! Found {len(df)} keywords matching your criteria.")
            elif df is not None:
                # Analysis worked but the sidebar filters removed every row.
                st.warning(
                    "⚠️ No keywords matched the current filters. "
                    "Try lowering the minimum search volume or raising the maximum competition."
                )

        if st.session_state.get('analysis_complete', False) and 'results_df' in st.session_state:
            df = st.session_state.results_df
            params = st.session_state.analysis_params

            self.render_summary_metrics(df)

            view_option = st.radio("📋 Choose View", ["Table View", "Chart View"], horizontal=True)
            if view_option == "Table View":
                self.render_top_keywords_table(df, params['top_results'])
            else:
                self.render_visualizations(df)

            # Streamlit reruns this method on every interaction; only rewrite
            # the export files when they are missing or the results changed.
            paths = st.session_state.get('export_paths')
            if not paths or not all(p and os.path.exists(p) for p in paths):
                with st.spinner("💾 Preparing download files..."):
                    paths = self.save_results(df, params)
                st.session_state.export_paths = paths
            csv_path, excel_path, report_path = paths
            if csv_path:
                self.render_download_section(csv_path, excel_path, report_path)
        elif not st.session_state.get('analysis_complete', False):
            st.markdown(self._WELCOME_MARKDOWN)
def main():
    """Build the dashboard and hand control to it."""
    KeywordDashboard().run()


if __name__ == "__main__":
    main()