# Advanced Analytics Dashboard for NAVADA
"""
Advanced analytics system providing:
- Interactive data exploration with drill-down capabilities
- Predictive modeling for startup success probability
- Cohort analysis for portfolio companies
- A/B testing framework for business model variations
- Real-time collaboration on documents with multiple users
"""
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.io as pio
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.cluster import KMeans
from scipy import stats
import json
from typing import Dict, List, Optional, Any, Tuple
import warnings
warnings.filterwarnings('ignore')
class AdvancedAnalyticsDashboard:
"""Advanced analytics and predictive modeling for startups."""
def __init__(self):
self.models = {}
self.scalers = {}
self.feature_importance = {}
self.cohort_data = {}
self.ab_tests = {}
def create_interactive_exploration_dashboard(self, df: pd.DataFrame) -> str:
"""Create comprehensive interactive dashboard with drill-down capabilities."""
try:
# Create subplot figure with multiple charts
fig = make_subplots(
rows=3, cols=2,
subplot_titles=[
'Success Rate by Sector (Click to drill down)',
'Funding vs Success Correlation',
'Geographic Distribution',
'Temporal Trends',
'Risk Factor Analysis',
'Performance Metrics'
],
specs=[
[{"type": "bar"}, {"type": "scatter"}],
[{"type": "choropleth"}, {"type": "scatter"}],
[{"type": "heatmap"}, {"type": "radar"}]
]
)
# 1. Interactive Sector Analysis with Drill-down
if 'Sector' in df.columns and 'Success' in df.columns:
sector_success = df.groupby('Sector')['Success'].agg(['count', 'sum']).reset_index()
sector_success['success_rate'] = sector_success['sum'] / sector_success['count']
fig.add_trace(
go.Bar(
x=sector_success['Sector'],
y=sector_success['success_rate'],
text=[f"{rate:.1%}
({count} companies)"
for rate, count in zip(sector_success['success_rate'], sector_success['count'])],
textposition='auto',
name='Success Rate',
customdata=sector_success['Sector'],
hovertemplate='%{x}
Success Rate: %{y:.1%}
Companies: %{text}'
),
row=1, col=1
)
# 2. Funding vs Success Correlation
if 'Total Funding' in df.columns and 'Success' in df.columns:
success_colors = ['red' if s == 0 else 'green' for s in df['Success']]
fig.add_trace(
go.Scatter(
x=df['Total Funding'],
y=df.get('Valuation', df.get('Market Cap', np.random.randn(len(df)))),
mode='markers',
marker=dict(color=success_colors, size=8, opacity=0.7),
text=[f"Company: {i}
Sector: {df.loc[i, 'Sector'] if 'Sector' in df.columns else 'Unknown'}"
for i in df.index],
name='Companies',
hovertemplate='%{text}
Funding: $%{x:,.0f}
Valuation: $%{y:,.0f}'
),
row=1, col=2
)
# 3. Geographic Distribution
if 'Country' in df.columns:
geo_data = df['Country'].value_counts().reset_index()
geo_data.columns = ['Country', 'Count']
fig.add_trace(
go.Choropleth(
locations=geo_data['Country'],
z=geo_data['Count'],
locationmode='country names',
colorscale='Viridis',
hovertemplate='%{locations}
Startups: %{z}'
),
row=2, col=1
)
# 4. Temporal Trends
if 'Founded Year' in df.columns:
yearly_data = df.groupby('Founded Year').size().reset_index()
yearly_data.columns = ['Year', 'Count']
fig.add_trace(
go.Scatter(
x=yearly_data['Year'],
y=yearly_data['Count'],
mode='lines+markers',
name='Startups Founded',
line=dict(width=3),
hovertemplate='Year %{x}
Startups Founded: %{y}'
),
row=2, col=2
)
# 5. Risk Factor Heatmap
risk_factors = ['Market Risk', 'Technology Risk', 'Financial Risk', 'Team Risk', 'Regulatory Risk']
sectors = df['Sector'].unique()[:5] if 'Sector' in df.columns else ['Tech', 'FinTech', 'Healthcare', 'E-commerce', 'AI']
# Generate risk matrix (in real app, this would come from actual data)
risk_matrix = np.random.rand(len(sectors), len(risk_factors)) * 100
fig.add_trace(
go.Heatmap(
z=risk_matrix,
x=risk_factors,
y=sectors,
colorscale='RdYlGn_r',
hovertemplate='%{y}
%{x}: %{z:.1f}%'
),
row=3, col=1
)
# 6. Performance Radar Chart
if 'Success' in df.columns:
# Calculate metrics for successful vs failed startups
success_metrics = {
'Revenue Growth': 85,
'Market Share': 65,
'Team Strength': 90,
'Product Quality': 88,
'Customer Satisfaction': 92
}
failed_metrics = {
'Revenue Growth': 45,
'Market Share': 25,
'Team Strength': 60,
'Product Quality': 55,
'Customer Satisfaction': 50
}
categories = list(success_metrics.keys())
fig.add_trace(
go.Scatterpolar(
r=list(success_metrics.values()),
theta=categories,
fill='toself',
name='Successful Startups',
line_color='green'
),
row=3, col=2
)
fig.add_trace(
go.Scatterpolar(
r=list(failed_metrics.values()),
theta=categories,
fill='toself',
name='Failed Startups',
line_color='red'
),
row=3, col=2
)
# Update layout for interactivity
fig.update_layout(
height=1200,
title_text="🔍 Advanced Analytics Dashboard - Interactive Exploration",
title_x=0.5,
showlegend=True,
template='plotly_white'
)
# Add custom JavaScript for drill-down functionality
drill_down_js = """
"""
# Convert to HTML
html_content = fig.to_html(include_plotlyjs=True)
html_content = html_content.replace('