import base64
import io
import os
import re

import dash_bootstrap_components as dbc
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from dash import Dash, html, dcc, Input, Output, State, callback_context, no_update
from scipy import stats


# Dash application styled with the Bootstrap theme. `server` re-exports the
# app's underlying server object under a conventional module-level name.
_EXTERNAL_STYLESHEETS = [dbc.themes.BOOTSTRAP]

app = Dash(__name__, external_stylesheets=_EXTERNAL_STYLESHEETS)
server = app.server


class AIVisualizationEngine:
    """Rule-based helper that profiles a DataFrame and suggests how to plot it.

    Columns are bucketed by dtype once, at construction time; every other
    method consults those buckets rather than re-inspecting the frame.
    """

    # Ordered (chart type, compiled pattern) pairs used to read a free-text
    # request. Order is significant: the first match wins, so the most
    # specific phrases come first ("correlation matrix" before the generic
    # "correlation") and the weak cue word "by" comes last so that it never
    # overrides an explicitly named chart. Patterns are anchored on word
    # boundaries so that e.g. "by" does not fire inside "baby".
    _CHART_PATTERNS = tuple(
        (chart, re.compile(rf'(?<!\w)(?:{pattern})(?!\w)'))
        for chart, pattern in (
            ('heatmap', r'heat ?maps?|correlation matri(?:x|ces)'),
            ('scatter', r'scatter\w*|correlat\w*|relationships?'),
            ('histogram', r'histograms?|distribut\w*|freq\w*'),
            ('line', r'lines?|trends?|over time|timelines?'),
            ('box', r'box(?:es|plots?)?|quartiles?|medians?'),
            ('pie', r'pies?|proportions?|percentages?'),
            ('bar', r'bars?|compar\w*|by'),
        )
    )

    def __init__(self, df):
        """Store *df* and record its numeric, object and datetime column names."""
        self.df = df
        self.numeric_cols = df.select_dtypes(include=['number']).columns.tolist()
        self.categorical_cols = df.select_dtypes(include=['object']).columns.tolist()
        self.datetime_cols = df.select_dtypes(include=['datetime64']).columns.tolist()

    def recommend_chart_type(self, x_col=None, y_col=None):
        """Suggest chart types for the selected column(s).

        Any column that is not numeric is treated as categorical.

        Returns:
            A list of ``{'type', 'confidence', 'reason'}`` dicts ordered from
            most to least confident (callers take the first entry as the
            recommendation). The list is empty when no X column is given.
        """
        recommendations = []

        if x_col and y_col:
            x_numeric = x_col in self.numeric_cols
            y_numeric = y_col in self.numeric_cols

            if x_numeric and y_numeric:
                recommendations = [
                    {'type': 'scatter', 'confidence': 0.9, 'reason': 'Both variables are numeric - scatter plot shows correlation'},
                    {'type': 'line', 'confidence': 0.7, 'reason': 'Line chart good for trends if X is ordered'},
                ]
            elif not x_numeric and y_numeric:
                recommendations = [
                    {'type': 'bar', 'confidence': 0.9, 'reason': 'Categorical vs numeric - bar chart shows comparisons'},
                    {'type': 'box', 'confidence': 0.8, 'reason': 'Box plot shows distribution across categories'},
                ]
            elif x_numeric and not y_numeric:
                # Mirror image of the branch above: categories on the Y axis.
                recommendations = [
                    {'type': 'bar', 'confidence': 0.8, 'reason': 'Numeric vs categorical - horizontal bar chart shows comparisons'},
                ]
            else:
                recommendations = [
                    {'type': 'bar', 'confidence': 0.8, 'reason': 'Count relationships between categories'},
                ]
        elif x_col and not y_col:
            if x_col in self.numeric_cols:
                recommendations = [
                    {'type': 'histogram', 'confidence': 0.9, 'reason': 'Single numeric variable - histogram shows distribution'},
                    {'type': 'box', 'confidence': 0.7, 'reason': 'Box plot shows statistical summary'},
                ]
            else:
                recommendations = [
                    {'type': 'pie', 'confidence': 0.8, 'reason': 'Categorical variable - pie chart shows proportions'},
                    {'type': 'bar', 'confidence': 0.9, 'reason': 'Bar chart shows category frequencies'},
                ]

        # Best suggestion first; the sort is stable, so ties keep listed order.
        return sorted(recommendations, key=lambda rec: rec['confidence'], reverse=True)

    def detect_outliers(self, column):
        """Return index labels of rows lying outside 1.5 * IQR of *column*.

        Non-numeric (or unknown) columns yield an empty list.
        """
        if column not in self.numeric_cols:
            return []

        series = self.df[column]
        q1 = series.quantile(0.25)
        q3 = series.quantile(0.75)
        iqr = q3 - q1
        lower_bound = q1 - 1.5 * iqr
        upper_bound = q3 + 1.5 * iqr

        outliers = self.df[(series < lower_bound) | (series > upper_bound)]
        return outliers.index.tolist()

    def generate_insights(self, x_col, y_col=None):
        """Build human-readable observations about *x_col* (and *y_col*).

        Reports mean/median, a skew hint when they differ by more than half a
        standard deviation, the IQR outlier count, and - when both columns
        are numeric - a note on strong (> 0.7) or weak (< 0.3) correlation.

        Returns:
            A list of message strings; empty when *x_col* is not numeric.
        """
        insights = []

        if x_col in self.numeric_cols:
            mean_val = self.df[x_col].mean()
            median_val = self.df[x_col].median()
            std_val = self.df[x_col].std()

            insights.append(f"π {x_col}: Mean = {mean_val:.2f}, Median = {median_val:.2f}")

            if abs(mean_val - median_val) > std_val * 0.5:
                insights.append(f"β οΈ {x_col} distribution appears skewed")

            outliers = self.detect_outliers(x_col)
            if outliers:
                insights.append(f"π― Found {len(outliers)} potential outliers in {x_col}")

        if y_col and x_col in self.numeric_cols and y_col in self.numeric_cols:
            correlation = self.df[x_col].corr(self.df[y_col])
            if abs(correlation) > 0.7:
                strength = "strong" if abs(correlation) > 0.8 else "moderate"
                direction = "positive" if correlation > 0 else "negative"
                insights.append(f"π {strength.title()} {direction} correlation ({correlation:.3f}) between {x_col} and {y_col}")
            elif abs(correlation) < 0.3:
                insights.append(f"π Weak correlation ({correlation:.3f}) between {x_col} and {y_col}")

        return insights

    def parse_natural_language_query(self, query):
        """Extract a chart type and column names from a free-text request.

        Matching is case-insensitive and works on whole words, so short cue
        words and short column names do not fire inside longer words. A
        column is also recognised when its underscores are typed as spaces.

        Returns:
            ``{'chart_type': str | None, 'columns': list, 'query': str}``
            where ``columns`` holds the original column labels in DataFrame
            order and ``query`` is the normalised (lower-cased, stripped) text.
        """
        query = (query or '').lower().strip()

        suggested_chart = next(
            (chart for chart, pattern in self._CHART_PATTERNS if pattern.search(query)),
            None,
        )

        mentioned_cols = []
        for col in self.df.columns:
            # Labels may be non-strings (e.g. integer headers from Excel).
            name = str(col).lower().strip()
            if not name:
                continue
            variants = {name, name.replace('_', ' ')}
            if any(
                re.search(rf'(?<!\w){re.escape(variant)}(?!\w)', query)
                for variant in variants
            ):
                mentioned_cols.append(col)

        return {
            'chart_type': suggested_chart,
            'columns': mentioned_cols,
            'query': query
        }

    def get_smart_color_scheme(self, chart_type, column=None):
        """Return the colour-scale name associated with *chart_type*.

        *column* is accepted for interface compatibility but not consulted.
        Unknown chart types fall back to ``'Viridis'``.
        """
        color_schemes = {
            'scatter': 'Viridis',
            'line': 'Blues',
            'bar': 'Set3',
            'histogram': 'Plasma',
            'box': 'Set2',
            'pie': 'Pastel',
            'heatmap': 'RdBu_r'
        }
        return color_schemes.get(chart_type, 'Viridis')


def _labelled_dropdown(caption, **dropdown_props):
    """Return a half-width column holding a form label above a dropdown."""
    return dbc.Col([
        html.Label(caption, className="form-label"),
        dcc.Dropdown(**dropdown_props),
    ], width=6)


# Page title and tagline.
_header_row = dbc.Row([
    dbc.Col([
        html.H1("π€ AI-Enhanced Data Dashboard", className="text-center mb-4"),
        html.P("Upload data and let AI help you create intelligent visualizations!",
               className="text-center text-muted"),
        html.Hr(),
    ], width=12)
])

# Left-hand card, part 1: file upload drop zone.
_upload_box_style = {
    'width': '100%',
    'height': '60px',
    'lineHeight': '60px',
    'borderWidth': '1px',
    'borderStyle': 'dashed',
    'borderRadius': '5px',
    'textAlign': 'center',
    'margin': '10px'
}
_upload_widget = dcc.Upload(
    id='upload-data',
    children=html.Div([
        'Drag and Drop or ',
        html.A('Select Files')
    ]),
    style=_upload_box_style,
    multiple=False,
    accept='.csv,.xlsx,.txt'
)

# Left-hand card, part 2: free-text query box with its submit button.
_query_group = dbc.InputGroup([
    dbc.Input(
        id="ai-query",
        placeholder="Try: 'Show scatter plot of age vs salary' or 'Bar chart of departments'",
        type="text",
    ),
    dbc.Button(
        "π€ AI Create",
        id="ai-create-btn",
        color="primary",
        n_clicks=0
    )
])

# Left-hand card, part 3: one-click analytics buttons.
_analytics_buttons = dbc.ButtonGroup([
    dbc.Button("Summary Stats", id="stats-btn", size="sm"),
    dbc.Button("AI Insights", id="insights-btn", size="sm"),
    dbc.Button("Outliers", id="outliers-btn", size="sm"),
], className="w-100")

_control_panel = dbc.Col([
    dbc.Card([
        dbc.CardBody([
            html.H4("π Data Upload", className="card-title"),
            _upload_widget,

            html.Div(id='upload-status', className="mt-2"),
            html.Hr(),

            html.H4("π― AI Query Interface", className="card-title"),
            _query_group,

            html.Div(id="ai-recommendations", className="mt-3"),
            html.Hr(),

            html.H4("π Quick Analytics", className="card-title"),
            _analytics_buttons,

            html.Div(id="quick-analytics", className="mt-3")
        ])
    ])
], width=4)

# Right-hand side: chart controls, the graph itself and the data preview.
_chart_type_options = [
    {'label': label, 'value': value}
    for label, value in (
        ('AI Recommend', 'ai_recommend'),
        ('Scatter Plot', 'scatter'),
        ('Line Chart', 'line'),
        ('Bar Chart', 'bar'),
        ('Histogram', 'histogram'),
        ('Box Plot', 'box'),
        ('Heatmap', 'heatmap'),
        ('Pie Chart', 'pie'),
    )
]

_chart_card = dbc.Card([
    dbc.CardBody([
        html.H4("π AI-Enhanced Visualizations", className="card-title"),

        dbc.Row([
            _labelled_dropdown(
                "Chart Type:",
                id='chart-type',
                options=_chart_type_options,
                value='ai_recommend',
                className="mb-2",
            ),
            _labelled_dropdown(
                "Color By:",
                id='color-column',
                placeholder="AI will suggest colors",
                className="mb-2",
            ),
        ]),

        dbc.Row([
            _labelled_dropdown("X-Axis:", id='x-column', placeholder="Select X column"),
            _labelled_dropdown("Y-Axis:", id='y-column', placeholder="Select Y column"),
        ], className="mb-3"),

        dcc.Graph(id='main-graph', style={'height': '500px'}),

        html.Div(id='ai-insights-display', className="mt-3")
    ])
])

_explorer_card = dbc.Card([
    dbc.CardBody([
        html.H4("π Data Explorer", className="card-title"),
        html.Div(id='data-table')
    ])
], className="mt-3")

_visual_panel = dbc.Col([_chart_card, _explorer_card], width=8)

# Full page: header, the two panels side by side, and the client-side stores.
app.layout = dbc.Container([
    _header_row,
    dbc.Row([_control_panel, _visual_panel], className="mt-4"),
    dcc.Store(id='stored-data'),
    dcc.Store(id='ai-engine'),
], fluid=True)


def parse_contents(contents, filename):
    """Decode an uploaded file into a DataFrame.

    Args:
        contents: Upload payload of the form ``"<header>,<base64 data>"``.
        filename: Original file name; its extension (case-insensitive)
            selects the parser.

    Returns:
        ``(df, None)`` on success, otherwise ``(None, message)`` where
        *message* describes the problem. Parsing problems never raise.

    Supported extensions: ``.csv``; ``.txt`` (delimiter is sniffed);
    ``.xls`` and its variants such as ``.xlsx``.
    """
    extension = os.path.splitext(filename or '')[1].lower()

    try:
        # Split only on the first comma: everything after it is the payload.
        _, separator, payload = contents.partition(',')
        if not separator:
            raise ValueError("upload payload is missing its base64 section")
        decoded = base64.b64decode(payload)

        if extension == '.csv':
            # utf-8-sig drops a leading BOM so it cannot end up inside the
            # first column name.
            df = pd.read_csv(io.StringIO(decoded.decode('utf-8-sig')))
        elif extension == '.txt':
            # Plain-text tables may be tab-, comma- or semicolon-delimited;
            # sep=None lets the Python engine sniff the delimiter.
            df = pd.read_csv(io.StringIO(decoded.decode('utf-8-sig')),
                             sep=None, engine='python')
        elif extension.startswith('.xls'):
            df = pd.read_excel(io.BytesIO(decoded))
        else:
            return None, "Unsupported file type"

        return df, None
    except Exception as e:
        # Boundary with user-supplied files: report instead of crashing the callback.
        return None, f"Error processing file: {str(e)}"


@app.callback(
    [Output('stored-data', 'data'),
     Output('upload-status', 'children'),
     Output('data-table', 'children'),
     Output('x-column', 'options'),
     Output('y-column', 'options'),
     Output('color-column', 'options'),
     Output('x-column', 'value'),
     Output('y-column', 'value')],
    [Input('upload-data', 'contents')],
    [State('upload-data', 'filename')]
)
def update_data(contents, filename):
    """Load an uploaded file and refresh every widget that depends on it.

    Returns, in callback-output order: the records to store, the upload
    status alert, a preview table of the first ten rows, the option lists
    for the X / Y / colour dropdowns, and the default X and Y selections
    (the first two numeric columns when available, otherwise the first two
    columns). On a missing or unreadable upload everything is reset.
    """
    if contents is None:
        return None, "", "", [], [], [], None, None

    df, error = parse_contents(contents, filename)

    if error:
        return None, dbc.Alert(error, color="danger"), "", [], [], [], None, None

    # The records below are stored as JSON, where every key is a string.
    # Normalising labels up front keeps the dropdown values identical to the
    # column names the other callbacks see after reloading the stored data.
    df.columns = [str(col) for col in df.columns]

    table = dbc.Table.from_dataframe(
        df.head(10),
        striped=True,
        bordered=True,
        hover=True,
        size='sm'
    )

    ai_engine = AIVisualizationEngine(df)

    success_msg = dbc.Alert([
        html.H6("✅ File uploaded successfully! π€ AI Ready"),
        html.P(f"Shape: {df.shape[0]} rows Γ {df.shape[1]} columns"),
        html.P(f"π Numeric: {len(ai_engine.numeric_cols)}, π Categorical: {len(ai_engine.categorical_cols)}")
    ], color="success")

    all_columns = [{'label': col, 'value': col} for col in df.columns]

    # Prefer numeric columns as defaults so the first chart is meaningful.
    if ai_engine.numeric_cols:
        default_x = ai_engine.numeric_cols[0]
        default_y = ai_engine.numeric_cols[1] if len(ai_engine.numeric_cols) > 1 else None
    else:
        default_x = all_columns[0]['value'] if all_columns else None
        default_y = all_columns[1]['value'] if len(all_columns) > 1 else None

    return df.to_dict('records'), success_msg, table, all_columns, all_columns, all_columns, default_x, default_y


@app.callback(
    [Output('chart-type', 'value'),
     Output('ai-recommendations', 'children')],
    [Input('ai-create-btn', 'n_clicks')],
    [State('ai-query', 'value'),
     State('stored-data', 'data')]
)
def handle_ai_query(n_clicks, query, data):
    """Interpret the free-text query and report what was understood.

    Returns the chart type to select (``'ai_recommend'`` when the query names
    none) and an alert listing the detected chart type and columns. When
    there is nothing to interpret - no click yet, blank query, or no data -
    the current chart-type selection is left untouched.
    """
    if not n_clicks or not query or not query.strip() or not data:
        # no_update keeps whatever chart type the user picked by hand.
        return no_update, ""

    df = pd.DataFrame(data)
    ai_engine = AIVisualizationEngine(df)

    parsed = ai_engine.parse_natural_language_query(query)

    # One Div per line so the messages do not run together inside the alert.
    recommendations = []
    if parsed['chart_type']:
        recommendations.append(html.Div([
            "π― Suggested chart type: ",
            html.Strong(parsed['chart_type'].title()),
        ]))

    if parsed['columns']:
        column_names = ', '.join(str(col) for col in parsed['columns'])
        recommendations.append(html.Div(f"π Detected columns: {column_names}"))

    if not recommendations:
        recommendations.append(html.Div(
            "π€ Try queries like: 'scatter age salary', 'bar chart departments', 'histogram of scores'"
        ))

    return parsed['chart_type'] or 'ai_recommend', dbc.Alert(recommendations, color="info")


@app.callback(
    Output('quick-analytics', 'children'),
    [Input('stats-btn', 'n_clicks'),
     Input('insights-btn', 'n_clicks'),
     Input('outliers-btn', 'n_clicks')],
    [State('stored-data', 'data'),
     State('x-column', 'value'),
     State('y-column', 'value')]
)
def quick_analytics(stats_clicks, insights_clicks, outliers_clicks, data, x_col, y_col):
    """Render the panel for whichever quick-analytics button was pressed.

    The click counters are only triggers; the pressed button is identified
    from the callback context. Returns an alert component, or an empty
    string when there is no data or no button has been pressed.
    """
    ctx = callback_context
    if not data or not ctx.triggered:
        return ""

    button_id = ctx.triggered[0]['prop_id'].split('.')[0]
    df = pd.DataFrame(data)

    if button_id == 'stats-btn':
        stats = df.describe()
        return dbc.Alert([
            html.H6("π Summary Statistics"),
            dbc.Table.from_dataframe(stats.reset_index(), size='sm')
        ], color="light")

    ai_engine = AIVisualizationEngine(df)

    if button_id == 'insights-btn':
        if not x_col:
            return dbc.Alert("Select columns to get AI insights", color="warning")
        insights = ai_engine.generate_insights(x_col, y_col)
        if not insights:
            # Nothing is produced for non-numeric columns; say so rather
            # than showing an empty box.
            return dbc.Alert(f"No insights available for {x_col}", color="warning")
        return dbc.Alert([
            html.H6("π€ AI Insights"),
            html.Ul([html.Li(insight) for insight in insights])
        ], color="info")

    if button_id == 'outliers-btn':
        if not x_col or x_col not in ai_engine.numeric_cols:
            return dbc.Alert("Select a numeric column to detect outliers", color="warning")
        outliers = ai_engine.detect_outliers(x_col)
        if outliers:
            outlier_data = df.loc[outliers, [x_col]]
            return dbc.Alert([
                html.H6(f"π― Outliers in {x_col}"),
                dbc.Table.from_dataframe(outlier_data.reset_index(), size='sm')
            ], color="warning")
        return dbc.Alert(f"✅ No outliers detected in {x_col}", color="success")

    return ""


def _message_figure(text, **font):
    """Return an empty figure showing *text*; *font* sets optional font properties."""
    fig = go.Figure()
    annotation = dict(text=text, x=0.5, y=0.5, showarrow=False)
    if font:
        annotation['font'] = font
    fig.add_annotation(**annotation)
    return fig


@app.callback(
    [Output('main-graph', 'figure'),
     Output('ai-insights-display', 'children')],
    [Input('stored-data', 'data'),
     Input('chart-type', 'value'),
     Input('x-column', 'value'),
     Input('y-column', 'value'),
     Input('color-column', 'value')]
)
def update_main_graph(data, chart_type, x_col, y_col, color_col):
    """Build the main figure for the current data and control selections.

    Returns ``(figure, insights)``. *insights* is an alert component for
    scatter plots and an empty string otherwise. When the inputs are
    insufficient for the requested chart, the figure carries an explanatory
    message instead of a plot; plotting errors are reported the same way.
    """
    if not data:
        fig = _message_figure("Upload data to see AI-powered visualizations",
                              size=16, color="gray")
        fig.update_layout(template="plotly_white")
        return fig, ""

    df = pd.DataFrame(data)
    ai_engine = AIVisualizationEngine(df)

    # A selection can outlive the dataset it was made for (the colour
    # dropdown's value is not reset by a new upload). Treat any column that
    # is not in the current frame as "nothing selected" instead of letting
    # plotly raise on it.
    x_col, y_col, color_col = (
        col if col in df.columns else None
        for col in (x_col, y_col, color_col)
    )

    if chart_type == 'ai_recommend' and x_col:
        recommendations = ai_engine.recommend_chart_type(x_col, y_col)
        if recommendations:
            chart_type = recommendations[0]['type']

    if not x_col and not y_col:
        fig = _message_figure("Select columns or use AI Query to create visualization",
                              size=16, color="gray")
        fig.update_layout(template="plotly_white")
        return fig, ""

    insights_display = ""

    try:
        color_scheme = ai_engine.get_smart_color_scheme(chart_type, color_col)

        if chart_type == 'scatter':
            if x_col and y_col:
                fig = px.scatter(df, x=x_col, y=y_col, color=color_col,
                                 title=f"π€ AI Scatter Plot: {y_col} vs {x_col}",
                                 color_continuous_scale=color_scheme)

                insights = ai_engine.generate_insights(x_col, y_col)
                insights_display = dbc.Alert([
                    html.H6("π€ AI Insights"),
                    html.Ul([html.Li(insight) for insight in insights])
                ], color="info")
            else:
                fig = _message_figure("Select both X and Y columns for scatter plot")

        elif chart_type == 'line':
            if x_col and y_col:
                fig = px.line(df, x=x_col, y=y_col, color=color_col,
                              title=f"π€ AI Line Chart: {y_col} vs {x_col}",
                              color_discrete_sequence=px.colors.qualitative.Set3)
            else:
                fig = _message_figure("Select both X and Y columns for line chart")

        elif chart_type == 'bar':
            if x_col and y_col:
                fig = px.bar(df, x=x_col, y=y_col, color=color_col,
                             title=f"π€ AI Bar Chart: {y_col} by {x_col}",
                             color_discrete_sequence=px.colors.qualitative.Set3)
            elif x_col:
                # Name both columns explicitly: the labels produced by
                # value_counts().reset_index() differ between pandas versions.
                counts = (df[x_col].value_counts()
                          .rename_axis(x_col)
                          .reset_index(name='count'))
                fig = px.bar(counts, x=x_col, y='count',
                             title=f"π€ AI Value Counts: {x_col}",
                             color_discrete_sequence=px.colors.qualitative.Set3)
            else:
                fig = _message_figure("Select at least X column for bar chart")

        elif chart_type == 'histogram':
            if x_col:
                fig = px.histogram(df, x=x_col, color=color_col,
                                   title=f"π€ AI Histogram: {x_col}",
                                   color_discrete_sequence=px.colors.qualitative.Pastel)

                if x_col in ai_engine.numeric_cols:
                    mean_val = df[x_col].mean()
                    # A mean of exactly 0 is still worth marking; NaN is not.
                    if pd.notna(mean_val):
                        fig.add_vline(x=mean_val, line_dash="dash", line_color="red",
                                      annotation_text=f"Mean: {mean_val:.2f}")
            else:
                fig = _message_figure("Select X column for histogram")

        elif chart_type == 'box':
            if y_col:
                # Group by the colour column when one is chosen; otherwise
                # fall back to a categorical X column so it is not ignored.
                group_col = color_col
                if (group_col is None and x_col and x_col != y_col
                        and x_col not in ai_engine.numeric_cols):
                    group_col = x_col
                fig = px.box(df, x=group_col, y=y_col,
                             title=f"π€ AI Box Plot: {y_col}" + (f" by {group_col}" if group_col else ""),
                             color_discrete_sequence=px.colors.qualitative.Set2)
            elif x_col:
                fig = px.box(df, y=x_col,
                             title=f"π€ AI Box Plot: {x_col}",
                             color_discrete_sequence=px.colors.qualitative.Set2)
            else:
                fig = _message_figure("Select a column for box plot")

        elif chart_type == 'heatmap':
            numeric_cols = df.select_dtypes(include=['number']).columns
            if len(numeric_cols) > 1:
                corr_matrix = df[numeric_cols].corr()
                fig = px.imshow(corr_matrix,
                                text_auto=True,
                                aspect="auto",
                                title="π€ AI Correlation Heatmap",
                                color_continuous_scale='RdBu_r')
            else:
                fig = _message_figure("Need at least 2 numeric columns for heatmap")

        elif chart_type == 'pie':
            if x_col:
                value_counts = df[x_col].value_counts()
                fig = px.pie(values=value_counts.values,
                             names=value_counts.index,
                             title=f"π€ AI Pie Chart: {x_col}",
                             color_discrete_sequence=px.colors.qualitative.Pastel)
            else:
                fig = _message_figure("Select X column for pie chart")

        else:
            fig = _message_figure("π€ AI is analyzing... Select chart type or use AI Query")

        fig.update_layout(
            template="plotly_white",
            height=500,
            font=dict(size=12),
            title_font_size=16,
        )

        return fig, insights_display

    except Exception as e:
        # UI boundary: surface the problem in the graph area rather than
        # failing the callback.
        fig = _message_figure(f"AI Error: {str(e)}", color="red")
        fig.update_layout(template="plotly_white")
        return fig, ""


if __name__ == '__main__':
    # Debug mode serves an interactive debugger that can execute arbitrary
    # code, and this server listens on every interface, so debug is opt-in:
    # set DASH_DEBUG=1 (or true/yes/on) for local development.
    debug_enabled = os.environ.get('DASH_DEBUG', '').strip().lower() in {'1', 'true', 'yes', 'on'}
    app.run(host='0.0.0.0', port=8051, debug=debug_enabled)