Spaces:

SamadhiDBS
/

smart-analytics-copilot

Sleeping

App Files Files Community

SamadhiDBS committed 22 days ago

Commit

d18f851

verified ·

1 Parent(s): 2d9ee72

Upload 24 files

Browse files

Files changed (24) hide show

app/__init__.py +0 -0
app/__pycache__/analyzer.cpython-311.pyc +0 -0
app/__pycache__/chart_customizer.cpython-311.pyc +0 -0
app/__pycache__/dashboard.cpython-311.pyc +0 -0
app/__pycache__/data_processor.cpython-311.pyc +0 -0
app/__pycache__/export_utils.cpython-311.pyc +0 -0
app/__pycache__/insight_generator.cpython-311.pyc +0 -0
app/__pycache__/query_engine.cpython-311.pyc +0 -0
app/__pycache__/session_manager.cpython-311.pyc +0 -0
app/analyzer.py +159 -0
app/chart_customizer.py +177 -0
app/dashboard.py +171 -0
app/data_processor.py +132 -0
app/export_utils.py +251 -0
app/insight_generator.py +181 -0
app/main.py +646 -0
app/query_engine.py +370 -0
app/session_manager.py +82 -0
requirements.txt +15 -0
run.py +22 -0
saved_sessions/session_20260418_131145.pkl +3 -0
saved_sessions/session_20260418_132524.pkl +3 -0
saved_sessions/session_20260418_135615.pkl +3 -0
saved_sessions/session_20260418_135934.pkl +3 -0

app/__init__.py ADDED Viewed

File without changes

app/__pycache__/analyzer.cpython-311.pyc ADDED Viewed

Binary file (7.69 kB). View file

app/__pycache__/chart_customizer.cpython-311.pyc ADDED Viewed

Binary file (6.8 kB). View file

app/__pycache__/dashboard.cpython-311.pyc ADDED Viewed

Binary file (7.51 kB). View file

app/__pycache__/data_processor.cpython-311.pyc ADDED Viewed

Binary file (7.66 kB). View file

app/__pycache__/export_utils.cpython-311.pyc ADDED Viewed

Binary file (12.3 kB). View file

app/__pycache__/insight_generator.cpython-311.pyc ADDED Viewed

Binary file (10.7 kB). View file

app/__pycache__/query_engine.cpython-311.pyc ADDED Viewed

Binary file (23.3 kB). View file

app/__pycache__/session_manager.cpython-311.pyc ADDED Viewed

Binary file (5.02 kB). View file

app/analyzer.py ADDED Viewed

	@@ -0,0 +1,159 @@

+##________automated analysis________##
+import pandas as pd
+import numpy as np
+from scipy import stats
+class Analyzer:
+    def __init__(self, df, schema):
+        self.df = df
+        self.schema = schema
+        self.insights = []
+    def run_full_analysis(self):
+        """run all analysis methods"""
+        print("Running automated analysis....")
+        analysis = {
+            'descriptive_stats': self.descriptive_statistics(),
+            'correlations': self.correlation_analysis(),
+            'trends': self.trend_detection(),
+            'group_analysis': self.group_by_analysis(),
+            'outliers': self.detect_outliers(),
+            'distributions': self.get_distributions()
+        }
+        return analysis
+    def descriptive_statistics(self):
+        """basic statistics for numeric columns"""
+        stats = {}
+        for col in self.schema['numeric']:
+            stats[col] = {
+                'mean': self.df[col].mean(),
+                'median': self.df[col].median(),
+                'std': self.df[col].std(),
+                'min': self.df[col].min(),
+                'max': self.df[col].max(),
+                'q1': self.df[col].quantile(0.25),
+                'q3': self.df[col].quantile(0.75)
+            }
+        return stats
+    def correlation_analysis(self):
+        """fins correlations between numeric columns"""
+        if len(self.schema['numeric']) >= 2:
+            corr_matrix = self.df[self.schema['numeric']].corr()
+            ## ind strong correlations
+            strong_corrs = []
+            for i in range(len(corr_matrix.columns)):
+                for j in range(i+1, len(corr_matrix.columns)):
+                    corr_value = corr_matrix.iloc[i,j]
+                    if abs(corr_value) > 0.5:  # strong correlation threshold
+                        strong_corrs.append({
+                            'col1': corr_matrix.columns[i],
+                            'col2': corr_matrix.columns[j],
+                            'correlation': corr_value,
+                            'strength': 'positive' if corr_value > 0 else 'negative'
+                        })
+            return strong_corrs
+        return []
+    def trend_detection(self):
+        """detect trends in time series data"""
+        trends = []
+        for date_col in self.schema['datetime']:
+            for num_col in self.schema['numeric']:
+                #group by date and calculate mean
+                trend_data = self.df.groupby(pd.Grouper(key=date_col, freq='M'))[num_col].mean()
+                if len(trend_data) > 1:
+                    # simple trend detection: compare first and last
+                    first_val = trend_data.iloc[0]
+                    last_val = trend_data.iloc[-1]
+                    percent_change = ((last_val - first_val) / first_val) * 100 if first_val != 0 else 0
+                    trends.append({
+                        'column': num_col,
+                        'time_column': date_col,
+                        'percent_change': percent_change,
+                        'direction': 'increasing' if percent_change > 0 else 'decreasing',
+                        'first_value': first_val,
+                        'last_value': last_val
+                    })
+        return trends
+    def group_by_analysis(self):
+        """analyze data by categorical groups"""
+        group_analysis = {}
+        for cat_col in self.schema['categorical']:
+            group_analysis[cat_col] = {}
+            for num_col in self.schema['numeric']:
+                grouped = self.df.groupby(cat_col)[num_col].agg(['mean', 'sum', 'count'])
+                #find top performer
+                top_category = grouped['mean'].idxmax() if len(grouped) > 0 else None
+                top_value = grouped['mean'].max() if len(grouped) > 0 else 0
+                group_analysis[cat_col][num_col] = {
+                    'grouped_data': grouped.to_dict(),
+                    'top_category': top_category,
+                    'top_value': top_value,
+                    'total_categories': len(grouped)
+                }
+        return group_analysis
+    def detect_outliers(self):
+        """detect outliers using IQR method"""
+        outliers = {}
+        for col in self.schema['numeric']:
+            Q1 = self.df[col].quantile(0.25)
+            Q3 = self.df[col].quantile(0.75)
+            IQR = Q3 - Q1
+            lower_bound = Q1 - 1.5 * IQR
+            upper_bound = Q3 + 1.5 * IQR
+            outlier_count = len(self.df[(self.df[col] < lower_bound) | (self.df[col] > upper_bound)])
+            if outlier_count > 0:
+                outliers[col] = {
+                    'count': outlier_count,
+                    'percentage': (outlier_count / len(self.df)) * 100,
+                    'lower_bound': lower_bound,
+                    'upper_bound': upper_bound
+                }
+        return outliers
+    def get_distributions(self):
+        """get distribution information for numeric columns"""
+        distributions = {}
+        for col in self.schema['numeric']:
+            distributions[col] = {
+                'skewness': self.df[col].skew(),
+                'kurtosis': self.df[col].kurtosis(),
+                'unique_values': self.df[col].nunique()
+            }
+            #determine distribution shape
+            skew = distributions[col]['skewness']
+            if skew > 1:
+                distributions[col]['shape'] = 'right-skewed'
+            elif skew < -1:
+                distributions[col]['shape'] = 'left-skewed'
+            else:
+                distributions[col]['shape'] = 'approximately normal'
+        return distributions

app/chart_customizer.py ADDED Viewed

	@@ -0,0 +1,177 @@

+"""
+Chart Customizer - Let users choose chart types
+"""
+import plotly.express as px
+import plotly.graph_objects as go
+import pandas as pd
+class ChartCustomizer:
+    def __init__(self, df):
+        self.df = df
+    def get_available_charts(self):
+        """Return available chart types based on data"""
+        charts = []
+        if len(self.df.select_dtypes(include=['number']).columns) > 0:
+            charts.append('📊 Histogram')
+            charts.append('📈 Line Chart')
+            charts.append('📉 Scatter Plot')
+            charts.append('📦 Box Plot')
+        if len(self.df.select_dtypes(include=['object']).columns) > 0:
+            charts.append('🥧 Bar Chart')
+            charts.append('🍩 Pie Chart')
+        if len(self.df.select_dtypes(include=['datetime64']).columns) > 0:
+            charts.append('📅 Time Series')
+        charts.append('🔥 Heatmap')
+        return charts
+    def create_chart(self, chart_type, x_col, y_col=None, color_col=None, title=None):
+        """Create customized chart"""
+        if title is None:
+            title = f"{chart_type}: {x_col}"
+            if y_col:
+                title += f" vs {y_col}"
+        # Histogram
+        if 'Histogram' in chart_type:
+            fig = px.histogram(
+                self.df, x=x_col,
+                title=title,
+                color=color_col if color_col else None,
+                nbins=30,
+                color_discrete_sequence=px.colors.sequential.Plasma
+            )
+        # Bar Chart
+        elif 'Bar Chart' in chart_type:
+            if y_col and y_col in self.df.columns:
+                # Grouped bar chart
+                agg_data = self.df.groupby(x_col)[y_col].mean().reset_index()
+                fig = px.bar(
+                    agg_data, x=x_col, y=y_col,
+                    title=title,
+                    color=color_col if color_col else None,
+                    color_discrete_sequence=px.colors.qualitative.Set2
+                )
+            else:
+                # Count bar chart
+                counts = self.df[x_col].value_counts().head(20).reset_index()
+                counts.columns = [x_col, 'count']
+                fig = px.bar(
+                    counts, x=x_col, y='count',
+                    title=f"Count of {x_col}",
+                    color_discrete_sequence=['#2E86AB']
+                )
+        # Line Chart
+        elif 'Line Chart' in chart_type:
+            if y_col and y_col in self.df.columns:
+                fig = px.line(
+                    self.df, x=x_col, y=y_col,
+                    title=title,
+                    color=color_col if color_col else None,
+                    markers=True
+                )
+            else:
+                fig = px.line(
+                    self.df, x=x_col,
+                    title=title,
+                    markers=True
+                )
+        # Scatter Plot (without trendline to avoid statsmodels)
+        elif 'Scatter' in chart_type:
+            if y_col and y_col in self.df.columns:
+                fig = px.scatter(
+                    self.df, x=x_col, y=y_col,
+                    title=title,
+                    color=color_col if color_col else None,
+                    size=y_col if y_col else None,
+                    hover_data=[x_col, y_col] if y_col else [x_col]
+                    # Removed trendline to avoid statsmodels
+                )
+            else:
+                fig = px.scatter(
+                    self.df, x=x_col, y=x_col,
+                    title=title,
+                    color=color_col if color_col else None
+                )
+        # Box Plot
+        elif 'Box' in chart_type:
+            if y_col and y_col in self.df.columns:
+                fig = px.box(
+                    self.df, x=x_col, y=y_col,
+                    title=title,
+                    color=color_col if color_col else None,
+                    points="all"
+                )
+            else:
+                fig = px.box(
+                    self.df, y=x_col,
+                    title=f"Box Plot of {x_col}",
+                    points="all"
+                )
+        # Pie Chart
+        elif 'Pie' in chart_type:
+            counts = self.df[x_col].value_counts().head(10).reset_index()
+            counts.columns = [x_col, 'count']
+            fig = px.pie(
+                counts, values='count', names=x_col,
+                title=f"Distribution of {x_col}",
+                hole=0.3
+            )
+        # Heatmap
+        elif 'Heatmap' in chart_type:
+            numeric_cols = self.df.select_dtypes(include=['number']).columns
+            if len(numeric_cols) > 1:
+                corr = self.df[numeric_cols].corr()
+                fig = px.imshow(
+                    corr,
+                    text_auto='.2f',
+                    aspect='auto',
+                    color_continuous_scale='RdBu',
+                    title="Correlation Heatmap"
+                )
+            else:
+                return None
+        # Time Series
+        elif 'Time Series' in chart_type:
+            date_cols = self.df.select_dtypes(include=['datetime64']).columns
+            if len(date_cols) > 0:
+                date_col = date_cols[0]
+                if y_col and y_col in self.df.columns:
+                    time_data = self.df.groupby(date_col)[y_col].mean().reset_index()
+                    fig = px.line(
+                        time_data, x=date_col, y=y_col,
+                        title=f"{y_col} Over Time",
+                        markers=True
+                    )
+                else:
+                    fig = None
+            else:
+                fig = None
+        else:
+            fig = None
+        if fig:
+            # Apply common styling
+            fig.update_layout(
+                template='plotly_white',
+                height=500,
+                title_font_size=16,
+                title_x=0.5
+            )
+        return fig

app/dashboard.py ADDED Viewed

	@@ -0,0 +1,171 @@

+###____________ Chart selection logic and dashboard generation___________
+import plotly.express as px
+import plotly.graph_objects as go
+from plotly.subplots import make_subplots
+import pandas as pd
+class DashboardGenerator:
+    def __init__(self, df, schema):
+        self.df = df
+        self.schema = schema
+        self.charts = []
+    def generate_all_charts(self):
+        """
+        Generate appropriate charts for each column type
+        """
+        print(" Generating charts...")
+        # Numeric columns - Histograms
+        for col in self.schema['numeric'][:5]:  # Limit to 5 charts
+            fig = self.create_histogram(col)
+            self.charts.append({
+                'title': f'Distribution of {col}',
+                'figure': fig,
+                'type': 'histogram'
+            })
+        # Categorical columns - Bar charts (top 10)
+        for col in self.schema['categorical'][:3]:
+            fig = self.create_bar_chart(col)
+            self.charts.append({
+                'title': f'Top values in {col}',
+                'figure': fig,
+                'type': 'bar'
+            })
+        # Time series - Line charts
+        for date_col in self.schema['datetime']:
+            for num_col in self.schema['numeric'][:2]:
+                fig = self.create_time_series(date_col, num_col)
+                self.charts.append({
+                    'title': f'{num_col} over time',
+                    'figure': fig,
+                    'type': 'line'
+                })
+        # Correlation heatmap
+        if len(self.schema['numeric']) >= 2:
+            fig = self.create_correlation_heatmap()
+            self.charts.append({
+                'title': 'Correlation Heatmap',
+                'figure': fig,
+                'type': 'heatmap'
+            })
+        return self.charts
+    def create_histogram(self, column):
+        """Create histogram for numeric column"""
+        fig = px.histogram(
+            self.df,
+            x=column,
+            title=f'Distribution of {column}',
+            color_discrete_sequence=['#2E86AB'],
+            nbins=30
+        )
+        fig.update_layout(
+            showlegend=False,
+            height=400,
+            template='plotly_white'
+        )
+        return fig
+    def create_bar_chart(self, column):
+        """Create bar chart for categorical column"""
+        value_counts = self.df[column].value_counts().head(10)
+        fig = px.bar(
+            x=value_counts.values,
+            y=value_counts.index,
+            orientation='h',
+            title=f'Top 10 values in {column}',
+            color=value_counts.values,
+            color_continuous_scale='Blues'
+        )
+        fig.update_layout(
+            xaxis_title='Count',
+            yaxis_title=column,
+            height=400,
+            template='plotly_white'
+        )
+        return fig
+    def create_time_series(self, date_col, value_col):
+        """Create time series line chart"""
+        # Group by date
+        time_data = self.df.groupby(pd.Grouper(key=date_col, freq='D'))[value_col].mean().reset_index()
+        fig = px.line(
+            time_data,
+            x=date_col,
+            y=value_col,
+            title=f'{value_col} over time',
+            markers=True
+        )
+        fig.update_layout(
+            xaxis_title='Date',
+            yaxis_title=value_col,
+            height=400,
+            template='plotly_white'
+        )
+        return fig
+    def create_correlation_heatmap(self):
+        """Create correlation heatmap"""
+        corr_matrix = self.df[self.schema['numeric']].corr()
+        fig = px.imshow(
+            corr_matrix,
+            text_auto='.2f',
+            aspect='auto',
+            color_continuous_scale='RdBu',
+            title='Correlation Heatmap'
+        )
+        fig.update_layout(
+            height=500,
+            template='plotly_white'
+        )
+        return fig
+    def create_key_metrics(self):
+        """
+        Create KPI cards for important metrics
+        """
+        metrics = []
+        for col in self.schema['numeric'][:4]:  # Top 4 numeric columns
+            mean_val = self.df[col].mean()
+            std_val = self.df[col].std()
+            min_val = self.df[col].min()
+            max_val = self.df[col].max()
+            metrics.append({
+                'name': col.upper(),
+                'value': f"{mean_val:,.0f}",
+                'change': f"±{std_val:,.0f}",
+                'min': f"{min_val:,.0f}",
+                'max': f"{max_val:,.0f}",
+                'type': 'average'
+            })
+        return metrics
+    def create_summary_table(self):
+        """
+        Create summary statistics table
+        """
+        summary = []
+        for col in self.schema['numeric']:
+            summary.append({
+                'Column': col,
+                'Mean': round(self.df[col].mean(), 2),
+                'Median': round(self.df[col].median(), 2),
+                'Std Dev': round(self.df[col].std(), 2),
+                'Min': round(self.df[col].min(), 2),
+                'Max': round(self.df[col].max(), 2)
+            })
+        return pd.DataFrame(summary)

app/data_processor.py ADDED Viewed

	@@ -0,0 +1,132 @@

+## data ingestion & preprocessing & schema detection
+import pandas as pd
+import numpy as np
+from pathlib import Path
+import json
+class DataProcessor:
+    def __init__(self):
+        self.df = None
+        self.schema = {}
+    def load_data(self, file_path):
+        ##______________load csv or json file________________________
+        file_ext = Path(file_path).suffix.lower()
+        if file_ext == '.csv':
+            self.df = pd.read_csv(file_path)
+        elif file_ext == '.json':
+            self.df = pd.read_json(file_path)
+        else:
+            raise ValueError("Unsupported file type. Use CSV or JSON file")
+        return self.df
+    def load_from_upload(self, uploaded_file):
+        ###__________load from stramlit upload_____________
+        if uploaded_file.name.endswith('.csv'):
+            self.df = pd.read_csv(uploaded_file)
+        elif uploaded_file.name.endswith('.json'):
+            self.df = pd.read_json(uploaded_file)
+        else:
+            raise ValueError("Unsupported file type")
+        return self.df
+    def preprocess(self):
+        """
+        Step 2: Clean the data - Enhanced version
+        """
+        print("🔄 Preprocessing data...")
+        # FIRST: Replace '?' and other placeholders with NaN
+        placeholder_values = ['?', 'None', 'null', 'NULL', 'NaN', 'nan', '', ' ', 'Unknown', 'unknown']
+        self.df = self.df.replace(placeholder_values, pd.NA)
+        # Remove duplicate rows
+        initial_rows = len(self.df)
+        self.df = self.df.drop_duplicates()
+        print(f"  Removed {initial_rows - len(self.df)} duplicates")
+        # Handle missing values
+        missing_before = self.df.isnull().sum().sum()
+        # For numeric columns: fill with median
+        numeric_cols = self.df.select_dtypes(include=[np.number]).columns
+        for col in numeric_cols:
+            self.df[col] = self.df[col].fillna(self.df[col].median())
+        # For categorical columns: fill with mode or 'Unknown'
+        categorical_cols = self.df.select_dtypes(include=['object']).columns
+        for col in categorical_cols:
+            if not self.df[col].isnull().all():
+                mode_val = self.df[col].mode()
+                if len(mode_val) > 0:
+                    self.df[col] = self.df[col].fillna(mode_val[0])
+                else:
+                    self.df[col] = self.df[col].fillna("Unknown")
+        missing_after = self.df.isnull().sum().sum()
+        print(f"  Filled {missing_before - missing_after} missing values")
+        # Convert data types intelligently
+        self._convert_types()
+        return self.df
+    def _convert_types(self):
+        ##________auto-convert data typpes_______
+        # try to convert object columns to datetime
+        for col in self.df.columns:
+            if self.df[col].dtype == 'object':
+                try:
+                    self.df[col] = pd.to_datetime(self.df[col])
+                    print(f"   Converted {col} to datetime")
+                except:
+                    pass
+    def detect_schema(self):
+        """
+        Step 3: Detect schema - identify column types
+        """
+        self.schema = {
+            'numeric': [],
+            'categorical': [],
+            'datetime': [],
+            'text': []
+        }
+        for col in self.df.columns:
+            if pd.api.types.is_datetime64_any_dtype(self.df[col]):
+                self.schema['datetime'].append(col)
+            elif pd.api.types.is_numeric_dtype(self.df[col]):
+                self.schema['numeric'].append(col)
+            elif pd.api.types.is_object_dtype(self.df[col]):
+                # Check if it's categorical (few unique values)
+                unique_ratio = self.df[col].nunique() / len(self.df)
+                # Lower threshold to catch more categories (0.05 = 5%)
+                if unique_ratio < 0.5:  # Changed from 0.05 to 0.5 to catch product, category, region
+                    self.schema['categorical'].append(col)
+                else:
+                    self.schema['text'].append(col)
+        print("\n📊 Schema Detected:")
+        print(f"  Numeric columns: {self.schema['numeric']}")
+        print(f"  Categorical columns: {self.schema['categorical']}")
+        print(f"  Date columns: {self.schema['datetime']}")
+        return self.schema
+    def get_summary(self):
+        ##__________get basic data summary_________
+        return{
+            'rows': len(self.df),
+            'columns': len(self.df.columns),
+            'column_names': list(self.df.columns),
+            'missing_values': self.df.isnull().sum().to_dict(),
+            'memory_usage': self.df.memory_usage(deep=True).sum() / 1024**2  # MB
+        }

app/export_utils.py ADDED Viewed

	@@ -0,0 +1,251 @@

+"""
+Export Utilities - CSV, Excel, and REAL Power BI export
+"""
+import pandas as pd
+import io
+import json
+from datetime import datetime
+class ExportUtils:
+    def __init__(self, df):
+        self.df = df
+    def to_csv(self):
+        """Export to CSV"""
+        return self.df.to_csv(index=False).encode('utf-8')
+    def to_excel(self):
+        """Export to Excel with formatting"""
+        output = io.BytesIO()
+        with pd.ExcelWriter(output, engine='openpyxl') as writer:
+            # Write main data
+            self.df.to_excel(writer, sheet_name='Data', index=False)
+            # Add summary sheet
+            numeric_cols = self.df.select_dtypes(include=['number']).columns
+            if len(numeric_cols) > 0:
+                summary = self.df[numeric_cols].describe()
+                summary.to_excel(writer, sheet_name='Summary', index=True)
+            # Add column info sheet
+            col_info = pd.DataFrame({
+                'Column': self.df.columns,
+                'Type': self.df.dtypes.astype(str),
+                'Nulls': self.df.isnull().sum(),
+                'Unique': self.df.nunique()
+            })
+            col_info.to_excel(writer, sheet_name='Column Info', index=False)
+        output.seek(0)
+        return output.getvalue()
+    def to_powerbi_ready(self):
+        """Prepare data for Power BI - Creates CSV optimized for Power BI"""
+        df_powerbi = self.df.copy()
+        # Clean column names (Power BI friendly)
+        df_powerbi.columns = [col.replace(' ', '_').replace('-', '_').replace('/', '_') for col in df_powerbi.columns]
+        # Clean datetime columns for Power BI
+        for col in df_powerbi.columns:
+            if 'datetime' in col.lower() or 'date' in col.lower() or 'time' in col.lower():
+                try:
+                    df_powerbi[col] = pd.to_datetime(df_powerbi[col])
+                except:
+                    pass
+        # Convert to CSV for Power BI import
+        return df_powerbi.to_csv(index=False).encode('utf-8')
+    def to_powerbi_with_metadata(self):
+        """Export to Power BI with metadata file"""
+        # Main data CSV
+        data_csv = self.to_powerbi_ready()
+        # Create metadata JSON
+        numeric_cols = self.df.select_dtypes(include=['number']).columns
+        categorical_cols = self.df.select_dtypes(include=['object']).columns
+        date_cols = self.df.select_dtypes(include=['datetime64']).columns
+        metadata = {
+            'export_date': datetime.now().isoformat(),
+            'table_name': 'Cleaned_Data',
+            'row_count': len(self.df),
+            'column_count': len(self.df.columns),
+            'columns': list(self.df.columns),
+            'numeric_columns': list(numeric_cols),
+            'categorical_columns': list(categorical_cols),
+            'date_columns': list(date_cols),
+            'recommended_measures': {},
+            'recommended_visuals': []
+        }
+        # Add recommended measures
+        for col in numeric_cols[:10]:
+            metadata['recommended_measures'][f'Total_{col}'] = f'SUM(Cleaned_Data[{col}])'
+            metadata['recommended_measures'][f'Average_{col}'] = f'AVERAGE(Cleaned_Data[{col}])'
+        # Add recommended visuals
+        if len(categorical_cols) > 0 and len(numeric_cols) > 0:
+            metadata['recommended_visuals'].append({
+                'type': 'bar_chart',
+                'category': categorical_cols[0],
+                'value': numeric_cols[0],
+                'title': f'{numeric_cols[0]} by {categorical_cols[0]}'
+            })
+        if len(date_cols) > 0 and len(numeric_cols) > 0:
+            metadata['recommended_visuals'].append({
+                'type': 'line_chart',
+                'date': date_cols[0],
+                'value': numeric_cols[0],
+                'title': f'{numeric_cols[0]} Over Time'
+            })
+        metadata_json = json.dumps(metadata, indent=2).encode('utf-8')
+        return {
+            'data': data_csv,
+            'metadata': metadata_json,
+            'instructions': self._get_powerbi_instructions()
+        }
+    def _get_powerbi_instructions(self):
+        """Get step-by-step Power BI import instructions"""
+        instructions = """
+=== POWER BI IMPORT INSTRUCTIONS ===
+METHOD 1: Direct Import (Recommended)
+1. Open Power BI Desktop
+2. Click "Get Data" → "Text/CSV"
+3. Select the exported CSV file
+4. Click "Load"
+5. Power BI will auto-detect data types
+METHOD 2: Advanced Import
+1. Click "Get Data" → "More..."
+2. Search for "CSV" or "Text"
+3. Select your file
+4. Configure:
+   - First row as headers: YES
+   - Data type detection: Based on first 200 rows
+5. Click "Load"
+=== AFTER IMPORT ===
+Recommended DAX Measures to Create:
+"""
+        return instructions
+    def to_powerbi_zip(self):
+        """Create a zip file with all Power BI resources"""
+        import zipfile
+        output = io.BytesIO()
+        with zipfile.ZipFile(output, 'w', zipfile.ZIP_DEFLATED) as zipf:
+            # Add data CSV
+            data_csv = self.to_powerbi_ready()
+            zipf.writestr('data.csv', data_csv)
+            # Add metadata
+            powerbi_data = self.to_powerbi_with_metadata()
+            zipf.writestr('metadata.json', powerbi_data['metadata'])
+            # Add instructions
+            zipf.writestr('instructions.txt', powerbi_data['instructions'])
+            # Add sample DAX file
+            dax_content = self._generate_dax_file()
+            zipf.writestr('measures.dax', dax_content)
+        output.seek(0)
+        return output.getvalue()
+    def _generate_dax_file(self):
+        """Generate DAX file for Power BI"""
+        numeric_cols = self.df.select_dtypes(include=['number']).columns
+        dax = f"""// DAX Measures for Power BI
+// Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
+// Table Name: Cleaned_Data
+// ============ BASIC MEASURES ============
+Total Records = COUNTROWS(Cleaned_Data)
+"""
+        for col in numeric_cols[:15]:
+            dax += f"""
+// {col} Measures
+Total {col} = SUM(Cleaned_Data[{col}])
+Average {col} = AVERAGE(Cleaned_Data[{col}])
+Min {col} = MIN(Cleaned_Data[{col}])
+Max {col} = MAX(Cleaned_Data[{col}])
+"""
+        dax += """
+// ============ HOW TO USE ============
+// 1. In Power BI, go to "Modeling" tab
+// 2. Click "New Measure"
+// 3. Copy-paste any measure above
+// 4. Press Enter to save
+// ============ EXAMPLE VISUALS ============
+// - Card Visual: Total Records
+// - Bar Chart: Category vs Total Sales
+// - Line Chart: Date vs Average Value
+"""
+        return dax
+    def to_json(self):
+        """Export to JSON"""
+        return self.df.to_json(orient='records', indent=2).encode('utf-8')
+    def get_powerbi_template(self):
+        """Get Power BI DAX template (legacy - kept for compatibility)"""
+        numeric_cols = self.df.select_dtypes(include=['number']).columns
+        categorical_cols = self.df.select_dtypes(include=['object']).columns
+        template = f"""// Power BI DAX Template for your data
+// Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
+// Table name: Cleaned_Data
+// ============ BASIC MEASURES ============
+Total Records = COUNTROWS(Cleaned_Data)
+"""
+        for col in numeric_cols[:10]:
+            template += f"""
+Total {col} = SUM(Cleaned_Data[{col}])
+Average {col} = AVERAGE(Cleaned_Data[{col}])
+"""
+        template += """
+// ============ HOW TO USE ============
+// 1. Export your data as CSV first
+// 2. In Power BI: Get Data → CSV → Select your file
+// 3. Go to Modeling tab → New Measure
+// 4. Copy and paste any measure above
+// 5. Drag measures to visuals
+// ============ RECOMMENDED VISUALS ============
+"""
+        if len(categorical_cols) > 0 and len(numeric_cols) > 0:
+            template += f"""
+- Bar Chart: {categorical_cols[0]} vs {numeric_cols[0]}
+"""
+        if len(self.df.select_dtypes(include=['datetime64']).columns) > 0:
+            template += f"""
+- Line Chart: Date vs {numeric_cols[0] if len(numeric_cols) > 0 else 'Value'}
+"""
+        return template

app/insight_generator.py ADDED Viewed

	@@ -0,0 +1,181 @@

+##________generate natural language insights from analysis_________##
+import os
+import json
+from typing import Dict, Any
+##___________________________
+class InsightGenerator:
+    def __init__(self, use_openai=False, api_key=None):
+        self.use_openai = use_openai
+        if use_openai and api_key:
+            import openai
+            openai.api_key = api_key
+            self.openai = openai
+        else:
+            print(" Using template-based insight generation")
+    def generate_insights(self, df, schema, analysis):
+        """generate human readable insights"""
+        insights = []
+        # 1.dataset overview
+        insights.append(f" **Dataset Overview**: Your dataset has {len(df)} rows and {len(df.columns)} columns.")
+        # 2. Key statistics
+        insights.extend(self._generate_statistical_insights(analysis['descriptive_stats']))
+        # 3. Correlation insights
+        insights.extend(self._generate_correlation_insights(analysis['correlations']))
+        # 4. Trend insights
+        insights.extend(self._generate_trend_insights(analysis['trends']))
+        # 5. Group analysis insights
+        insights.extend(self._generate_group_insights(analysis['group_analysis']))
+        # 6. Outlier insights
+        insights.extend(self._generate_outlier_insights(analysis['outliers']))
+        # 7. Distribution insights
+        insights.extend(self._generate_distribution_insights(analysis['distributions']))
+        # 8. Actionable recommendations
+        insights.extend(self._generate_recommendations(analysis))
+        return insights
+    def _generate_statistical_insights(self, stats):
+        """generate insights from descriptive statistics"""
+        insights = []
+        for col, values in stats.items():
+            if values['mean'] > values['median'] * 1.2:
+                insights.append(f" **{col}** is right-skewed (mean {values['mean']:.2f} > median {values['median']:.2f}), suggesting some high values pulling the average up.")
+            elif values['median'] > values['mean'] * 1.2:
+                insights.append(f" **{col}** is left-skewed (median {values['median']:.2f} > mean {values['mean']:.2f}).")
+        return insights[:3]  ### limit to top 3
+    def _generate_correlation_insights(self, correlations):
+        """generate insights from correlations"""
+        insights = []
+        for corr in correlations[:3]:  # Top 3 correlations
+            strength = "strong positive" if corr['strength'] == 'positive' else "strong negative"
+            insights.append(f" **{corr['col1']}** and **{corr['col2']}** show a {strength} correlation ({corr['correlation']:.2f}).")
+            if corr['strength'] == 'positive':
+                insights.append(f"  → When {corr['col1']} increases, {corr['col2']} tends to increase as well.")
+            else:
+                insights.append(f"  → When {corr['col1']} increases, {corr['col2']} tends to decrease.")
+        return insights
+    def _generate_trend_insights(self, trends):
+        """generate insights from trends"""
+        insights =[]
+        for trend in trends:
+            direction = "increased" if trend['direction'] == 'increasing' else "decreased"
+            change_abs = abs(trend['percent_change'])
+            if change_abs > 20:
+                insights.append(f" **{trend['column']}** has {direction} significantly by {change_abs:.1f}% over time.")
+            elif change_abs > 5:
+                insights.append(f" **{trend['column']}** has {direction} by {change_abs:.1f}% over the period.")
+        return insights
+    def _generate_group_insights(self, group_analysis):
+        """generate insights from group analysis"""
+        insights = []
+        for cat_col, analyses in group_analysis.items():
+            for num_col, analysis in analyses.items():
+                if analysis['top_category']:
+                    insights.append(f" **{analysis['top_category']}** is the top performer in {cat_col} for {num_col} with {analysis['top_value']:.2f}.")
+        return insights[:3]
+    def _generate_outlier_insights(self, outliers):
+        """generate insights about outliers"""
+        insights = []
+        for col, data in outliers.items():
+            if data['percentage'] < 5:
+                insights.append(f" **{col}** contains {data['count']} outliers ({data['percentage']:.1f}% of data). These might be worth investigating.")
+        return insights
+    def _generate_distribution_insights(self, distributions):
+        """generate insights about distributions"""
+        insights = []
+        for col, dist in distributions.items():
+            if dist['shape'] != 'approximately normal':
+                insights.append(f" **{col}** has a {dist['shape']} distribution (skewness: {dist['skewness']:.2f}).")
+        return insights[:2]
+    def _generate_recommendations(self, analysis):
+        """generate actionable recommendations"""
+        recommendations = []
+        # Check for opportunities
+        if analysis['correlations']:
+            strong_corr = analysis['correlations'][0]
+            if strong_corr['strength'] == 'positive':
+                recommendations.append(f" **Recommendation**: Focus on increasing {strong_corr['col1']} to potentially boost {strong_corr['col2']}.")
+        # Check for declining trends
+        for trend in analysis['trends']:
+            if trend['direction'] == 'decreasing' and abs(trend['percent_change']) > 10:
+                recommendations.append(f" **Action Required**: {trend['column']} is declining. Consider investigating causes.")
+                break
+        if not recommendations:
+            recommendations.append(" **Status**: No urgent issues detected. Continue monitoring key metrics.")
+        return recommendations
+    def generate_openai_insights(self, df_summary, analysis):
+        """use OpenAI to generate insights"""
+        if not self.use_openai:
+            return self.generate_insights(df_summary, analysis)
+        prompt = f"""
+        You are a data analyst. Analyze this dataset and provide key business insights:
+        Dataset: {df_summary['rows']} rows, {df_summary['columns']} columns
+        Columns: {df_summary['column_names']}
+        Key Statistics: {analysis.get('descriptive_stats', {})}
+        Correlations: {analysis.get('correlations', [])}
+        Trends: {analysis.get('trends', [])}
+        Provide:
+        1. Top 3 key findings
+        2. One actionable recommendation
+        3. One question the user should explore further
+        Keep it concise and business-friendly.
+        """
+        try:
+            response = self.openai.ChatCompletion.create(
+                model="gpt-3.5-turbo",
+                messages=[{"role": "user", "content": prompt}],
+                max_tokens=300
+            )
+            return [response.choices[0].message.content]
+        except Exception as e:
+            print(f"OpenAI error: {e}")
+            return self.generate_insights(df_summary, analysis)

app/main.py ADDED Viewed

	@@ -0,0 +1,646 @@

+"""
+Smart Analytics Copilot - Complete Version
+With Export, OpenAI, Save/Load, Chart Customization, Power BI Export
+"""
+import streamlit as st
+import pandas as pd
+import os
+from datetime import datetime
+from dotenv import load_dotenv
+# Load environment variables
+load_dotenv()
+from data_processor import DataProcessor
+from analyzer import Analyzer
+from insight_generator import InsightGenerator
+from dashboard import DashboardGenerator
+from query_engine import QueryEngine
+from export_utils import ExportUtils
+from session_manager import SessionManager
+from chart_customizer import ChartCustomizer
+# Page config: browser-tab title and icon, wide layout, sidebar expanded initially
+st.set_page_config(
+    page_title="Smart Analytics Copilot",
+    page_icon="🚀",
+    layout="wide",
+    initial_sidebar_state="expanded"
+)
+# ============ DARK THEME CSS ============
+st.markdown("""
+<style>
+    /* Main background */
+    .stApp {
+        background-color: #0a0e17 !important;
+    }
+    /* All text - light color */
+    .stMarkdown, .stMarkdown p, .stMarkdown div, .stMarkdown span,
+    .stText, p, div, span, label {
+        color: #e8e8e8 !important;
+    }
+    /* Headers */
+    h1, h2, h3, h4, h5, h6 {
+        color: #00ff9d !important;
+        font-weight: 600 !important;
+    }
+    /* Main header */
+    .main-header {
+        font-size: 2.8rem;
+        font-weight: bold;
+        background: linear-gradient(135deg, #00ff9d 0%, #00d4ff 100%);
+        -webkit-background-clip: text;
+        -webkit-text-fill-color: transparent;
+        margin-bottom: 1rem;
+        text-align: center;
+    }
+    /* Sidebar */
+    .css-1d391kg, .stSidebar, .sidebar-content {
+        background-color: #111827 !important;
+    }
+    /* Metrics */
+    div[data-testid="stMetricValue"] {
+        color: #00ff9d !important;
+        font-size: 2rem !important;
+        font-weight: bold !important;
+    }
+    div[data-testid="stMetricLabel"] {
+        color: #a0aec0 !important;
+        font-size: 0.9rem !important;
+    }
+    /* Tabs */
+    .stTabs [data-baseweb="tab-list"] {
+        gap: 4px;
+        background-color: #111827;
+        border-radius: 10px;
+        padding: 6px;
+    }
+    .stTabs [data-baseweb="tab"] {
+        background-color: #1f2937;
+        border-radius: 8px;
+        padding: 8px 24px;
+        color: #e8e8e8 !important;
+    }
+    .stTabs [aria-selected="true"] {
+        background: linear-gradient(135deg, #00ff9d 0%, #00d4ff 100%) !important;
+        color: #0a0e17 !important;
+        font-weight: bold;
+    }
+    /* Buttons */
+    .stButton button {
+        background: linear-gradient(135deg, #00ff9d 0%, #00d4ff 100%) !important;
+        color: #0a0e17 !important;
+        font-weight: bold !important;
+        border: none !important;
+        border-radius: 8px !important;
+    }
+    /* File uploader */
+    .stFileUploader {
+        background-color: #1f2937 !important;
+        border: 2px dashed #374151 !important;
+        border-radius: 12px !important;
+    }
+    /* Expander */
+    .streamlit-expanderHeader {
+        background-color: #1f2937 !important;
+        color: #00ff9d !important;
+        border-radius: 8px;
+    }
+    /* Success/Info/Warning boxes */
+    .stAlert {
+        background-color: #1f2937 !important;
+        border: 1px solid #374151 !important;
+        border-radius: 10px !important;
+    }
+    .stAlert p, .stAlert div {
+        color: #e8e8e8 !important;
+    }
+    /* Dataframe */
+    .stDataFrame {
+        background-color: #111827 !important;
+    }
+    .stDataFrame thead th {
+        background-color: #1f2937 !important;
+        color: #00ff9d !important;
+    }
+    /* Text input */
+    .stTextInput input {
+        background-color: #1f2937 !important;
+        color: #e8e8e8 !important;
+        border: 1px solid #374151 !important;
+        border-radius: 8px !important;
+    }
+    /* Select box */
+    .stSelectbox div[data-baseweb="select"] {
+        background-color: #1f2937 !important;
+        border-color: #374151 !important;
+    }
+    /* Download button */
+    .stDownloadButton button {
+        background: linear-gradient(135deg, #00ff9d 0%, #00d4ff 100%) !important;
+        color: #0a0e17 !important;
+    }
+</style>
+""", unsafe_allow_html=True)
+# Initialize session state
+if 'data_loaded' not in st.session_state:
+    st.session_state.data_loaded = False
+if 'df' not in st.session_state:
+    st.session_state.df = None
+if 'schema' not in st.session_state:
+    st.session_state.schema = None
+if 'analysis' not in st.session_state:
+    st.session_state.analysis = None
+if 'insights' not in st.session_state:
+    st.session_state.insights = None
+if 'charts' not in st.session_state:
+    st.session_state.charts = None
+if 'use_openai' not in st.session_state:
+    st.session_state.use_openai = False
+# Initialize managers
+session_mgr = SessionManager()
+def main():
+    st.markdown('<div class="main-header">🚀 Smart Analytics Copilot</div>', unsafe_allow_html=True)
+    st.caption("✨ Upload any CSV/JSON - AI analyzes, visualizes, and answers questions")
+    st.markdown("---")
+    # Sidebar
+    with st.sidebar:
+        st.markdown("### 📁 Data Source")
+        # Data source selection
+        source = st.radio("Choose data source:", ["📤 Upload File", "💾 Load Saved Session"])
+        if source == "📤 Upload File":
+            uploaded_file = st.file_uploader("Choose CSV or JSON", type=['csv', 'json'])
+            if uploaded_file and not st.session_state.data_loaded:
+                with st.spinner("🔄 Processing your data..."):
+                    process_data(uploaded_file)
+        else:
+            # Load saved sessions
+            sessions = session_mgr.list_sessions()
+            if sessions:
+                session_names = [s['name'] for s in sessions]
+                selected_session = st.selectbox("Select saved session:", session_names)
+                if st.button("📂 Load Session"):
+                    with st.spinner("Loading..."):
+                        load_session(selected_session)
+            else:
+                st.info("No saved sessions found")
+        st.markdown("---")
+        # Settings
+        with st.expander("⚙️ Settings"):
+            st.session_state.use_openai = st.checkbox("Use OpenAI (better insights)",
+                                                      value=st.session_state.use_openai)
+            if st.session_state.use_openai:
+                api_key = st.text_input("OpenAI API Key:", type="password")
+                if api_key:
+                    os.environ['OPENAI_API_KEY'] = api_key
+                    st.success("API Key set!")
+        st.markdown("---")
+        # Export section (only if data loaded)
+        if st.session_state.data_loaded:
+            st.markdown("### 💾 Export Options")
+            export_utils = ExportUtils(st.session_state.df)
+            export_format = st.selectbox("Export format:",
+                                         ["CSV", "Excel", "JSON", "Power BI CSV", "Power BI ZIP (Complete)"])
+            if st.button("📥 Download"):
+                if export_format == "CSV":
+                    data = export_utils.to_csv()
+                    mime = "text/csv"
+                    ext = "csv"
+                elif export_format == "Excel":
+                    data = export_utils.to_excel()
+                    mime = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
+                    ext = "xlsx"
+                elif export_format == "JSON":
+                    data = export_utils.to_json()
+                    mime = "application/json"
+                    ext = "json"
+                elif export_format == "Power BI CSV":
+                    data = export_utils.to_powerbi_ready()
+                    mime = "text/csv"
+                    ext = "csv"
+                else:  # Power BI ZIP (Complete)
+                    data = export_utils.to_powerbi_zip()
+                    mime = "application/zip"
+                    ext = "zip"
+                st.download_button(
+                    label="✅ Click to Download",
+                    data=data,
+                    file_name=f"export_{datetime.now().strftime('%Y%m%d_%H%M%S')}.{ext}",
+                    mime=mime
+                )
+            # Save session button
+            st.markdown("---")
+            if st.button("💾 Save Current Session"):
+                name, path = session_mgr.save_session(st.session_state.df, st.session_state.schema)
+                st.success(f"✅ Session saved as: {name}")
+    # Main content
+    if st.session_state.data_loaded:
+        tab1, tab2, tab3, tab4, tab5 = st.tabs([
+            "📊 Dashboard", "💡 AI Insights", "🎨 Custom Charts", "🔍 Query", "📋 Data"
+        ])
+        with tab1:
+            show_dashboard()
+        with tab2:
+            show_insights()
+        with tab3:
+            show_chart_customizer()
+        with tab4:
+            show_query_interface()
+        with tab5:
+            show_data_preview()
+    else:
+        show_welcome()
+def process_data(uploaded_file):
+    """Process uploaded data"""
+    try:
+        processor = DataProcessor()
+        st.session_state.df = processor.load_from_upload(uploaded_file)
+        st.session_state.df = processor.preprocess()
+        st.session_state.schema = processor.detect_schema()
+        analyzer = Analyzer(st.session_state.df, st.session_state.schema)
+        st.session_state.analysis = analyzer.run_full_analysis()
+        # Use OpenAI if enabled
+        api_key = os.environ.get('OPENAI_API_KEY')
+        insight_gen = InsightGenerator(use_openai=st.session_state.use_openai, api_key=api_key)
+        st.session_state.insights = insight_gen.generate_insights(
+            st.session_state.df,
+            st.session_state.schema,
+            st.session_state.analysis
+        )
+        dashboard_gen = DashboardGenerator(st.session_state.df, st.session_state.schema)
+        st.session_state.charts = dashboard_gen.generate_all_charts()
+        st.session_state.data_loaded = True
+        st.success(f"✅ Successfully loaded {len(st.session_state.df):,} rows with {len(st.session_state.df.columns)} columns")
+        st.balloons()
+        st.rerun()
+    except Exception as e:
+        st.error(f"Error: {e}")
+def load_session(session_name):
+    """Load saved session and regenerate insights"""
+    session = session_mgr.load_session(session_name)
+    if session:
+        st.session_state.df = session['df']
+        st.session_state.schema = session['schema']
+        # Regenerate analysis and insights for loaded session
+        with st.spinner("🔄 Regenerating analysis..."):
+            analyzer = Analyzer(st.session_state.df, st.session_state.schema)
+            st.session_state.analysis = analyzer.run_full_analysis()
+            # Regenerate insights
+            api_key = os.environ.get('OPENAI_API_KEY')
+            insight_gen = InsightGenerator(use_openai=st.session_state.use_openai, api_key=api_key)
+            st.session_state.insights = insight_gen.generate_insights(
+                st.session_state.df,
+                st.session_state.schema,
+                st.session_state.analysis
+            )
+            # Regenerate charts
+            dashboard_gen = DashboardGenerator(st.session_state.df, st.session_state.schema)
+            st.session_state.charts = dashboard_gen.generate_all_charts()
+        st.session_state.data_loaded = True
+        st.success(f"✅ Loaded session: {session_name}")
+        st.rerun()
+    else:
+        st.error("Failed to load session")
+def show_dashboard():
+    """Display dashboard"""
+    st.markdown("### 📈 Key Metrics")
+    st.markdown("---")
+    # Check if data exists
+    if st.session_state.df is None:
+        st.warning("No data loaded. Please upload a file first.")
+        return
+    # Display metrics
+    if st.session_state.schema['numeric']:
+        cols = st.columns(min(4, len(st.session_state.schema['numeric'])))
+        for idx, col in enumerate(st.session_state.schema['numeric'][:4]):
+            with cols[idx]:
+                total = st.session_state.df[col].sum()
+                avg = st.session_state.df[col].mean()
+                st.metric(
+                    label=f"💰 {col.upper()}",
+                    value=f"{total:,.0f}",
+                    delta=f"Avg: {avg:,.0f}"
+                )
+    st.markdown("---")
+    st.markdown("### 📊 Visualizations")
+    if st.session_state.charts:
+        for chart in st.session_state.charts[:4]:
+            st.plotly_chart(chart['figure'], use_container_width=True)
+    else:
+        st.info("No charts available. Try uploading data first.")
+    st.markdown("---")
+    st.markdown("### 📋 Summary Statistics")
+    if st.session_state.schema['numeric']:
+        summary = st.session_state.df[st.session_state.schema['numeric']].describe()
+        st.dataframe(summary, use_container_width=True)
+def show_insights():
+    """Display AI insights"""
+    st.markdown("### 🧠 AI-Powered Insights")
+    st.markdown("Here's what we discovered in your data:")
+    st.markdown("---")
+    # Check if insights exist
+    if st.session_state.insights is None:
+        st.info("💡 Insights will appear after data is analyzed.")
+        return
+    for insight in st.session_state.insights:
+        if "Dataset" in insight:
+            st.info(f"📊 {insight}")
+        elif "correlation" in insight.lower():
+            st.success(f"✅ {insight}")
+        elif "skewed" in insight.lower():
+            st.warning(f"📈 {insight}")
+        elif "Recommendation" in insight:
+            st.info(f"💡 {insight}")
+        else:
+            st.markdown(f"• {insight}")
+    # Power BI template section
+    st.markdown("---")
+    with st.expander("📊 Power BI Resources"):
+        export_utils = ExportUtils(st.session_state.df)
+        col1, col2 = st.columns(2)
+        with col1:
+            # Show DAX template
+            template = export_utils.get_powerbi_template()
+            st.code(template, language="dax")
+            st.download_button(
+                label="📥 Download DAX Template",
+                data=template,
+                file_name="powerbi_measures.dax",
+                mime="text/plain"
+            )
+        with col2:
+            # Show instructions
+            instructions = """
+**Power BI Import Steps:**
+1. **Export Data**: Use sidebar to export as "Power BI CSV"
+2. **Open Power BI Desktop**
+3. **Get Data** → **Text/CSV**
+4. **Select your exported CSV**
+5. **Click Load**
+6. **Copy DAX measures** from above
+7. **Create visuals** using the measures
+            """
+            st.info(instructions)
+def show_chart_customizer():
+    """Show chart customization interface"""
+    st.markdown("### 🎨 Custom Chart Builder")
+    st.markdown("Create your own custom visualizations")
+    st.markdown("---")
+    customizer = ChartCustomizer(st.session_state.df)
+    available_charts = customizer.get_available_charts()
+    col1, col2, col3 = st.columns([1, 1, 1])
+    with col1:
+        chart_type = st.selectbox("Chart Type:", available_charts)
+    with col2:
+        # Get appropriate columns
+        if 'Histogram' in chart_type or 'Box' in chart_type:
+            columns = st.session_state.schema['numeric']
+            if not columns:
+                columns = list(st.session_state.df.select_dtypes(include=['number']).columns)
+        elif 'Pie' in chart_type or 'Bar' in chart_type:
+            columns = st.session_state.schema['categorical']
+            if not columns:
+                columns = list(st.session_state.df.select_dtypes(include=['object']).columns)
+        else:
+            columns = list(st.session_state.df.columns)
+        if columns:
+            x_col = st.selectbox("X-Axis / Category:", columns)
+        else:
+            x_col = None
+            st.warning("No suitable columns found")
+    with col3:
+        # For charts that need Y-axis
+        if any(t in chart_type for t in ['Line', 'Scatter', 'Bar']) and 'Histogram' not in chart_type:
+            y_cols = ['None'] + st.session_state.schema['numeric']
+            y_col = st.selectbox("Y-Axis / Value:", y_cols)
+            y_col = None if y_col == 'None' else y_col
+        else:
+            y_col = None
+    # Color column (optional)
+    color_cols = ['None'] + st.session_state.schema['categorical']
+    color_col = st.selectbox("Color By (optional):", color_cols)
+    color_col = None if color_col == 'None' else color_col
+    # Title
+    title = st.text_input("Chart Title:", value=f"{chart_type} of {x_col if x_col else 'data'}")
+    if st.button("🎨 Generate Chart", use_container_width=True):
+        if x_col:
+            with st.spinner("Creating chart..."):
+                fig = customizer.create_chart(chart_type, x_col, y_col, color_col, title)
+                if fig:
+                    st.plotly_chart(fig, use_container_width=True)
+                    # Download chart button
+                    try:
+                        st.download_button(
+                            label="📸 Download as PNG",
+                            data=fig.to_image(format="png"),
+                            file_name="custom_chart.png",
+                            mime="image/png"
+                        )
+                    except:
+                        st.info("💡 Install kaleido for PNG export: `pip install kaleido`")
+                else:
+                    st.error("Could not create chart. Try different settings.")
+        else:
+            st.error("Please select a column for X-Axis")
+def show_query_interface():
+    """Natural language query interface"""
+    st.markdown("### 💬 Natural Language Query")
+    st.markdown("Ask any question about your data in plain English:")
+    st.markdown("---")
+    query_engine = QueryEngine(st.session_state.df, st.session_state.schema)
+    # Example questions
+    with st.expander("🔍 View Example Questions"):
+        if st.session_state.schema['numeric']:
+            example_col = st.session_state.schema['numeric'][0]
+            st.markdown(f"• 'Statistics {example_col}'")
+            st.markdown(f"• 'Total {example_col}'")
+            st.markdown(f"• 'Average {example_col}'")
+        if st.session_state.schema['categorical'] and st.session_state.schema['numeric']:
+            st.markdown(f"• 'Top 5 {st.session_state.schema['categorical'][0]} by {st.session_state.schema['numeric'][0]}'")
+        st.markdown("• 'Summary statistics'")
+        st.markdown("• 'Show me the data'")
+    st.markdown("---")
+    question = st.text_input("Ask a question:", placeholder="e.g., What is the average of time_in_hospital?")
+    if question:
+        with st.spinner("🤔 Analyzing your question..."):
+            answer = query_engine.answer_question(question)
+            st.markdown("### ✅ Answer")
+            st.success(answer)
+def show_data_preview():
+    """Show data preview and info with better formatting"""
+    st.markdown("### 📋 Data Preview")
+    st.markdown("---")
+    col1, col2, col3 = st.columns(3)
+    with col1:
+        st.metric("📊 Total Rows", f"{len(st.session_state.df):,}")
+    with col2:
+        st.metric("📋 Total Columns", len(st.session_state.df.columns))
+    with col3:
+        memory = st.session_state.df.memory_usage(deep=True).sum() / 1024**2
+        st.metric("💾 Memory Usage", f"{memory:.2f} MB")
+    st.markdown("---")
+    st.markdown("### 📄 Data Sample (First 100 rows)")
+    # Create a copy for display
+    display_df = st.session_state.df.head(100).copy()
+    # Clean datetime columns for better display
+    for col in display_df.columns:
+        if 'datetime' in col.lower() or 'date' in col.lower() or 'time' in col.lower():
+            try:
+                display_df[col] = pd.to_datetime(display_df[col]).dt.strftime('%Y-%m-%d %H:%M:%S')
+            except:
+                pass
+    st.dataframe(display_df, use_container_width=True)
+    st.markdown("---")
+    st.markdown("### 📊 Column Information")
+    col_info = pd.DataFrame({
+        'Column': st.session_state.df.columns,
+        'Type': st.session_state.df.dtypes.astype(str),
+        'Non-Null': st.session_state.df.count().values,
+        'Nulls': st.session_state.df.isnull().sum().values,
+        'Unique': st.session_state.df.nunique().values
+    })
+    st.dataframe(col_info, use_container_width=True)
+def show_welcome():
+    """Welcome screen"""
+    st.markdown("""
+    <div style="text-align: center; padding: 2rem; background: linear-gradient(135deg, #111827 0%, #0a0e17 100%); border-radius: 20px; margin: 2rem 0;">
+        <h2 style="color: #00ff9d;">🚀 Welcome to Smart Analytics Copilot</h2>
+        <p style="font-size: 1.1rem;">Upload any CSV or JSON file and let AI analyze it instantly</p>
+        <hr>
+        <p>👈 <strong>Get Started</strong>: Upload a file or load a saved session from the sidebar</p>
+    </div>
+    """, unsafe_allow_html=True)
+    col1, col2, col3 = st.columns(3)
+    with col1:
+        st.markdown("""
+        <div style="background: linear-gradient(135deg, #1f2937 0%, #111827 100%); padding: 1.5rem; border-radius: 15px; text-align: center;">
+            <h3 style="color: #00ff9d;">📊 Auto Dashboard</h3>
+            <p>Smart charts based on your data</p>
+        </div>
+        """, unsafe_allow_html=True)
+    with col2:
+        st.markdown("""
+        <div style="background: linear-gradient(135deg, #1f2937 0%, #111827 100%); padding: 1.5rem; border-radius: 15px; text-align: center;">
+            <h3 style="color: #00ff9d;">💡 AI Insights</h3>
+            <p>Natural language explanations</p>
+        </div>
+        """, unsafe_allow_html=True)
+    with col3:
+        st.markdown("""
+        <div style="background: linear-gradient(135deg, #1f2937 0%, #111827 100%); padding: 1.5rem; border-radius: 15px; text-align: center;">
+            <h3 style="color: #00ff9d;">🎨 Custom Charts</h3>
+            <p>Build your own visualizations</p>
+        </div>
+        """, unsafe_allow_html=True)
+if __name__ == "__main__":
+    main()

app/query_engine.py ADDED Viewed

	@@ -0,0 +1,370 @@

+"""
+Smart Query Engine - Answers ANY question about your data
+Automatically excludes ID columns and handles statistics properly
+"""
+import pandas as pd
+import re
+class QueryEngine:
+    def __init__(self, df, schema):
+        self.df = df
+        self.schema = schema
+        # Filter out ID columns from numeric columns
+        self.numeric_columns = [col for col in self.schema['numeric'] if not self._is_id_column(col)]
+        self.id_columns = [col for col in self.schema['numeric'] if self._is_id_column(col)]
+        # Also check text columns that might be IDs
+        for col in self.schema['text']:
+            if self._is_id_column(col):
+                self.id_columns.append(col)
+        # Print warning about excluded ID columns
+        if self.id_columns:
+            print(f"⚠️ Excluded ID columns from calculations: {self.id_columns}")
+    def _is_id_column(self, col_name):
+        """Check if a column is likely an ID (should not be aggregated)"""
+        col_lower = col_name.lower()
+        # Pattern-based detection
+        id_patterns = ['id', '_id', 'id_', 'key', '_key', 'pk', 'sk', 'uuid', 'guid',
+                       'code', 'number', 'nbr', '_nbr', 'patient', 'encounter']
+        for pattern in id_patterns:
+            if pattern == col_lower or col_lower.endswith(pattern) or col_lower.startswith(pattern):
+                return True
+        # Specific column names
+        exact_id_names = ['id', 'uid', 'uuid', 'row_id', 'record_id', 'encounter_id',
+                         'patient_id', 'customer_id', 'product_id', 'user_id', 'employee_id',
+                         'patient_nbr', 'encounter_nbr', 'member_id']
+        if col_lower in exact_id_names:
+            return True
+        # Uniqueness-based detection (for columns with enough data)
+        if len(self.df) > 10:
+            try:
+                uniqueness = self.df[col_name].nunique() / len(self.df[col_name])
+                # If >80% unique values, it's likely an ID
+                if uniqueness > 0.8:
+                    return True
+            except:
+                pass
+        return False
+    def _get_meaningful_numeric_columns(self):
+        """Return only meaningful numeric columns (exclude IDs)"""
+        if self.numeric_columns:
+            return self.numeric_columns
+        return []
+    def answer_question(self, question):
+        """Route a free-text question to the first handler that produces an answer.
+        Routing order:
+        1. whole-dataset summary, when a summary keyword is present and no
+           column is named in the question;
+        2. statistics for one column, when a statistics keyword is present and
+           a column is named as a whole word, or via the partial-name patterns;
+        3. the explanatory message for ID columns;
+        4. total / average / minimum / maximum / ranking;
+        5. group-by, 6. count, 7. data preview;
+        8. a generic help response.
+        Previously step 1 matched 'summary', 'statistics' and 'describe' before
+        anything else, so requests such as "Statistics price" always returned the
+        whole-dataset summary and the column path was unreachable for them.
+        Args:
+            question: The user's question as text.
+        Returns:
+            str: Markdown-formatted answer.
+        """
+        question_lower = question.lower().strip()
+        # Columns whose full name appears in the question as a whole word.
+        mentioned = [
+            col for col in self.df.columns
+            if str(col).strip() and re.search(
+                r'(?<!\w)' + re.escape(str(col).lower()) + r'(?!\w)', question_lower)
+        ]
+        summary_keywords = ['summary statistics', 'summary', 'statistics', 'describe',
+                            'overview', 'tell me about', 'what is in', 'dataset summary']
+        wants_summary = any(word in question_lower for word in summary_keywords)
+        # ============ STEP 1: FULL SUMMARY (no specific column named) ============
+        if wants_summary and not mentioned:
+            return self._format_full_summary()
+        # ============ STEP 2: STATISTICS FOR SPECIFIC COLUMN ============
+        if mentioned and re.search(r'\b(?:statistics|statistic|summary|stats?|describe)\b',
+                                   question_lower):
+            # Prefer the longest name so "unit price" beats "price".
+            return self._handle_column_statistics(max(mentioned, key=lambda c: len(str(c))))
+        stat_patterns = [
+            r'(?:statistics|statistic|summary|stats?|describe)\s+(\w+)',
+            r'(\w+)\s+(?:statistics|statistic|summary|stats?|describe)'
+        ]
+        for pattern in stat_patterns:
+            match = re.search(pattern, question_lower)
+            if match:
+                col_candidate = match.group(1)
+                # An exact name wins over a column that merely contains the word.
+                for col in self.df.columns:
+                    if str(col).lower() == col_candidate:
+                        return self._handle_column_statistics(col)
+                for col in self.df.columns:
+                    if col_candidate in str(col).lower():
+                        return self._handle_column_statistics(col)
+        # A summary keyword was present but no column statistics could be
+        # resolved: keep the original outcome and show the full summary.
+        if wants_summary:
+            return self._format_full_summary()
+        # ============ STEP 3: CHECK FOR ID COLUMN QUESTIONS ============
+        for id_col in self.id_columns:
+            if id_col.lower() in question_lower:
+                return self._handle_id_question(id_col)
+        # ============ STEPS 4-7: KEYWORD-ROUTED HANDLERS (order matters) ============
+        routes = [
+            (['total', 'sum', 'add up', 'combined'], self._handle_total_question),
+            (['average', 'mean', 'avg'], self._handle_average_question),
+            (['minimum', 'min', 'lowest', 'smallest', 'least'], self._handle_min_question),
+            (['maximum', 'max', 'highest', 'largest', 'most', 'greatest'], self._handle_max_question),
+            (['top', 'best'], self._handle_ranking_question),
+            (['by', 'per', 'for each', 'grouped by'], self._handle_group_question),
+            (['count', 'how many', 'number of'], self._handle_count_question),
+            (['show', 'display', 'view', 'preview', 'see', 'list'], self._handle_show_question),
+        ]
+        for keywords, handler in routes:
+            if any(word in question_lower for word in keywords):
+                result = handler(question_lower)
+                # A falsy result means "not mine"; try the next route.
+                if result:
+                    return result
+        # ============ STEP 8: SMART RESPONSE ============
+        return self._smart_response(question_lower)
+    def _handle_column_statistics(self, col_name):
+        """Build a Markdown statistics report for a single column.
+        ID columns get an explanatory notice, meaningful numeric columns get the
+        ``describe()`` figures plus a total, and any other existing column gets
+        unique/mode/missing counts with its five most frequent values. An unknown
+        name yields a "not found" message listing the first ten columns.
+        Args:
+            col_name: Column label to report on.
+        Returns:
+            str: The formatted report or message.
+        """
+        measures = self._get_meaningful_numeric_columns()
+        # --- ID column: statistics would be meaningless ---
+        if col_name in self.id_columns:
+            alternatives = ', '.join(measures[:3]) if measures else 'None found'
+            return f"""⚠️ **'{col_name}' is an ID column**
+Statistics for ID columns are not meaningful because:
+• IDs are unique identifiers, not measurements
+• Each ID appears only once typically
+**What you CAN do:**
+• Count how many IDs: "{col_name} count"
+• View the data: "Show {col_name}"
+• Analyze other columns: {alternatives}"""
+        # --- Meaningful numeric column ---
+        if col_name in measures:
+            summary = self.df[col_name].describe()
+            bullets = [
+                f"• **Count**: {summary['count']:,.0f}",
+                f"• **Mean**: {summary['mean']:,.2f}",
+                f"• **Standard Deviation**: {summary['std']:,.2f}",
+                f"• **Minimum**: {summary['min']:,.2f}",
+                f"• **25th Percentile**: {summary['25%']:,.2f}",
+                f"• **Median (50th)**: {summary['50%']:,.2f}",
+                f"• **75th Percentile**: {summary['75%']:,.2f}",
+                f"• **Maximum**: {summary['max']:,.2f}",
+                f"• **Total**: {self.df[col_name].sum():,.2f}",
+            ]
+            return f"📊 **Statistics for {col_name}**\n\n" + "\n".join(bullets)
+        # --- Any other column present in the frame ---
+        if col_name in self.df.columns:
+            series = self.df[col_name]
+            modes = series.mode()
+            most_common = modes[0] if len(modes) > 0 else 'N/A'
+            pieces = [
+                f"📊 **Statistics for {col_name}**\n\n",
+                f"• **Unique values**: {series.nunique():,}\n",
+                f"• **Most common**: {most_common}\n",
+                f"• **Missing values**: {series.isnull().sum():,}\n",
+                "\n**Top 5 values:**\n",
+            ]
+            row_total = len(self.df)
+            for val, count in series.value_counts().head(5).items():
+                pieces.append(f"  • {val}: {count} ({count/row_total*100:.1f}%)\n")
+            return "".join(pieces)
+        # --- Unknown column ---
+        return f"❌ Column '{col_name}' not found. Available columns: {', '.join(self.df.columns[:10])}..."
+    def _handle_id_question(self, id_col):
+        """Explain why arithmetic on an ID column is not offered and suggest alternatives.
+        Args:
+            id_col: Label of the ID column named in the question.
+        Returns:
+            str: Markdown notice including the column's distinct-value count and
+            up to three meaningful numeric columns to analyse instead.
+        """
+        distinct_ids = self.df[id_col].nunique()
+        measures = self._get_meaningful_numeric_columns()
+        suggestions = ', '.join(measures[:3]) if measures else 'None found'
+        return f"""⚠️ **'{id_col}' is an ID column** (unique identifier)
+Averages, sums, or other mathematical calculations on ID values are **not meaningful** because:
+• IDs are just labels, not measurements
+• Each ID is typically unique
+**What you can do instead:**
+• Count how many unique IDs: {distinct_ids} unique values
+• Group data by other columns: "Show [category] by [metric]"
+• Analyze meaningful numeric columns: {suggestions}"""
+    def _handle_total_question(self, question):
+        """Answer a total/sum question.
+        Sums the first meaningful numeric column whose lower-cased name occurs in
+        the question; when none is named, falls back to the first meaningful
+        numeric column.
+        Args:
+            question: Lower-cased question text.
+        Returns:
+            str | None: Formatted total, or None when there is no numeric column.
+        """
+        measures = self._get_meaningful_numeric_columns()
+        target = next((name for name in measures if name.lower() in question), None)
+        if target is None:
+            if not measures:
+                return None
+            # Nothing named explicitly: default to the first measure.
+            target = measures[0]
+        return f"💰 **Total {target}**: {self.df[target].sum():,.2f}"
+    def _handle_average_question(self, question):
+        """Answer an average/mean question.
+        Averages the first meaningful numeric column whose lower-cased name occurs
+        in the question; when none is named, falls back to the first meaningful
+        numeric column.
+        Args:
+            question: Lower-cased question text.
+        Returns:
+            str | None: Formatted average, or None when there is no numeric column.
+        """
+        measures = self._get_meaningful_numeric_columns()
+        target = next((name for name in measures if name.lower() in question), None)
+        if target is None:
+            if not measures:
+                return None
+            # Nothing named explicitly: default to the first measure.
+            target = measures[0]
+        return f"📊 **Average {target}**: {self.df[target].mean():,.2f}"
+    def _handle_min_question(self, question):
+        """Answer a minimum question for the first numeric column named in it.
+        Args:
+            question: Lower-cased question text.
+        Returns:
+            str | None: Formatted minimum, or None when no meaningful numeric
+            column name occurs in the question.
+        """
+        measures = self._get_meaningful_numeric_columns()
+        target = next((name for name in measures if name.lower() in question), None)
+        if target is None:
+            return None
+        return f"📉 **Minimum {target}**: {self.df[target].min():,.2f}"
+    def _handle_max_question(self, question):
+        """Answer a maximum question for the first numeric column named in it.
+        Args:
+            question: Lower-cased question text.
+        Returns:
+            str | None: Formatted maximum, or None when no meaningful numeric
+            column name occurs in the question.
+        """
+        measures = self._get_meaningful_numeric_columns()
+        target = next((name for name in measures if name.lower() in question), None)
+        if target is None:
+            return None
+        return f"🏆 **Maximum {target}**: {self.df[target].max():,.2f}"
+    def _handle_ranking_question(self, question):
+        """Answer a "top N" question by ranking categories on a summed metric.
+        N comes from "top <digits>" in the question (default 5). The metric and
+        the category are the first numeric / categorical columns named in the
+        question, each falling back to the first available column of its kind.
+        Args:
+            question: Lower-cased question text.
+        Returns:
+            str | None: Numbered ranking, or None when no metric or no category
+            can be determined.
+        """
+        requested = re.search(r'top\s+(\d+)', question)
+        n = 5 if not requested else int(requested.group(1))
+        measures = self._get_meaningful_numeric_columns()
+        metric = next((name for name in measures if name.lower() in question), None)
+        if not metric and measures:
+            metric = measures[0]
+        groupers = self.schema['categorical']
+        category = next((name for name in groupers if name.lower() in question), None)
+        if not category and groupers:
+            category = groupers[0]
+        if not (metric and category):
+            return None
+        # Sum the metric per category and keep the n largest.
+        totals = self.df.groupby(category)[metric].sum()
+        leaders = totals.sort_values(ascending=False).head(n)
+        rows = [
+            f"{rank}. **{item}**: {val:,.2f}\n"
+            for rank, (item, val) in enumerate(leaders.items(), 1)
+        ]
+        return f"🏆 **Top {n} {category} by {metric}**\n\n" + "".join(rows)
+    def _handle_group_question(self, question):
+        """Answer a "<metric> by <category>" question with per-category sums.
+        Both the metric (a meaningful numeric column) and the category (a
+        categorical column) must be named in the question; there is no fallback.
+        Args:
+            question: Lower-cased question text.
+        Returns:
+            str | None: Numbered breakdown sorted by descending sum, followed by
+            the grand total; None when either column is not named.
+        """
+        measures = self._get_meaningful_numeric_columns()
+        metric = next((name for name in measures if name.lower() in question), None)
+        groupers = self.schema['categorical']
+        category = next((name for name in groupers if name.lower() in question), None)
+        if not (metric and category):
+            return None
+        breakdown = self.df.groupby(category)[metric].sum().sort_values(ascending=False)
+        rows = "".join(
+            f"{rank}. **{item}**: {val:,.2f}\n"
+            for rank, (item, val) in enumerate(breakdown.items(), 1)
+        )
+        return f"📊 **{metric} by {category}**\n\n" + rows + f"\n**Total**: {breakdown.sum():,.2f}"
+    def _handle_count_question(self, question):
+        """Answer a count question.
+        Reports the number of distinct values of the first column whose
+        lower-cased name occurs in the question; otherwise, if the question
+        mentions 'rows' or 'records', reports the number of rows.
+        Args:
+            question: Lower-cased question text.
+        Returns:
+            str | None: Formatted count, or None when neither case applies.
+        """
+        named = [col for col in self.df.columns if col.lower() in question]
+        if named:
+            # Column order decides when several names occur in the question.
+            first = named[0]
+            return f"📊 **{first}**: {self.df[first].nunique()} unique values"
+        if not ('rows' in question or 'records' in question):
+            return None
+        return f"📊 **Total records**: {len(self.df):,} rows"
+    def _handle_show_question(self, question):
+        """Render the first rows of the data as a fenced text table.
+        The row count is the first run of digits found anywhere in the question
+        (default 5).
+        Args:
+            question: Lower-cased question text.
+        Returns:
+            str: Markdown heading followed by ``DataFrame.head(n).to_string()``
+            inside a code fence.
+        """
+        digits = re.search(r'(\d+)', question)
+        n = 5 if digits is None else int(digits.group(1))
+        table = self.df.head(n).to_string()
+        return f"**📊 Data Preview (First {n} rows)**\n\n```\n" + table + "\n```"
+    def _format_full_summary(self):
+        """Compose the whole-dataset overview.
+        Covers the frame size, how many columns fall in each group (meaningful
+        numeric, ID, categorical), mean and total for up to five numeric columns,
+        and distinct-value counts for up to three categorical columns.
+        Returns:
+            str: Markdown-formatted summary.
+        """
+        measures = self._get_meaningful_numeric_columns()
+        groupers = self.schema['categorical']
+        pieces = [
+            "📊 **Complete Data Summary**\n\n",
+            f"**Dataset Size**: {len(self.df):,} rows × {len(self.df.columns)} columns\n\n",
+            "**Column Types:**\n",
+            f"• Meaningful numeric columns: {len(measures)}\n",
+            f"• ID columns (excluded): {len(self.id_columns)}\n",
+            f"• Categorical columns: {len(groupers)}\n",
+        ]
+        if measures:
+            pieces.append("\n**Key Numeric Statistics:**\n")
+            pieces.extend(
+                f"• {col}: Mean={self.df[col].mean():.2f}, Total={self.df[col].sum():,.0f}\n"
+                for col in measures[:5]
+            )
+        if groupers:
+            pieces.append("\n**Categorical Columns:**\n")
+            pieces.extend(
+                f"• {col}: {self.df[col].nunique()} unique values\n"
+                for col in groupers[:3]
+            )
+        return "".join(pieces)
+    def _smart_response(self, question):
+        """Produce the fallback help text for a question no handler answered.
+        Lists the frame size, up to five numeric and three categorical columns,
+        and example questions built from the first column of each kind.
+        Args:
+            question: Lower-cased question text (not used in the reply).
+        Returns:
+            str: Markdown-formatted guidance.
+        """
+        measures = self._get_meaningful_numeric_columns()
+        groupers = self.schema['categorical']
+        pieces = [
+            "💡 **I understand you're asking about your data.**\n\n",
+            "📊 **Here's what's available:**\n",
+            f"• {len(self.df):,} rows, {len(self.df.columns)} columns\n",
+        ]
+        if measures:
+            pieces.append(f"• Numeric columns to analyze: {', '.join(measures[:5])}\n")
+        if groupers:
+            pieces.append(f"• Categories to group by: {', '.join(groupers[:3])}\n")
+        pieces.append("\n📝 **Try these example questions:**\n\n")
+        if measures:
+            sample = measures[0]
+            for verb in ("Statistics", "Total", "Average"):
+                pieces.append(f"• '{verb} {sample}'\n")
+        if groupers and measures:
+            pieces.append(f"• 'Top 5 {groupers[0]} by {measures[0]}'\n")
+        pieces.append("• 'Summary statistics'\n")
+        pieces.append("• 'Show me the data'")
+        return "".join(pieces)

app/session_manager.py ADDED Viewed

	@@ -0,0 +1,82 @@

+"""
+Session Manager - Save and load analysis sessions
+"""
+import json
+import pickle
+import os
+from datetime import datetime
+import pandas as pd
+class SessionManager:
+    """Persist analysis sessions (a DataFrame plus its schema) as pickle files.
+    Each session is stored as ``<session_dir>/<name>.pkl``. ``load_session`` and
+    ``delete_session`` accept a name with or without the ``.pkl`` suffix.
+    SECURITY: sessions are read with ``pickle.load``, which can execute
+    arbitrary code. Only files this application wrote itself should ever be
+    placed in the session directory.
+    """
+    def __init__(self, session_dir="saved_sessions"):
+        """Remember the session directory and create it if it does not exist."""
+        self.session_dir = session_dir
+        os.makedirs(session_dir, exist_ok=True)
+    def _resolve_path(self, name):
+        """Return the path of an existing session file inside the session directory, or None.
+        Tries ``name`` as given and then ``name + '.pkl'``. Candidates that
+        resolve outside the session directory (e.g. via ``..``) are ignored so a
+        caller-supplied name cannot unpickle or delete arbitrary files.
+        """
+        base = os.path.realpath(self.session_dir)
+        for candidate in (str(name), f"{name}.pkl"):
+            path = os.path.realpath(os.path.join(self.session_dir, candidate))
+            try:
+                inside = os.path.commonpath([base, path]) == base
+            except ValueError:
+                # Raised e.g. for paths on different drives: not inside.
+                inside = False
+            if inside and os.path.isfile(path):
+                return path
+        return None
+    def save_session(self, df, schema, name=None):
+        """Write the frame and schema to ``<name>.pkl``.
+        Args:
+            df: DataFrame to store (serialised as a list of row dicts).
+            schema: Schema object stored alongside the data.
+            name: Session name; defaults to ``session_<YYYYmmdd_HHMMSS>``.
+        Returns:
+            tuple: ``(name, filepath)`` of the saved session.
+        """
+        if name is None:
+            name = f"session_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
+        session_data = {
+            'name': name,
+            'timestamp': datetime.now().isoformat(),
+            'data': df.to_dict('records'),
+            'columns': list(df.columns),
+            'dtypes': df.dtypes.astype(str).to_dict(),
+            'schema': schema,
+            'shape': df.shape
+        }
+        filepath = os.path.join(self.session_dir, f"{name}.pkl")
+        with open(filepath, 'wb') as f:
+            pickle.dump(session_data, f)
+        return name, filepath
+    def load_session(self, name):
+        """Load a saved session.
+        Args:
+            name: Session name or file name (``.pkl`` suffix optional).
+        Returns:
+            dict | None: ``{'df', 'schema', 'name', 'timestamp'}``, or None when
+            no matching session file exists.
+        """
+        filepath = self._resolve_path(name)
+        if filepath is None:
+            return None
+        with open(filepath, 'rb') as f:
+            # See the class docstring: only unpickle files this app wrote.
+            session_data = pickle.load(f)
+        # Reconstruct DataFrame. An empty record list carries no column
+        # information, so fall back to the column list that was saved with it.
+        records = session_data['data']
+        if records:
+            df = pd.DataFrame(records)
+        else:
+            df = pd.DataFrame(columns=session_data.get('columns'))
+        return {
+            'df': df,
+            'schema': session_data['schema'],
+            'name': session_data['name'],
+            'timestamp': session_data['timestamp']
+        }
+    def list_sessions(self):
+        """List all readable saved sessions, newest first.
+        Files that cannot be read or lack the expected keys are skipped so a
+        single damaged file does not break the whole listing.
+        Returns:
+            list[dict]: One ``{'name', 'timestamp', 'rows', 'columns', 'file'}``
+            entry per session, sorted by timestamp descending.
+        """
+        sessions = []
+        for file in os.listdir(self.session_dir):
+            if not file.endswith('.pkl'):
+                continue
+            filepath = os.path.join(self.session_dir, file)
+            try:
+                with open(filepath, 'rb') as f:
+                    data = pickle.load(f)
+                sessions.append({
+                    'name': data['name'],
+                    'timestamp': data['timestamp'],
+                    'rows': data['shape'][0],
+                    'columns': data['shape'][1],
+                    'file': file
+                })
+            except (OSError, EOFError, pickle.UnpicklingError, KeyError, IndexError,
+                    TypeError, AttributeError, ImportError):
+                # Unreadable, truncated or foreign pickle: leave it out.
+                continue
+        return sorted(sessions, key=lambda x: x['timestamp'], reverse=True)
+    def delete_session(self, name):
+        """Delete a saved session.
+        Args:
+            name: Session name or file name (``.pkl`` suffix optional), matching
+                what ``load_session`` accepts.
+        Returns:
+            bool: True if a file was removed, False if no such session exists.
+        """
+        filepath = self._resolve_path(name)
+        if filepath is None:
+            return False
+        os.remove(filepath)
+        return True

requirements.txt ADDED Viewed

	@@ -0,0 +1,15 @@

+# Python dependencies for Smart Analytics Copilot (install with: pip install -r requirements.txt)
+fastapi==0.104.1
+uvicorn==0.24.0
+pandas==2.1.3
+numpy==1.24.3
+scipy==1.11.4
+plotly==5.18.0
+streamlit==1.29.0
+openai==1.3.0
+python-multipart==0.0.6
+sqlalchemy==2.0.23
+jinja2==3.1.2
+openpyxl==3.1.2
+python-dotenv==1.0.0

run.py ADDED Viewed

	@@ -0,0 +1,22 @@

+"""
+Run the Smart Analytics Copilot
+"""
+import subprocess
+import sys
+def main():
+    """Print a short startup banner, then run the Streamlit app and wait for it to exit."""
+    for banner_line in (
+        "🚀 Starting Smart Analytics Copilot...",
+        "📊 Your dashboard will open in your browser",
+        "",
+    ):
+        print(banner_line)
+    # Launch Streamlit through the interpreter that is running this script.
+    command = [sys.executable, "-m", "streamlit", "run", "app/main.py"]
+    command += ["--server.port", "8501"]
+    command += ["--server.address", "localhost"]
+    subprocess.run(command)
+# Start the app only when this file is executed as a script, not when imported.
+if __name__ == "__main__":
+    main()

saved_sessions/session_20260418_131145.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:55f78a61b8f7c53962476b6d487b8be7b9139e4d925df527ee3ad37c5a746b00
+size 95913946

saved_sessions/session_20260418_132524.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1c982dc574744594a1292c45e67c38b85cb5d8b7e8a7d8f96f35350743245041
+size 30056318

saved_sessions/session_20260418_135615.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1f50bab79550ce31f3304228b8d0f5eac2c786b86d2f48ff09967eec9bd0c52c
+size 30056318

saved_sessions/session_20260418_135934.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2bac5b4a06a4abe31fcdf26c1999bcdd5fb288e5b150fdc52665de8b998be62d
+size 360452