Spaces:

salihfurkaan
/

auto-data-analyst

Sleeping

App Files Files Community

salihfurkaan committed 17 days ago

Commit

bbdd10b

1 Parent(s): 9333089

Add Chat, Dashboard, and Export features

Browse files

Files changed (2) hide show

app.py +54 -8
src/visualization.py +31 -4

app.py CHANGED Viewed

@@ -7,7 +7,7 @@ from src.profiling import profile_data, get_overview_text
 from src.cleaning import clean_data
 from src.anomalies import detect_anomalies
 from src.visualization import generate_charts
-from src.llm import get_insights, get_followup_questions
 # Global state to hold the dataframe for chat (if needed in future)
 # For this stateless demo, we process per request.
@@ -18,17 +18,19 @@ def analyze_dataset(file_obj, api_token):
     if file_obj is None:
         return (
             "## Please upload a file to begin.",
             "",
             None,
             "",
             pd.DataFrame(),
-            ""
         )
     # 1. Ingestion
     df, error = load_file(file_obj)
     if error:
-        return f"## Error: {error}", "", None, "", pd.DataFrame(), ""
     # 2. Profiling & Cleaning
     # flexible cleaning: we verify and clean column names for consistent access
@@ -40,6 +42,7 @@ def analyze_dataset(file_obj, api_token):
     anomalies_df, anomaly_summary = detect_anomalies(df_clean)
     # 4. Visualization
     chart_figure = generate_charts(df_clean, profile)
     # 5. LLM Insights & Questions
@@ -50,14 +53,19 @@ def analyze_dataset(file_obj, api_token):
     # Format Outputs
     overview_output = f"{overview_text}\n\n**Data Cleaning Log:**\n" + "\n".join([f"- {item}" for item in cleaning_log])
     return (
         overview_output,        # Dataset Overview (Markdown)
-        df_clean.head(),        # Dataset Overview (DataFrame) matches UI expectation
         insights,               # Key Insights
         chart_figure,           # Visual Story
         f"### Anomaly Detection Report\n{anomaly_summary}", # Anomalies Markdown
         anomalies_df,           # Anomalies DataFrame
-        questions               # Next Steps
     )
 def load_example():
@@ -76,15 +84,32 @@ def load_example():
     df.to_csv("example_dataset.csv", index=False)
     return "example_dataset.csv"
 # Updated process function wrapper to match inputs/outputs
 def process_file_wrapper(file_obj, api_token):
     results = analyze_dataset(file_obj, api_token)
-    return results
 with gr.Blocks(title="Auto Data Analyst", theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 📊 Auto Data Analyst — No Questions Needed")
     gr.Markdown("Upload your structured data (CSV, Excel, JSON, Parquet) and get instant professional insights.")
     with gr.Row():
         with gr.Column(scale=1):
             with gr.Group():
@@ -103,25 +128,46 @@ with gr.Blocks(title="Auto Data Analyst", theme=gr.themes.Soft()) as demo:
                 with gr.TabItem("Dataset Overview"):
                     overview_md = gr.Markdown("Please upload a file to see the overview.")
                     dataframe_view = gr.Dataframe(interactive=False, label="Data Preview")
                 with gr.TabItem("Key Insights"):
                     insights_md = gr.Markdown("Insights will appear here.")
                 with gr.TabItem("Visual Story"):
-                    charts_plot = gr.Plot(label="Data Visualization")
                 with gr.TabItem("Anomalies & Outliers"):
                     anomalies_md = gr.Markdown("Anomaly detection results.")
                     anomalies_df_view = gr.Dataframe(interactive=False, label="Detected Anomalies")
                 with gr.TabItem("Next Steps"):
                     questions_md = gr.Markdown("Suggested follow-up questions.")
     # Event wiring
     file_upload.change(
         fn=process_file_wrapper,
         inputs=[file_upload, api_token_input],
-        outputs=[overview_md, dataframe_view, insights_md, charts_plot, anomalies_md, anomalies_df_view, questions_md]
     )
     example_btn.click(

 from src.cleaning import clean_data
 from src.anomalies import detect_anomalies
 from src.visualization import generate_charts
+from src.llm import get_insights, get_followup_questions, ask_llm
 # Global state to hold the dataframe for chat (if needed in future)
 # For this stateless demo, we process per request.
     if file_obj is None:
         return (
             "## Please upload a file to begin.",
+            pd.DataFrame(),
             "",
             None,
             "",
             pd.DataFrame(),
+            "",
+            None # For download file
         )
     # 1. Ingestion
     df, error = load_file(file_obj)
     if error:
+        return f"## Error: {error}", pd.DataFrame(), "", None, "", pd.DataFrame(), "", None
     # 2. Profiling & Cleaning
     # flexible cleaning: we verify and clean column names for consistent access
     anomalies_df, anomaly_summary = detect_anomalies(df_clean)
     # 4. Visualization
+    # Now returns a subplot figure
     chart_figure = generate_charts(df_clean, profile)
     # 5. LLM Insights & Questions
     # Format Outputs
     overview_output = f"{overview_text}\n\n**Data Cleaning Log:**\n" + "\n".join([f"- {item}" for item in cleaning_log])
+    # Save cleaned data for download
+    output_path = "cleaned_data.csv"
+    df_clean.to_csv(output_path, index=False)
     return (
         overview_output,        # Dataset Overview (Markdown)
+        df_clean.head(),        # Dataset Overview (DataFrame)
         insights,               # Key Insights
         chart_figure,           # Visual Story
         f"### Anomaly Detection Report\n{anomaly_summary}", # Anomalies Markdown
         anomalies_df,           # Anomalies DataFrame
+        questions,              # Next Steps
+        output_path             # Download File Path
     )
 def load_example():
     df.to_csv("example_dataset.csv", index=False)
     return "example_dataset.csv"
+# Chat handler. Gradio's ChatInterface cannot easily receive values produced by
+# another component's output unless they are held in global state or in a
+# state component, so the overview text is stored in a gr.State component and
+# passed to this function through `additional_inputs`.
+def chat_response(message, history, overview_text, api_token):
+    if not overview_text:
+        return "Please upload and analyze a dataset first."
+    return ask_llm(message, history, overview_text, api_token)
 # Updated process function wrapper to match inputs/outputs
 def process_file_wrapper(file_obj, api_token):
     results = analyze_dataset(file_obj, api_token)
+    # `results` structure: (overview_md, df_head, insights, chart, anomalies_md, anomalies_df, questions, download_path)
+    # The overview Markdown must also be written to the state component,
+    # so return all UI outputs followed by the state value.
+    return results + (results[0],) # Append overview_md for the state
 with gr.Blocks(title="Auto Data Analyst", theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 📊 Auto Data Analyst — No Questions Needed")
     gr.Markdown("Upload your structured data (CSV, Excel, JSON, Parquet) and get instant professional insights.")
+    # State to hold the overview text for the chatbot
+    overview_state = gr.State()
     with gr.Row():
         with gr.Column(scale=1):
             with gr.Group():
                 with gr.TabItem("Dataset Overview"):
                     overview_md = gr.Markdown("Please upload a file to see the overview.")
                     dataframe_view = gr.Dataframe(interactive=False, label="Data Preview")
+                    download_btn = gr.DownloadButton("Download Cleaned Data", label="Download CSV")
                 with gr.TabItem("Key Insights"):
                     insights_md = gr.Markdown("Insights will appear here.")
                 with gr.TabItem("Visual Story"):
+                    charts_plot = gr.Plot(label="Data Visualization Dashboard")
                 with gr.TabItem("Anomalies & Outliers"):
                     anomalies_md = gr.Markdown("Anomaly detection results.")
                     anomalies_df_view = gr.Dataframe(interactive=False, label="Detected Anomalies")
+                with gr.TabItem("Chat with Data"):
+                    chatbot = gr.ChatInterface(
+                        fn=chat_response,
+                        additional_inputs=[overview_state, api_token_input],
+                        type="messages"
+                    )
                 with gr.TabItem("Next Steps"):
                     questions_md = gr.Markdown("Suggested follow-up questions.")
     # Event wiring
+    # The state component must also receive the overview text, so the wrapper
+    # function appends it to analyze_dataset's results; the outputs below are listed in that same order.
     file_upload.change(
         fn=process_file_wrapper,
         inputs=[file_upload, api_token_input],
+        outputs=[
+            overview_md,
+            dataframe_view,
+            insights_md,
+            charts_plot,
+            anomalies_md,
+            anomalies_df_view,
+            questions_md,
+            download_btn,
+            overview_state
+        ]
     )
     example_btn.click(

src/visualization.py CHANGED Viewed

@@ -49,8 +49,35 @@ def generate_charts(df, profile):
     # or create a subplot.
     # Let's return the Correlation Matrix as the "Visual Story" hero if available, otherwise a distribution.
-    if figures:
-        # Return the first one as the hero
-        return figures[0]
-    return None

     # or create a subplot.
     # Let's return the Correlation Matrix as the "Visual Story" hero if available, otherwise a distribution.
+    # Create a subplot figure
+    import plotly.subplots as sp
+    rows = 2
+    cols = 2
+    titles = []
+    # Logic to pick 4 charts max
+    charts_to_show = figures[:4]
+    # Arbitrary Plotly Express figures cannot easily be merged into subplots while keeping all of their
+    # properties intact without some work. The alternatives are to stack them, or to return the list and
+    # let Gradio rows/columns handle the layout.
+    # However, Gradio's Plot component handles a single figure.
+    # Options: build a dashboard with make_subplots if possible,
+    # OR return a list of figures and update app.py to use multiple Plot components.
+    # The requirement is "Enhance Visualizations".
+    # Possible approach: use graph_objects to build a 2x2 grid manually, or keep the list if the app supports it.
+    # Returning the list and rendering it as a gallery in the app would be the easier improvement,
+    # BUT app.py wires the result of this function to a single output, `charts_plot`.
+    # So build one subplot figure and copy each chart's traces into it.
+    fig = sp.make_subplots(rows=2, cols=2, subplot_titles=[f.layout.title.text for f in charts_to_show])
+    for i, f in enumerate(charts_to_show):
+        row = (i // 2) + 1
+        col = (i % 2) + 1
+        for trace in f.data:
+            fig.add_trace(trace, row=row, col=col)
+    fig.update_layout(height=800, title_text="Data Visualization Dashboard", showlegend=False)
+    return fig