salihfurkaan committed on
Commit
bbdd10b
·
1 Parent(s): 9333089

Add Chat, Dashboard, and Export features

Browse files
Files changed (2) hide show
  1. app.py +54 -8
  2. src/visualization.py +31 -4
app.py CHANGED
@@ -7,7 +7,7 @@ from src.profiling import profile_data, get_overview_text
7
  from src.cleaning import clean_data
8
  from src.anomalies import detect_anomalies
9
  from src.visualization import generate_charts
10
- from src.llm import get_insights, get_followup_questions
11
 
12
  # Global state to hold the dataframe for chat (if needed in future)
13
  # For this stateless demo, we process per request.
@@ -18,17 +18,19 @@ def analyze_dataset(file_obj, api_token):
18
  if file_obj is None:
19
  return (
20
  "## Please upload a file to begin.",
 
21
  "",
22
  None,
23
  "",
24
  pd.DataFrame(),
25
- ""
 
26
  )
27
 
28
  # 1. Ingestion
29
  df, error = load_file(file_obj)
30
  if error:
31
- return f"## Error: {error}", "", None, "", pd.DataFrame(), ""
32
 
33
  # 2. Profiling & Cleaning
34
  # flexible cleaning: we verify and clean column names for consistent access
@@ -40,6 +42,7 @@ def analyze_dataset(file_obj, api_token):
40
  anomalies_df, anomaly_summary = detect_anomalies(df_clean)
41
 
42
  # 4. Visualization
 
43
  chart_figure = generate_charts(df_clean, profile)
44
 
45
  # 5. LLM Insights & Questions
@@ -50,14 +53,19 @@ def analyze_dataset(file_obj, api_token):
50
  # Format Outputs
51
  overview_output = f"{overview_text}\n\n**Data Cleaning Log:**\n" + "\n".join([f"- {item}" for item in cleaning_log])
52
 
 
 
 
 
53
  return (
54
  overview_output, # Dataset Overview (Markdown)
55
- df_clean.head(), # Dataset Overview (DataFrame) matches UI expectation
56
  insights, # Key Insights
57
  chart_figure, # Visual Story
58
  f"### Anomaly Detection Report\n{anomaly_summary}", # Anomalies Markdown
59
  anomalies_df, # Anomalies DataFrame
60
- questions # Next Steps
 
61
  )
62
 
63
  def load_example():
@@ -76,15 +84,32 @@ def load_example():
76
  df.to_csv("example_dataset.csv", index=False)
77
  return "example_dataset.csv"
78
 
 
 
 
 
 
 
 
 
 
 
79
  # Updated process function wrapper to match inputs/outputs
80
  def process_file_wrapper(file_obj, api_token):
81
  results = analyze_dataset(file_obj, api_token)
82
- return results
 
 
 
 
83
 
84
  with gr.Blocks(title="Auto Data Analyst", theme=gr.themes.Soft()) as demo:
85
  gr.Markdown("# 📊 Auto Data Analyst — No Questions Needed")
86
  gr.Markdown("Upload your structured data (CSV, Excel, JSON, Parquet) and get instant professional insights.")
87
 
 
 
 
88
  with gr.Row():
89
  with gr.Column(scale=1):
90
  with gr.Group():
@@ -103,25 +128,46 @@ with gr.Blocks(title="Auto Data Analyst", theme=gr.themes.Soft()) as demo:
103
  with gr.TabItem("Dataset Overview"):
104
  overview_md = gr.Markdown("Please upload a file to see the overview.")
105
  dataframe_view = gr.Dataframe(interactive=False, label="Data Preview")
 
106
 
107
  with gr.TabItem("Key Insights"):
108
  insights_md = gr.Markdown("Insights will appear here.")
109
 
110
  with gr.TabItem("Visual Story"):
111
- charts_plot = gr.Plot(label="Data Visualization")
112
 
113
  with gr.TabItem("Anomalies & Outliers"):
114
  anomalies_md = gr.Markdown("Anomaly detection results.")
115
  anomalies_df_view = gr.Dataframe(interactive=False, label="Detected Anomalies")
116
 
 
 
 
 
 
 
 
117
  with gr.TabItem("Next Steps"):
118
  questions_md = gr.Markdown("Suggested follow-up questions.")
119
 
120
  # Event wiring
 
 
 
121
  file_upload.change(
122
  fn=process_file_wrapper,
123
  inputs=[file_upload, api_token_input],
124
- outputs=[overview_md, dataframe_view, insights_md, charts_plot, anomalies_md, anomalies_df_view, questions_md]
 
 
 
 
 
 
 
 
 
 
125
  )
126
 
127
  example_btn.click(
 
7
  from src.cleaning import clean_data
8
  from src.anomalies import detect_anomalies
9
  from src.visualization import generate_charts
10
+ from src.llm import get_insights, get_followup_questions, ask_llm
11
 
12
  # Global state to hold the dataframe for chat (if needed in future)
13
  # For this stateless demo, we process per request.
 
18
  if file_obj is None:
19
  return (
20
  "## Please upload a file to begin.",
21
+ pd.DataFrame(),
22
  "",
23
  None,
24
  "",
25
  pd.DataFrame(),
26
+ "",
27
+ None # For download file
28
  )
29
 
30
  # 1. Ingestion
31
  df, error = load_file(file_obj)
32
  if error:
33
+ return f"## Error: {error}", pd.DataFrame(), "", None, "", pd.DataFrame(), "", None
34
 
35
  # 2. Profiling & Cleaning
36
  # flexible cleaning: we verify and clean column names for consistent access
 
42
  anomalies_df, anomaly_summary = detect_anomalies(df_clean)
43
 
44
  # 4. Visualization
45
+ # Now returns a subplot figure
46
  chart_figure = generate_charts(df_clean, profile)
47
 
48
  # 5. LLM Insights & Questions
 
53
  # Format Outputs
54
  overview_output = f"{overview_text}\n\n**Data Cleaning Log:**\n" + "\n".join([f"- {item}" for item in cleaning_log])
55
 
56
+ # Save cleaned data for download
57
+ output_path = "cleaned_data.csv"
58
+ df_clean.to_csv(output_path, index=False)
59
+
60
  return (
61
  overview_output, # Dataset Overview (Markdown)
62
+ df_clean.head(), # Dataset Overview (DataFrame)
63
  insights, # Key Insights
64
  chart_figure, # Visual Story
65
  f"### Anomaly Detection Report\n{anomaly_summary}", # Anomalies Markdown
66
  anomalies_df, # Anomalies DataFrame
67
+ questions, # Next Steps
68
+ output_path # Download File Path
69
  )
70
 
71
  def load_example():
 
84
  df.to_csv("example_dataset.csv", index=False)
85
  return "example_dataset.csv"
86
 
87
+ # Wrapper for chat to partial-bind state (overview_text, etc)
88
+ # But Gradio ChatInterface doesn't easily allow passing extra state dynamically from another component's output
89
+ # unless using global state or state components.
90
+ # We will use a gr.State component to store the overview text.
91
+
92
+ def chat_response(message, history, overview_text, api_token):
93
+ if not overview_text:
94
+ return "Please upload and analyze a dataset first."
95
+ return ask_llm(message, history, overview_text, api_token)
96
+
97
  # Updated process function wrapper to match inputs/outputs
98
  def process_file_wrapper(file_obj, api_token):
99
  results = analyze_dataset(file_obj, api_token)
100
+ # results structure: (overview_md, df_head, insights, chart, anomalies_md, anomalies_df, questions, download_path)
101
+ # We also need to return overview_md to the state component
102
+
103
+ # Return all UI outputs + State
104
+ return results + (results[0],) # Append overview_md for the state
105
 
106
  with gr.Blocks(title="Auto Data Analyst", theme=gr.themes.Soft()) as demo:
107
  gr.Markdown("# 📊 Auto Data Analyst — No Questions Needed")
108
  gr.Markdown("Upload your structured data (CSV, Excel, JSON, Parquet) and get instant professional insights.")
109
 
110
+ # State to hold the overview text for the chatbot
111
+ overview_state = gr.State()
112
+
113
  with gr.Row():
114
  with gr.Column(scale=1):
115
  with gr.Group():
 
128
  with gr.TabItem("Dataset Overview"):
129
  overview_md = gr.Markdown("Please upload a file to see the overview.")
130
  dataframe_view = gr.Dataframe(interactive=False, label="Data Preview")
131
+ download_btn = gr.DownloadButton("Download Cleaned Data", label="Download CSV")
132
 
133
  with gr.TabItem("Key Insights"):
134
  insights_md = gr.Markdown("Insights will appear here.")
135
 
136
  with gr.TabItem("Visual Story"):
137
+ charts_plot = gr.Plot(label="Data Visualization Dashboard")
138
 
139
  with gr.TabItem("Anomalies & Outliers"):
140
  anomalies_md = gr.Markdown("Anomaly detection results.")
141
  anomalies_df_view = gr.Dataframe(interactive=False, label="Detected Anomalies")
142
 
143
+ with gr.TabItem("Chat with Data"):
144
+ chatbot = gr.ChatInterface(
145
+ fn=chat_response,
146
+ additional_inputs=[overview_state, api_token_input],
147
+ type="messages"
148
+ )
149
+
150
  with gr.TabItem("Next Steps"):
151
  questions_md = gr.Markdown("Suggested follow-up questions.")
152
 
153
  # Event wiring
154
+ # We need to update analyze_dataset to return the overview_text to the state as well
155
+ # So we need a wrapper function that maps outputs correctly
156
+
157
  file_upload.change(
158
  fn=process_file_wrapper,
159
  inputs=[file_upload, api_token_input],
160
+ outputs=[
161
+ overview_md,
162
+ dataframe_view,
163
+ insights_md,
164
+ charts_plot,
165
+ anomalies_md,
166
+ anomalies_df_view,
167
+ questions_md,
168
+ download_btn,
169
+ overview_state
170
+ ]
171
  )
172
 
173
  example_btn.click(
src/visualization.py CHANGED
@@ -49,8 +49,35 @@ def generate_charts(df, profile):
49
  # or create a subplot.
50
  # Let's return the Correlation Matrix as the "Visual Story" hero if available, otherwise a distribution.
51
 
52
- if figures:
53
- # Return the first one as the hero
54
- return figures[0]
55
 
56
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  # or create a subplot.
50
  # Let's return the Correlation Matrix as the "Visual Story" hero if available, otherwise a distribution.
51
 
52
+ # Create a subplot figure
53
+ import plotly.subplots as sp
 
54
 
55
+ rows = 2
56
+ cols = 2
57
+ titles = []
58
+
59
+ # Logic to pick 4 charts max
60
+ charts_to_show = figures[:4]
61
+
62
+ # We can't easily merge arbitrary Plotly Express figures into subplots directly while keeping all their properties intact
63
+ # without some work, but we can try stacking them or just return the list and let Gradio row/column handle it.
64
+ # Actually, Gradio's Plot component handles one figure.
65
+ # Let's try to create a nice dashboard using make_subplots if possible,
66
+ # OR just return a list of figures and update app.py to have multiple Plot components.
67
+ # The requirement is "Enhance Visualizations".
68
+
69
+ # Approach: Let's use specific graph_objects to build a 2x2 grid manually or stick to the list if App supports it.
70
+ # Easier improvement for now: Return the list of figures, and we update App to render them in a gallery.
71
+ # BUT, the function signature in app.py expects one output for `charts_plot`.
72
+ # Let's try to make a subplot.
73
+
74
+ fig = sp.make_subplots(rows=2, cols=2, subplot_titles=[f.layout.title.text for f in charts_to_show])
75
+
76
+ for i, f in enumerate(charts_to_show):
77
+ row = (i // 2) + 1
78
+ col = (i % 2) + 1
79
+ for trace in f.data:
80
+ fig.add_trace(trace, row=row, col=col)
81
+
82
+ fig.update_layout(height=800, title_text="Data Visualization Dashboard", showlegend=False)
83
+ return fig