alexacido committed on
Commit
85c7df6
·
verified ·
1 Parent(s): 56295c4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +86 -89
app.py CHANGED
@@ -1,17 +1,24 @@
 
1
  import os
2
  import io
3
  import re
4
  import gradio as gr
5
  import pandas as pd
6
- import openai
7
  import matplotlib.pyplot as plt
8
- from dotenv import load_dotenv
9
  from PIL import Image
10
  import traceback
11
 
12
- # Load your OpenAI API key from the environment (Hugging Face Spaces secrets will populate it)
13
- load_dotenv()
14
- openai.api_key = os.getenv("OPENAI_API_KEY")
 
 
 
 
 
 
 
 
15
 
16
  def load_file(file):
17
  """Load a CSV or Excel file into a pandas DataFrame."""
@@ -25,12 +32,12 @@ def load_file(file):
25
  if file_path is None:
26
  return None
27
  try:
28
- if file_name.endswith('.csv'):
29
  df = pd.read_csv(file_path)
30
- elif file_name.endswith('.xlsx'):
31
- df = pd.read_excel(file_path, engine='openpyxl')
32
- elif file_name.endswith('.xls'):
33
- df = pd.read_excel(file_path, engine='xlrd')
34
  else:
35
  return None
36
  except Exception as e:
@@ -41,12 +48,12 @@ def load_file(file):
41
  # Assume file is a file-like object (as on your local machine)
42
  file_name = file.name.lower()
43
  try:
44
- if file_name.endswith('.csv'):
45
  df = pd.read_csv(file)
46
- elif file_name.endswith('.xlsx'):
47
- df = pd.read_excel(file, engine='openpyxl')
48
- elif file_name.endswith('.xls'):
49
- df = pd.read_excel(file, engine='xlrd')
50
  else:
51
  return None
52
  except Exception as e:
@@ -54,24 +61,16 @@ def load_file(file):
54
  return None
55
  return df
56
 
 
57
  def preview_file(file):
58
  """Return the DataFrame for preview."""
59
  df = load_file(file)
60
  if df is None:
61
- # Return a DataFrame with an error message instead of a plain string
62
  return pd.DataFrame({"Error": ["Error loading file or unsupported file type."]})
63
  return df
64
 
 
65
  def generate_basic_understanding_code(df_preview):
66
- """
67
- Generate Python code that performs an exploratory analysis on the DataFrame.
68
- The generated code should output a variable 'basic_info' that is a dictionary containing:
69
- - The data types of each column.
70
- - For numeric columns, summary statistics (mean, median, std, etc.).
71
- - For non-numeric columns, counts, unique values, mode, and frequency distributions.
72
- If charts are generated, ensure plt.show() is called after each chart so they can be captured.
73
- Note: When converting dates, use pd.to_datetime() without a fixed format or with dayfirst=True.
74
- """
75
  prompt = f"""
76
  You are a data analysis expert. Write Python code that performs an exploratory analysis of the DataFrame.
77
  Assume a pandas DataFrame named 'df' is already loaded.
@@ -83,37 +82,26 @@ For each column in df, include its data type.
83
  When converting date strings to datetime, use pd.to_datetime() without a fixed format or with dayfirst=True.
84
  If your analysis includes charts, call plt.show() after each chart so they can be captured.
85
  Only reference columns that are present in df.columns.
86
-
87
  Note: The following safe built-ins are available: list, dict, set, tuple, abs, min, max, sum, len, range, print, pd, plt, __import__.
88
-
89
  DataFrame preview:
90
  Columns: {list(df_preview.columns)}
91
  Sample Data (first 3 rows):
92
  {df_preview.head(3).to_dict(orient='records')}
93
  """
94
- response = openai.chat.completions.create(
95
- model="gpt-4o-mini",
96
  messages=[
97
  {"role": "system", "content": "You are an expert data analysis assistant who outputs only raw Python code."},
98
- {"role": "user", "content": prompt}
99
  ],
100
  temperature=0.3,
101
  max_tokens=3500,
102
  )
103
- code = response.choices[0].message.content.strip()
104
  return code
105
 
 
106
  def generate_problem_solving_code(nl_query, df_preview, basic_info):
107
- """
108
- Generate Python code that solves the user's analysis query.
109
- The code should assume that the DataFrame 'df' is loaded and that the variable 'basic_info'
110
- (the output from the initial exploratory analysis) is available.
111
- The final analysis should be assigned to a variable named 'result' as a dictionary with keys:
112
- 'summary', 'detailed_stats', 'insights', and 'chart_descriptions'.
113
- If charts are generated, call plt.show() after each chart so they can be captured.
114
- Note: When converting date strings to datetime, use pd.to_datetime() without a fixed format or with dayfirst=True.
115
- Only reference columns that are present in df.columns.
116
- """
117
  prompt = f"""
118
  You are a data analysis expert. Write Python code that performs the analysis as described below.
119
  Assume a pandas DataFrame named 'df' is already loaded and that you have already generated an exploratory summary stored in 'basic_info'.
@@ -127,28 +115,26 @@ Incorporate insights from 'basic_info' if relevant.
127
  When converting date strings to datetime, use pd.to_datetime() without a fixed format or with dayfirst=True.
128
  If your analysis includes charts, call plt.show() after each chart so they can be captured.
129
  Only reference columns that are present in df.columns.
130
-
131
  Note: The following safe built-ins are available: list, dict, set, tuple, abs, min, max, sum, len, range, print, pd, plt, __import__.
132
-
133
  DataFrame preview:
134
  Columns: {list(df_preview.columns)}
135
  Sample Data (first 3 rows):
136
  {df_preview.head(3).to_dict(orient='records')}
137
-
138
  User Query: "{nl_query}"
139
  """
140
- response = openai.chat.completions.create(
141
- model="gpt-4o-mini",
142
  messages=[
143
  {"role": "system", "content": "You are an expert data analysis assistant who outputs only raw Python code."},
144
- {"role": "user", "content": prompt}
145
  ],
146
  temperature=0.3,
147
  max_tokens=3500,
148
  )
149
- code = response.choices[0].message.content.strip()
150
  return code
151
 
 
152
  def validate_generated_code(code, df):
153
  """
154
  Validate that the generated code references only columns that exist in the DataFrame.
@@ -161,6 +147,7 @@ def validate_generated_code(code, df):
161
  return False, missing_cols
162
  return True, []
163
 
 
164
  def safe_exec_code(code, df, capture_charts=True, interactive=False, extra_globals=None):
165
  """
166
  Execute the generated code in a restricted namespace.
@@ -171,13 +158,15 @@ def safe_exec_code(code, df, capture_charts=True, interactive=False, extra_globa
171
  code_lines = code.splitlines()
172
  clean_lines = [line for line in code_lines if not line.strip().startswith("```")]
173
  clean_code = "\n".join(clean_lines).strip()
174
-
175
  # Validate that the generated code references only existing DataFrame columns.
176
  valid, missing_cols = validate_generated_code(clean_code, df)
177
  if not valid:
178
- return (f"Generated code references missing columns: {missing_cols}\nPlease adjust your prompt or data.",
179
- [])
180
-
 
 
181
  # Expanded safe built-ins. Including float, int, bool, etc.
182
  safe_builtins = {
183
  "abs": abs,
@@ -205,10 +194,11 @@ def safe_exec_code(code, df, capture_charts=True, interactive=False, extra_globa
205
  "__import__": __import__,
206
  }
207
  safe_globals = {"__builtins__": safe_builtins, "df": df, "plt": plt, "charts": []}
208
-
209
  # Pre-import seaborn as sns if available.
210
  try:
211
  import seaborn as sns
 
212
  safe_globals["sns"] = sns
213
  except ImportError:
214
  pass
@@ -216,8 +206,9 @@ def safe_exec_code(code, df, capture_charts=True, interactive=False, extra_globa
216
  if extra_globals is not None:
217
  safe_globals.update(extra_globals)
218
  safe_locals = {}
219
-
220
  if capture_charts:
 
221
  def custom_show(*args, **kwargs):
222
  buf = io.BytesIO()
223
  plt.savefig(buf, format="png")
@@ -225,24 +216,30 @@ def safe_exec_code(code, df, capture_charts=True, interactive=False, extra_globa
225
  img = Image.open(buf).convert("RGB")
226
  safe_globals["charts"].append(img)
227
  plt.close()
 
228
  safe_globals["plt"].show = custom_show
229
-
230
  try:
231
- # Directly execute the multi-line generated code.
232
  exec(clean_code, safe_globals, safe_locals)
233
  output = safe_locals.get("result", None)
234
  if output is None:
235
  output = safe_locals.get("basic_info", None)
236
- except Exception as ex:
237
  error_details = traceback.format_exc()
238
  if "ValueError: time data" in error_details:
239
- error_details += "\nHint: The generated code might be using a fixed datetime format. Consider using pd.to_datetime() without a fixed format or with dayfirst=True."
 
 
 
240
  if "KeyError" in error_details:
241
  error_details += "\nHint: The generated code might be referencing columns that do not exist in your DataFrame."
242
  if "NameError" in error_details:
243
- error_details += "\nHint: Ensure that all required built-in types and libraries (like float, int, etc.) are included in the safe built-ins."
 
 
 
244
  return f"An error occurred during code execution:\n{error_details}", safe_globals["charts"]
245
-
246
  if capture_charts and not safe_globals["charts"]:
247
  fig_nums = plt.get_fignums()
248
  for num in fig_nums:
@@ -253,50 +250,48 @@ def safe_exec_code(code, df, capture_charts=True, interactive=False, extra_globa
253
  img = Image.open(buf).convert("RGB")
254
  safe_globals["charts"].append(img)
255
  plt.close("all")
256
-
257
  if interactive:
258
  for img in safe_globals["charts"]:
259
  img.show()
260
-
261
  if output is None:
262
  output = "No output variable ('result' or 'basic_info') was set by the code."
263
  return output, safe_globals["charts"]
264
 
 
265
  def generate_interpretation(analysis_result, nl_query):
266
  """
267
- Use OpenAI to generate a detailed interpretation of the analysis result.
268
  Provide context from the user's query and explain what the results mean.
269
  The response will be formatted in markdown.
270
  """
271
  prompt = f"""
272
  You are a knowledgeable data analyst. Based on the following analysis result and the user's query, provide a detailed interpretation and descriptive analysis of the results. Explain what the results mean, any insights that can be drawn, and any potential limitations.
273
  Please format your output in markdown (including headers, bullet points, and other markdown formatting as appropriate).
274
-
275
  User Query: "{nl_query}"
276
-
277
  Analysis Result:
278
  {analysis_result}
279
-
280
  Provide a clear and detailed explanation in plain language.
281
  """
282
- response = openai.chat.completions.create(
283
- model="gpt-4o-mini",
284
- messages=[
285
- {"role": "system", "content": "You are an expert data analysis assistant who explains analysis results clearly."},
286
- {"role": "user", "content": prompt}
287
- ],
288
- temperature=0.5,
289
- max_tokens=5000,
290
  )
291
- interpretation = response.choices[0].message.content.strip()
292
  return interpretation
293
 
 
294
  def generate_and_run(nl_query, file, interactive_mode=False):
295
  """
296
- Load the file, generate both a basic understanding and a detailed analysis code using OpenAI,
297
  execute the generated code, and then generate an interpretation of the analysis result.
298
  Returns a tuple: (analysis result, combined generated code, DataFrame preview, charts, interpretation).
299
-
300
  The process is split into two steps:
301
  1. Generate basic understanding code that produces 'basic_info'.
302
  2. Generate problem-solving code that uses 'basic_info' and produces the final analysis ('result').
@@ -304,34 +299,37 @@ def generate_and_run(nl_query, file, interactive_mode=False):
304
  df = load_file(file)
305
  if df is None:
306
  return "Error loading file.", "", pd.DataFrame({"Error": ["No data available."]}), [], ""
307
-
308
  df_preview = df.copy()
309
  # Step 1: Generate and execute basic understanding code.
310
  basic_code = generate_basic_understanding_code(df_preview)
311
  basic_info, basic_charts = safe_exec_code(basic_code, df, capture_charts=False, interactive=interactive_mode)
312
-
313
  # Step 2: Generate and execute problem-solving code, injecting basic_info.
314
  problem_code = generate_problem_solving_code(nl_query, df_preview, basic_info)
315
- result, problem_charts = safe_exec_code(problem_code, df, capture_charts=True, interactive=interactive_mode, extra_globals={"basic_info": basic_info})
316
-
 
 
317
  interpretation = generate_interpretation(result, nl_query)
318
  combined_code = f"### Basic Understanding Code:\n{basic_code}\n\n### Problem Solving Code:\n{problem_code}"
319
  combined_charts = basic_charts + problem_charts
320
  return result, combined_code, df_preview, combined_charts, interpretation
321
 
 
322
  # Gradio interface setup
323
  with gr.Blocks() as demo:
324
  gr.Markdown("## Dynamic Data Analysis with Two-Step Code Generation and Interpretation")
325
-
326
  with gr.Tab("Data Upload & Preview"):
327
  file_input = gr.File(label="Upload CSV or Excel file (.csv, .xls, .xlsx)")
328
  data_preview = gr.Dataframe(label="Data Preview")
329
  file_input.change(fn=preview_file, inputs=file_input, outputs=data_preview)
330
-
331
  with gr.Tab("Generate & Execute Analysis (Gradio Mode)"):
332
  nl_query = gr.Textbox(
333
- label="Enter your query",
334
- placeholder="e.g., Generate summary statistics and charts for Gender and Age distributions"
335
  )
336
  generate_btn = gr.Button("Generate & Execute Code")
337
  analysis_output = gr.Textbox(label="Analysis Result", lines=10)
@@ -339,15 +337,14 @@ with gr.Blocks() as demo:
339
  preview_output = gr.Dataframe(label="Data Preview")
340
  charts_output = gr.Gallery(label="Charts", show_label=True)
341
  interpretation_output = gr.Markdown(label="Interpretation")
342
-
343
  generate_btn.click(
344
  fn=lambda query, file: generate_and_run(query, file, interactive_mode=True),
345
  inputs=[nl_query, file_input],
346
- outputs=[analysis_output, code_output, preview_output, charts_output, interpretation_output]
347
  )
348
-
349
  # Launch the app. This main block is useful for Hugging Face Spaces.
350
  if __name__ == "__main__":
351
  demo.launch()
352
  # demo.launch(auth=("username", "password"))
353
-
 
1
+ # app.py
2
  import os
3
  import io
4
  import re
5
  import gradio as gr
6
  import pandas as pd
 
7
  import matplotlib.pyplot as plt
 
8
  from PIL import Image
9
  import traceback
10
 
11
from groq import Groq

# Groq configuration — values are supplied through the Hugging Face Space
# "Secrets" panel and read from the environment at import time.
GROQ_API_KEY = (os.environ.get("GROQ_API_KEY") or "").strip()
GROQ_MODEL = (os.environ.get("GROQ_MODEL") or "llama-3.3-70b-versatile").strip()

# Fail fast: the app cannot do anything useful without an API key.
if not GROQ_API_KEY:
    raise RuntimeError("Falta GROQ_API_KEY en Secrets del Space.")

groq_client = Groq(api_key=GROQ_API_KEY)
21
+
22
 
23
  def load_file(file):
24
  """Load a CSV or Excel file into a pandas DataFrame."""
 
32
  if file_path is None:
33
  return None
34
  try:
35
+ if file_name.endswith(".csv"):
36
  df = pd.read_csv(file_path)
37
+ elif file_name.endswith(".xlsx"):
38
+ df = pd.read_excel(file_path, engine="openpyxl")
39
+ elif file_name.endswith(".xls"):
40
+ df = pd.read_excel(file_path, engine="xlrd")
41
  else:
42
  return None
43
  except Exception as e:
 
48
  # Assume file is a file-like object (as on your local machine)
49
  file_name = file.name.lower()
50
  try:
51
+ if file_name.endswith(".csv"):
52
  df = pd.read_csv(file)
53
+ elif file_name.endswith(".xlsx"):
54
+ df = pd.read_excel(file, engine="openpyxl")
55
+ elif file_name.endswith(".xls"):
56
+ df = pd.read_excel(file, engine="xlrd")
57
  else:
58
  return None
59
  except Exception as e:
 
61
  return None
62
  return df
63
 
64
+
65
def preview_file(file):
    """Load *file* and return a DataFrame suitable for the Gradio preview.

    When loading fails or the file type is unsupported, a one-column
    error DataFrame is returned instead, so the preview widget always
    receives a frame rather than a bare string.
    """
    frame = load_file(file)
    if frame is not None:
        return frame
    return pd.DataFrame({"Error": ["Error loading file or unsupported file type."]})
71
 
72
+
73
  def generate_basic_understanding_code(df_preview):
 
 
 
 
 
 
 
 
 
74
  prompt = f"""
75
  You are a data analysis expert. Write Python code that performs an exploratory analysis of the DataFrame.
76
  Assume a pandas DataFrame named 'df' is already loaded.
 
82
  When converting date strings to datetime, use pd.to_datetime() without a fixed format or with dayfirst=True.
83
  If your analysis includes charts, call plt.show() after each chart so they can be captured.
84
  Only reference columns that are present in df.columns.
 
85
  Note: The following safe built-ins are available: list, dict, set, tuple, abs, min, max, sum, len, range, print, pd, plt, __import__.
 
86
  DataFrame preview:
87
  Columns: {list(df_preview.columns)}
88
  Sample Data (first 3 rows):
89
  {df_preview.head(3).to_dict(orient='records')}
90
  """
91
+ response = groq_client.chat.completions.create(
92
+ model=GROQ_MODEL,
93
  messages=[
94
  {"role": "system", "content": "You are an expert data analysis assistant who outputs only raw Python code."},
95
+ {"role": "user", "content": prompt},
96
  ],
97
  temperature=0.3,
98
  max_tokens=3500,
99
  )
100
+ code = (response.choices[0].message.content or "").strip()
101
  return code
102
 
103
+
104
  def generate_problem_solving_code(nl_query, df_preview, basic_info):
 
 
 
 
 
 
 
 
 
 
105
  prompt = f"""
106
  You are a data analysis expert. Write Python code that performs the analysis as described below.
107
  Assume a pandas DataFrame named 'df' is already loaded and that you have already generated an exploratory summary stored in 'basic_info'.
 
115
  When converting date strings to datetime, use pd.to_datetime() without a fixed format or with dayfirst=True.
116
  If your analysis includes charts, call plt.show() after each chart so they can be captured.
117
  Only reference columns that are present in df.columns.
 
118
  Note: The following safe built-ins are available: list, dict, set, tuple, abs, min, max, sum, len, range, print, pd, plt, __import__.
 
119
  DataFrame preview:
120
  Columns: {list(df_preview.columns)}
121
  Sample Data (first 3 rows):
122
  {df_preview.head(3).to_dict(orient='records')}
 
123
  User Query: "{nl_query}"
124
  """
125
+ response = groq_client.chat.completions.create(
126
+ model=GROQ_MODEL,
127
  messages=[
128
  {"role": "system", "content": "You are an expert data analysis assistant who outputs only raw Python code."},
129
+ {"role": "user", "content": prompt},
130
  ],
131
  temperature=0.3,
132
  max_tokens=3500,
133
  )
134
+ code = (response.choices[0].message.content or "").strip()
135
  return code
136
 
137
+
138
  def validate_generated_code(code, df):
139
  """
140
  Validate that the generated code references only columns that exist in the DataFrame.
 
147
  return False, missing_cols
148
  return True, []
149
 
150
+
151
  def safe_exec_code(code, df, capture_charts=True, interactive=False, extra_globals=None):
152
  """
153
  Execute the generated code in a restricted namespace.
 
158
  code_lines = code.splitlines()
159
  clean_lines = [line for line in code_lines if not line.strip().startswith("```")]
160
  clean_code = "\n".join(clean_lines).strip()
161
+
162
  # Validate that the generated code references only existing DataFrame columns.
163
  valid, missing_cols = validate_generated_code(clean_code, df)
164
  if not valid:
165
+ return (
166
+ f"Generated code references missing columns: {missing_cols}\nPlease adjust your prompt or data.",
167
+ [],
168
+ )
169
+
170
  # Expanded safe built-ins. Including float, int, bool, etc.
171
  safe_builtins = {
172
  "abs": abs,
 
194
  "__import__": __import__,
195
  }
196
  safe_globals = {"__builtins__": safe_builtins, "df": df, "plt": plt, "charts": []}
197
+
198
  # Pre-import seaborn as sns if available.
199
  try:
200
  import seaborn as sns
201
+
202
  safe_globals["sns"] = sns
203
  except ImportError:
204
  pass
 
206
  if extra_globals is not None:
207
  safe_globals.update(extra_globals)
208
  safe_locals = {}
209
+
210
  if capture_charts:
211
+
212
  def custom_show(*args, **kwargs):
213
  buf = io.BytesIO()
214
  plt.savefig(buf, format="png")
 
216
  img = Image.open(buf).convert("RGB")
217
  safe_globals["charts"].append(img)
218
  plt.close()
219
+
220
  safe_globals["plt"].show = custom_show
221
+
222
  try:
 
223
  exec(clean_code, safe_globals, safe_locals)
224
  output = safe_locals.get("result", None)
225
  if output is None:
226
  output = safe_locals.get("basic_info", None)
227
+ except Exception:
228
  error_details = traceback.format_exc()
229
  if "ValueError: time data" in error_details:
230
+ error_details += (
231
+ "\nHint: The generated code might be using a fixed datetime format. "
232
+ "Consider using pd.to_datetime() without a fixed format or with dayfirst=True."
233
+ )
234
  if "KeyError" in error_details:
235
  error_details += "\nHint: The generated code might be referencing columns that do not exist in your DataFrame."
236
  if "NameError" in error_details:
237
+ error_details += (
238
+ "\nHint: Ensure that all required built-in types and libraries (like float, int, etc.) "
239
+ "are included in the safe built-ins."
240
+ )
241
  return f"An error occurred during code execution:\n{error_details}", safe_globals["charts"]
242
+
243
  if capture_charts and not safe_globals["charts"]:
244
  fig_nums = plt.get_fignums()
245
  for num in fig_nums:
 
250
  img = Image.open(buf).convert("RGB")
251
  safe_globals["charts"].append(img)
252
  plt.close("all")
253
+
254
  if interactive:
255
  for img in safe_globals["charts"]:
256
  img.show()
257
+
258
  if output is None:
259
  output = "No output variable ('result' or 'basic_info') was set by the code."
260
  return output, safe_globals["charts"]
261
 
262
+
263
def generate_interpretation(analysis_result, nl_query):
    """Ask the Groq model to explain an analysis result in plain language.

    The model is given the user's original query together with the raw
    analysis output and asked for a markdown-formatted interpretation
    (insights, meaning, limitations).
    """
    prompt = f"""
You are a knowledgeable data analyst. Based on the following analysis result and the user's query, provide a detailed interpretation and descriptive analysis of the results. Explain what the results mean, any insights that can be drawn, and any potential limitations.
Please format your output in markdown (including headers, bullet points, and other markdown formatting as appropriate).
User Query: "{nl_query}"
Analysis Result:
{analysis_result}
Provide a clear and detailed explanation in plain language.
"""
    chat = groq_client.chat.completions.create(
        model=GROQ_MODEL,
        temperature=0.5,
        max_tokens=5000,
        messages=[
            {"role": "system", "content": "You are an expert data analysis assistant who explains analysis results clearly."},
            {"role": "user", "content": prompt},
        ],
    )
    # Guard against a None message body before stripping whitespace.
    text = chat.choices[0].message.content
    return (text or "").strip()
288
 
289
+
290
def generate_and_run(nl_query, file, interactive_mode=False):
    """Run the full two-step analysis pipeline over an uploaded file.

    Steps:
      1. Generate and execute exploratory code, producing 'basic_info'.
      2. Generate and execute query-specific code (with 'basic_info'
         injected into its namespace), producing 'result', then ask
         Groq for a narrative interpretation of that result.

    Returns a 5-tuple: (analysis result, combined generated code,
    DataFrame preview, charts, interpretation).
    """
    frame = load_file(file)
    if frame is None:
        return "Error loading file.", "", pd.DataFrame({"Error": ["No data available."]}), [], ""

    preview = frame.copy()

    # Step 1: exploratory pass -> 'basic_info' (charts are not captured here).
    explore_code = generate_basic_understanding_code(preview)
    basic_info, explore_charts = safe_exec_code(
        explore_code, frame, capture_charts=False, interactive=interactive_mode
    )

    # Step 2: query-specific pass, seeded with the exploratory summary.
    solve_code = generate_problem_solving_code(nl_query, preview, basic_info)
    result, solve_charts = safe_exec_code(
        solve_code,
        frame,
        capture_charts=True,
        interactive=interactive_mode,
        extra_globals={"basic_info": basic_info},
    )

    interpretation = generate_interpretation(result, nl_query)
    combined_code = f"### Basic Understanding Code:\n{explore_code}\n\n### Problem Solving Code:\n{solve_code}"
    return result, combined_code, preview, explore_charts + solve_charts, interpretation
318
 
319
+
320
  # Gradio interface setup
321
  with gr.Blocks() as demo:
322
  gr.Markdown("## Dynamic Data Analysis with Two-Step Code Generation and Interpretation")
323
+
324
  with gr.Tab("Data Upload & Preview"):
325
  file_input = gr.File(label="Upload CSV or Excel file (.csv, .xls, .xlsx)")
326
  data_preview = gr.Dataframe(label="Data Preview")
327
  file_input.change(fn=preview_file, inputs=file_input, outputs=data_preview)
328
+
329
  with gr.Tab("Generate & Execute Analysis (Gradio Mode)"):
330
  nl_query = gr.Textbox(
331
+ label="Enter your query",
332
+ placeholder="e.g., Generate summary statistics and charts for Gender and Age distributions",
333
  )
334
  generate_btn = gr.Button("Generate & Execute Code")
335
  analysis_output = gr.Textbox(label="Analysis Result", lines=10)
 
337
  preview_output = gr.Dataframe(label="Data Preview")
338
  charts_output = gr.Gallery(label="Charts", show_label=True)
339
  interpretation_output = gr.Markdown(label="Interpretation")
340
+
341
  generate_btn.click(
342
  fn=lambda query, file: generate_and_run(query, file, interactive_mode=True),
343
  inputs=[nl_query, file_input],
344
+ outputs=[analysis_output, code_output, preview_output, charts_output, interpretation_output],
345
  )
346
+
347
  # Launch the app. This main block is useful for Hugging Face Spaces.
348
  if __name__ == "__main__":
349
  demo.launch()
350
  # demo.launch(auth=("username", "password"))