yatabase1 committed on
Commit
baed98d
·
verified ·
1 Parent(s): 03b8d2f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -1
app.py CHANGED
@@ -69,6 +69,7 @@ def generate_basic_understanding_code(df_preview):
69
  - For numeric columns, summary statistics (mean, median, std, etc.).
70
  - For non-numeric columns, counts, unique values, mode, and frequency distributions.
71
  If charts are generated, ensure plt.show() is called after each chart so they can be captured.
 
72
  """
73
  prompt = f"""
74
  You are a data analysis expert. Write Python code that performs an exploratory analysis of the DataFrame.
@@ -78,6 +79,7 @@ Assign the exploratory summary to a variable named 'basic_info' as a dictionary.
78
  For each column in df, include its data type.
79
  - For numeric columns (use pd.api.types.is_numeric_dtype), include summary statistics (mean, median, std, etc.).
80
  - For non-numeric columns, treat them as categorical variables and include counts, unique values, mode, and frequency distributions.
 
81
  If your analysis includes charts, call plt.show() after each chart so they can be captured.
82
 
83
  Note: The following safe built-ins are available: list, dict, set, tuple, abs, min, max, sum, len, range, print, pd, plt, __import__.
@@ -107,6 +109,7 @@ def generate_problem_solving_code(nl_query, df_preview, basic_info):
107
  The final analysis should be assigned to a variable named 'result' as a dictionary with keys:
108
  'summary', 'detailed_stats', 'insights', and 'chart_descriptions'.
109
  If charts are generated, call plt.show() after each chart so they can be captured.
 
110
  """
111
  prompt = f"""
112
  You are a data analysis expert. Write Python code that performs the analysis as described below.
@@ -118,6 +121,7 @@ When processing the DataFrame, first inspect each column’s data type:
118
  - For non-numeric columns, treat them as categorical variables and compute appropriate descriptive statistics (counts, unique values, mode, and frequency distributions).
119
  - Only generate charts and tables that are relevant to the problem at hand. Exclude fields that are not relevant to the problem from the charts and tables.
120
  Incorporate insights from 'basic_info' if relevant.
 
121
  If your analysis includes charts, call plt.show() after each chart so they can be captured.
122
 
123
  Note: The following safe built-ins are available: list, dict, set, tuple, abs, min, max, sum, len, range, print, pd, plt, __import__.
@@ -198,7 +202,8 @@ def safe_exec_code(code, df, capture_charts=True, interactive=False, extra_globa
198
  output = safe_locals.get("basic_info", None)
199
  except Exception as ex:
200
  error_details = traceback.format_exc()
201
- # Append a hint for KeyError related issues.
 
202
  if "KeyError" in error_details:
203
  error_details += "\nHint: The generated code might be referencing columns that do not exist in your DataFrame."
204
  return f"An error occurred during code execution:\n{error_details}", safe_globals["charts"]
 
69
  - For numeric columns, summary statistics (mean, median, std, etc.).
70
  - For non-numeric columns, counts, unique values, mode, and frequency distributions.
71
  If charts are generated, ensure plt.show() is called after each chart so they can be captured.
72
+ Note: When converting dates, use pd.to_datetime() without a fixed format or with dayfirst=True.
73
  """
74
  prompt = f"""
75
  You are a data analysis expert. Write Python code that performs an exploratory analysis of the DataFrame.
 
79
  For each column in df, include its data type.
80
  - For numeric columns (use pd.api.types.is_numeric_dtype), include summary statistics (mean, median, std, etc.).
81
  - For non-numeric columns, treat them as categorical variables and include counts, unique values, mode, and frequency distributions.
82
+ When converting date strings to datetime, use pd.to_datetime() without a fixed format or with dayfirst=True.
83
  If your analysis includes charts, call plt.show() after each chart so they can be captured.
84
 
85
  Note: The following safe built-ins are available: list, dict, set, tuple, abs, min, max, sum, len, range, print, pd, plt, __import__.
 
109
  The final analysis should be assigned to a variable named 'result' as a dictionary with keys:
110
  'summary', 'detailed_stats', 'insights', and 'chart_descriptions'.
111
  If charts are generated, call plt.show() after each chart so they can be captured.
112
+ Note: When converting date strings to datetime, use pd.to_datetime() without a fixed format or with dayfirst=True.
113
  """
114
  prompt = f"""
115
  You are a data analysis expert. Write Python code that performs the analysis as described below.
 
121
  - For non-numeric columns, treat them as categorical variables and compute appropriate descriptive statistics (counts, unique values, mode, and frequency distributions).
122
  - Only generate charts and tables that are relevant to the problem at hand. Exclude fields that are not relevant to the problem from the charts and tables.
123
  Incorporate insights from 'basic_info' if relevant.
124
+ When converting date strings to datetime, use pd.to_datetime() without a fixed format or with dayfirst=True.
125
  If your analysis includes charts, call plt.show() after each chart so they can be captured.
126
 
127
  Note: The following safe built-ins are available: list, dict, set, tuple, abs, min, max, sum, len, range, print, pd, plt, __import__.
 
202
  output = safe_locals.get("basic_info", None)
203
  except Exception as ex:
204
  error_details = traceback.format_exc()
205
+ if "ValueError: time data" in error_details:
206
+ error_details += "\nHint: The generated code might be using a fixed datetime format. Consider using pd.to_datetime() without a fixed format or with dayfirst=True."
207
  if "KeyError" in error_details:
208
  error_details += "\nHint: The generated code might be referencing columns that do not exist in your DataFrame."
209
  return f"An error occurred during code execution:\n{error_details}", safe_globals["charts"]