Update app.py
Browse files
app.py
CHANGED
|
@@ -69,6 +69,7 @@ def generate_basic_understanding_code(df_preview):
|
|
| 69 |
- For numeric columns, summary statistics (mean, median, std, etc.).
|
| 70 |
- For non-numeric columns, counts, unique values, mode, and frequency distributions.
|
| 71 |
If charts are generated, ensure plt.show() is called after each chart so they can be captured.
|
|
|
|
| 72 |
"""
|
| 73 |
prompt = f"""
|
| 74 |
You are a data analysis expert. Write Python code that performs an exploratory analysis of the DataFrame.
|
|
@@ -78,6 +79,7 @@ Assign the exploratory summary to a variable named 'basic_info' as a dictionary.
|
|
| 78 |
For each column in df, include its data type.
|
| 79 |
- For numeric columns (use pd.api.types.is_numeric_dtype), include summary statistics (mean, median, std, etc.).
|
| 80 |
- For non-numeric columns, treat them as categorical variables and include counts, unique values, mode, and frequency distributions.
|
|
|
|
| 81 |
If your analysis includes charts, call plt.show() after each chart so they can be captured.
|
| 82 |
|
| 83 |
Note: The following safe built-ins are available: list, dict, set, tuple, abs, min, max, sum, len, range, print, pd, plt, __import__.
|
|
@@ -107,6 +109,7 @@ def generate_problem_solving_code(nl_query, df_preview, basic_info):
|
|
| 107 |
The final analysis should be assigned to a variable named 'result' as a dictionary with keys:
|
| 108 |
'summary', 'detailed_stats', 'insights', and 'chart_descriptions'.
|
| 109 |
If charts are generated, call plt.show() after each chart so they can be captured.
|
|
|
|
| 110 |
"""
|
| 111 |
prompt = f"""
|
| 112 |
You are a data analysis expert. Write Python code that performs the analysis as described below.
|
|
@@ -118,6 +121,7 @@ When processing the DataFrame, first inspect each column’s data type:
|
|
| 118 |
- For non-numeric columns, treat them as categorical variables and compute appropriate descriptive statistics (counts, unique values, mode, and frequency distributions).
|
| 119 |
- Only generate charts and tables that are relevant to the problem at hand. Exclude fields that are not relevant to the problem from the charts and tables.
|
| 120 |
Incorporate insights from 'basic_info' if relevant.
|
|
|
|
| 121 |
If your analysis includes charts, call plt.show() after each chart so they can be captured.
|
| 122 |
|
| 123 |
Note: The following safe built-ins are available: list, dict, set, tuple, abs, min, max, sum, len, range, print, pd, plt, __import__.
|
|
@@ -198,7 +202,8 @@ def safe_exec_code(code, df, capture_charts=True, interactive=False, extra_globa
|
|
| 198 |
output = safe_locals.get("basic_info", None)
|
| 199 |
except Exception as ex:
|
| 200 |
error_details = traceback.format_exc()
|
| 201 |
-
|
|
|
|
| 202 |
if "KeyError" in error_details:
|
| 203 |
error_details += "\nHint: The generated code might be referencing columns that do not exist in your DataFrame."
|
| 204 |
return f"An error occurred during code execution:\n{error_details}", safe_globals["charts"]
|
|
|
|
| 69 |
- For numeric columns, summary statistics (mean, median, std, etc.).
|
| 70 |
- For non-numeric columns, counts, unique values, mode, and frequency distributions.
|
| 71 |
If charts are generated, ensure plt.show() is called after each chart so they can be captured.
|
| 72 |
+
Note: When converting date strings to datetime, use pd.to_datetime() without a fixed format or with dayfirst=True.
|
| 73 |
"""
|
| 74 |
prompt = f"""
|
| 75 |
You are a data analysis expert. Write Python code that performs an exploratory analysis of the DataFrame.
|
|
|
|
| 79 |
For each column in df, include its data type.
|
| 80 |
- For numeric columns (use pd.api.types.is_numeric_dtype), include summary statistics (mean, median, std, etc.).
|
| 81 |
- For non-numeric columns, treat them as categorical variables and include counts, unique values, mode, and frequency distributions.
|
| 82 |
+
When converting date strings to datetime, use pd.to_datetime() without a fixed format or with dayfirst=True.
|
| 83 |
If your analysis includes charts, call plt.show() after each chart so they can be captured.
|
| 84 |
|
| 85 |
Note: The following safe built-ins are available: list, dict, set, tuple, abs, min, max, sum, len, range, print, pd, plt, __import__.
|
|
|
|
| 109 |
The final analysis should be assigned to a variable named 'result' as a dictionary with keys:
|
| 110 |
'summary', 'detailed_stats', 'insights', and 'chart_descriptions'.
|
| 111 |
If charts are generated, call plt.show() after each chart so they can be captured.
|
| 112 |
+
Note: When converting date strings to datetime, use pd.to_datetime() without a fixed format or with dayfirst=True.
|
| 113 |
"""
|
| 114 |
prompt = f"""
|
| 115 |
You are a data analysis expert. Write Python code that performs the analysis as described below.
|
|
|
|
| 121 |
- For non-numeric columns, treat them as categorical variables and compute appropriate descriptive statistics (counts, unique values, mode, and frequency distributions).
|
| 122 |
- Only generate charts and tables that are relevant to the problem at hand. Exclude fields that are not relevant to the problem from the charts and tables.
|
| 123 |
Incorporate insights from 'basic_info' if relevant.
|
| 124 |
+
When converting date strings to datetime, use pd.to_datetime() without a fixed format or with dayfirst=True.
|
| 125 |
If your analysis includes charts, call plt.show() after each chart so they can be captured.
|
| 126 |
|
| 127 |
Note: The following safe built-ins are available: list, dict, set, tuple, abs, min, max, sum, len, range, print, pd, plt, __import__.
|
|
|
|
| 202 |
output = safe_locals.get("basic_info", None)
|
| 203 |
except Exception as ex:
|
| 204 |
error_details = traceback.format_exc()
|
| 205 |
+
if "ValueError: time data" in error_details:
|
| 206 |
+
error_details += "\nHint: The generated code might be using a fixed datetime format. Consider using pd.to_datetime() without a fixed format or with dayfirst=True."
|
| 207 |
if "KeyError" in error_details:
|
| 208 |
error_details += "\nHint: The generated code might be referencing columns that do not exist in your DataFrame."
|
| 209 |
return f"An error occurred during code execution:\n{error_details}", safe_globals["charts"]
|