Deepa Shalini committed on
Commit
4f6abcb
·
1 Parent(s): 9c1d9fd

validation checks for prompt and alert messages

Browse files
Files changed (3) hide show
  1. .gitignore +4 -1
  2. app.py +4 -2
  3. utils/prompt.py +92 -34
.gitignore CHANGED
@@ -14,4 +14,7 @@ utils/chartbot_dataset_layout.py
14
  utils/components.py
15
 
16
  # ignore design html file
17
- design.html
 
 
 
 
14
  utils/components.py
15
 
16
  # ignore design html file
17
+ design.html
18
+
19
+ # ignore temporary files created
20
+ temp*
app.py CHANGED
@@ -8,7 +8,8 @@ from utils import prompt, helpers
8
  app = dash.Dash(__name__, suppress_callback_exceptions=True)
9
 
10
  # Define the layout matching design.html
11
- app.layout = html.Div(
 
12
  [
13
  html.Div(
14
  [
@@ -133,6 +134,7 @@ app.layout = html.Div(
133
  )
134
  ],
135
  className="viewport"
 
136
  )
137
 
138
  # Callback for file upload
@@ -273,4 +275,4 @@ def reset_chat(n_clicks):
273
  return dash.no_update
274
 
275
  if __name__ == "__main__":
276
- app.run(debug=True)
 
8
  app = dash.Dash(__name__, suppress_callback_exceptions=True)
9
 
10
  # Define the layout matching design.html
11
+ app.layout = dmc.MantineProvider(
12
+ html.Div(
13
  [
14
  html.Div(
15
  [
 
134
  )
135
  ],
136
  className="viewport"
137
+ )
138
  )
139
 
140
  # Callback for file upload
 
275
  return dash.no_update
276
 
277
  if __name__ == "__main__":
278
+ app.run(debug=False)
utils/prompt.py CHANGED
@@ -23,19 +23,39 @@ if not GROQ_API_KEY or GROQ_API_KEY == 'your_groq_api_key_here':
23
  # define connectivity to the llm
24
  try:
25
  llm = ChatGroq(
26
- model="groq/compound-mini",
27
  api_key=GROQ_API_KEY,
28
  temperature=0
29
  )
30
  except Exception as e:
31
  raise ValueError(f"Failed to initialize ChatGroq: {str(e)}")
32
 
33
- '''Before creating any visualizations, ensure that any rows with NaN or missing values in the relevant columns are removed. Additionally,
34
- handle missing values appropriately based on the context, ensuring cleaner visualizations.
35
- For example, use df.dropna(subset=[column_name]) for data cleaning. Never use this statement: df.dropna(inplace=True).'''
36
-
37
  def get_prompt_text() -> str:
38
- return """You are a data visualization expert and you only use the graphing library Plotly.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  Ensure that before performing any data manipulation or plotting, the code checks for column data types and converts them if necessary.
40
  For example, numeric columns should be converted to floats or integers using pd.to_numeric(), and non-numeric columns should be excluded from numeric operations.
41
  Before creating any visualizations, ensure that any rows with NaN or missing values in the relevant columns are removed. Additionally,
@@ -84,50 +104,80 @@ def get_response(user_input: str, data_top5_csv_string: str, file_name: str) ->
84
  file_name: Name of the data file
85
 
86
  Returns:
87
- LLM response content
88
 
89
  Raises:
90
- Exception: If API call fails
91
  """
92
  try:
93
  prompt = ChatPromptTemplate.from_messages(
94
- [
95
- (
96
- "system",
97
- get_prompt_text()
98
- ),
99
- MessagesPlaceholder(variable_name="messages")
100
- ]
101
- )
102
 
103
  chain = prompt | llm
104
 
105
  response = chain.invoke(
106
  {
107
  "messages": [HumanMessage(content=user_input)],
108
- "data_visualization_best_practices": helpers.read_doc(helpers.get_app_file_path("assets", "data_viz_best_practices.txt")),
109
- "example_subplots1": helpers.read_doc(helpers.get_app_file_path("assets", "example_subplots1.txt")),
110
- "example_subplots2": helpers.read_doc(helpers.get_app_file_path("assets", "example_subplots2.txt")),
111
- "example_subplots3": helpers.read_doc(helpers.get_app_file_path("assets", "example_subplots3.txt")),
 
 
 
 
 
 
 
 
112
  "data": data_top5_csv_string,
113
  "name_of_file": file_name
114
  }
115
  )
116
 
117
- return response.content
 
 
 
 
 
 
 
 
 
 
 
 
 
118
 
119
  except Exception as e:
120
  error_msg = str(e)
 
 
 
 
 
 
121
  if "rate_limit" in error_msg.lower() or "429" in error_msg:
122
  raise Exception("Rate limit exceeded. Please wait a moment and try again.")
123
  elif "authentication" in error_msg.lower() or "401" in error_msg or "api_key" in error_msg.lower():
124
- raise Exception("Authentication failed. Please check your GROQ_API_KEY in the .env file.")
125
  elif "timeout" in error_msg.lower():
126
  raise Exception("Request timed out. Please try again.")
127
  else:
128
- raise Exception(f"Error communicating with Groq API: {error_msg}")
129
 
130
  def get_python_exception_prompt_text() -> str:
 
 
 
 
 
 
131
  return """The Python code you provided {code} has an error {exception}"""
132
 
133
  def get_python_exception_response(code: str, exception: str) -> str:
@@ -146,34 +196,42 @@ def get_python_exception_response(code: str, exception: str) -> str:
146
  """
147
  try:
148
  prompt = ChatPromptTemplate.from_messages(
149
- [
150
- (
151
- "system",
152
- get_python_exception_prompt_text()
153
- ),
154
- MessagesPlaceholder(variable_name="messages")
155
- ]
156
- )
157
 
158
  chain = prompt | llm
159
 
160
  response = chain.invoke(
161
  {
162
- "messages": [HumanMessage(content="Rewrite the entire Python code so that it does not contain any errors. The code should be able to run without any errors.")],
 
 
 
163
  "code": code,
164
  "exception": exception
165
  }
166
  )
167
 
 
 
 
168
  return response.content
169
 
170
  except Exception as e:
171
  error_msg = str(e)
 
 
 
 
 
172
  if "rate_limit" in error_msg.lower() or "429" in error_msg:
173
  raise Exception("Rate limit exceeded. Please wait a moment and try again.")
174
  elif "authentication" in error_msg.lower() or "401" in error_msg or "api_key" in error_msg.lower():
175
- raise Exception("Authentication failed. Please check your GROQ_API_KEY in the .env file.")
176
  elif "timeout" in error_msg.lower():
177
  raise Exception("Request timed out. Please try again.")
178
  else:
179
- raise Exception(f"Error communicating with Groq API: {error_msg}")
 
23
  # define connectivity to the llm
24
  try:
25
  llm = ChatGroq(
26
+ model="llama-3.3-70b-versatile",
27
  api_key=GROQ_API_KEY,
28
  temperature=0
29
  )
30
  except Exception as e:
31
  raise ValueError(f"Failed to initialize ChatGroq: {str(e)}")
32
 
 
 
 
 
33
  def get_prompt_text() -> str:
34
+ """
35
+ Get the system prompt for data visualization generation.
36
+
37
+ Returns:
38
+ str: The system prompt template
39
+ """
40
+ return """You are a data visualization expert and you only use the graphing library Plotly.
41
+
42
+ CRITICAL VALIDATION RULES - EXECUTE BEFORE GENERATING ANY CODE:
43
+ 1. RELEVANCE CHECK: Before generating any code, you MUST verify that the user's request is relevant to the provided dataset.
44
+ 2. COLUMN VERIFICATION: Analyze the first 5 rows of data provided. If the user explicitly mentions column names that do NOT exist in the dataset, you MUST return an error message instead of code.
45
+ 3. DATA CONTEXT VERIFICATION: If the user's request asks about metrics, categories, or data points that are clearly incompatible with the dataset columns shown, you MUST return an error message instead of code.
46
+ 4. NON-VISUALIZATION REQUESTS: If the user's request is not about data visualization (e.g., asking for text generation, general questions, unrelated tasks), you MUST return an error message instead of code.
47
+
48
+ ERROR MESSAGE FORMAT - Use this EXACT format when validation fails:
49
+ ERROR: The request appears to be unrelated to the provided dataset. Please rephrase your request to refer to the actual columns and data available in your file. Available columns are: [list the column names from the data provided].
50
+
51
+ IMPORTANT: Only generate Python code if ALL of the following are true:
52
+ - The request is about creating a data visualization
53
+ - The request refers to columns, metrics, or patterns that could reasonably exist in the provided dataset
54
+ - The user has not explicitly mentioned column names that don't exist in the dataset
55
+
56
+ If any validation rule fails, return ONLY the error message in the format specified above. Do NOT generate any Python code.
57
+
58
+ IF VALIDATION PASSES, PROCEED WITH CODE GENERATION:
59
  Ensure that before performing any data manipulation or plotting, the code checks for column data types and converts them if necessary.
60
  For example, numeric columns should be converted to floats or integers using pd.to_numeric(), and non-numeric columns should be excluded from numeric operations.
61
  Before creating any visualizations, ensure that any rows with NaN or missing values in the relevant columns are removed. Additionally,
 
104
  file_name: Name of the data file
105
 
106
  Returns:
107
+ LLM response content containing Python code or error message
108
 
109
  Raises:
110
+ Exception: If API call fails or validation fails
111
  """
112
  try:
113
  prompt = ChatPromptTemplate.from_messages(
114
+ [
115
+ ("system", get_prompt_text()),
116
+ MessagesPlaceholder(variable_name="messages")
117
+ ]
118
+ )
 
 
 
119
 
120
  chain = prompt | llm
121
 
122
  response = chain.invoke(
123
  {
124
  "messages": [HumanMessage(content=user_input)],
125
+ "data_visualization_best_practices": helpers.read_doc(
126
+ helpers.get_app_file_path("assets", "data_viz_best_practices.txt")
127
+ ),
128
+ "example_subplots1": helpers.read_doc(
129
+ helpers.get_app_file_path("assets", "example_subplots1.txt")
130
+ ),
131
+ "example_subplots2": helpers.read_doc(
132
+ helpers.get_app_file_path("assets", "example_subplots2.txt")
133
+ ),
134
+ "example_subplots3": helpers.read_doc(
135
+ helpers.get_app_file_path("assets", "example_subplots3.txt")
136
+ ),
137
  "data": data_top5_csv_string,
138
  "name_of_file": file_name
139
  }
140
  )
141
 
142
+ # Check if the response is an error message instead of code
143
+ response_text = response.content.strip()
144
+
145
+ if response_text.startswith("ERROR:"):
146
+ # Extract the error message and raise validation error
147
+ error_message = response_text.replace("ERROR:", "").strip()
148
+ raise ValueError(error_message)
149
+
150
+ return response_text
151
+
152
+ except ValueError as ve:
153
+ # This is our custom validation error from the LLM
154
+ # Re-raise with user-friendly message
155
+ raise Exception(f"Unable to process your request: {str(ve)}")
156
 
157
  except Exception as e:
158
  error_msg = str(e)
159
+
160
+ # DEBUG: Print the actual error to understand what's happening
161
+ print(f"DEBUG - Caught exception type: {type(e).__name__}")
162
+ print(f"DEBUG - Error message: {error_msg}")
163
+
164
+ # Check for specific API errors (these are real API issues, not validation errors)
165
  if "rate_limit" in error_msg.lower() or "429" in error_msg:
166
  raise Exception("Rate limit exceeded. Please wait a moment and try again.")
167
  elif "authentication" in error_msg.lower() or "401" in error_msg or "api_key" in error_msg.lower():
168
+ raise Exception("We're having trouble generating your visualization.")
169
  elif "timeout" in error_msg.lower():
170
  raise Exception("Request timed out. Please try again.")
171
  else:
172
+ raise Exception(f"Unable to process your request: {error_msg}")
173
 
174
  def get_python_exception_prompt_text() -> str:
175
+ """
176
+ Get the system prompt for fixing Python code errors.
177
+
178
+ Returns:
179
+ str: The system prompt for error fixing
180
+ """
181
  return """The Python code you provided {code} has an error {exception}"""
182
 
183
  def get_python_exception_response(code: str, exception: str) -> str:
 
196
  """
197
  try:
198
  prompt = ChatPromptTemplate.from_messages(
199
+ [
200
+ ("system", get_python_exception_prompt_text()),
201
+ MessagesPlaceholder(variable_name="messages")
202
+ ]
203
+ )
 
 
 
204
 
205
  chain = prompt | llm
206
 
207
  response = chain.invoke(
208
  {
209
+ "messages": [HumanMessage(
210
+ content="Rewrite the entire Python code so that it does not contain any errors. "
211
+ "The code should be able to run without any errors."
212
+ )],
213
  "code": code,
214
  "exception": exception
215
  }
216
  )
217
 
218
+ response_text = response.content.strip()
219
+ print(f"DEBUG - Fixed code response: {response_text[:200]}...") # Print first 200 chars
220
+
221
  return response.content
222
 
223
  except Exception as e:
224
  error_msg = str(e)
225
+
226
+ # DEBUG: Print the actual error to understand what's happening
227
+ print(f"DEBUG - Exception fixing failed - Exception type: {type(e).__name__}")
228
+ print(f"DEBUG - Exception fixing failed - Error message: {error_msg}")
229
+
230
  if "rate_limit" in error_msg.lower() or "429" in error_msg:
231
  raise Exception("Rate limit exceeded. Please wait a moment and try again.")
232
  elif "authentication" in error_msg.lower() or "401" in error_msg or "api_key" in error_msg.lower():
233
+ raise Exception("We're having trouble generating your visualization.")
234
  elif "timeout" in error_msg.lower():
235
  raise Exception("Request timed out. Please try again.")
236
  else:
237
+ raise Exception(f"Unable to process your request: {error_msg}")