Spaces:

deepa-shalini
/

ChaRtBot

Sleeping

App Files Community

Deepa Shalini committed on Jan 6

Commit

4f6abcb

1 Parent(s): 9c1d9fd

validation checks for prompt and alert messages

Browse files

Files changed (3) hide show

.gitignore +4 -1
app.py +4 -2
utils/prompt.py +92 -34

.gitignore CHANGED Viewed

@@ -14,4 +14,7 @@ utils/chartbot_dataset_layout.py
 utils/components.py
 # ignore design html file
-design.html

 utils/components.py
 # ignore design html file
+design.html
+# ignore temporary files created
+temp*

app.py CHANGED Viewed

@@ -8,7 +8,8 @@ from utils import prompt, helpers
 app = dash.Dash(__name__, suppress_callback_exceptions=True)
 # Define the layout matching design.html
-app.layout = html.Div(
     [
         html.Div(
             [
@@ -133,6 +134,7 @@ app.layout = html.Div(
         )
     ],
     className="viewport"
 )
 # Callback for file upload
@@ -273,4 +275,4 @@ def reset_chat(n_clicks):
     return dash.no_update
 if __name__ == "__main__":
-    app.run(debug=True)

 app = dash.Dash(__name__, suppress_callback_exceptions=True)
 # Define the layout matching design.html
+app.layout = dmc.MantineProvider(
+    html.Div(
     [
         html.Div(
             [
         )
     ],
     className="viewport"
+    )
 )
 # Callback for file upload
     return dash.no_update
 if __name__ == "__main__":
+    app.run(debug=False)

utils/prompt.py CHANGED Viewed

@@ -23,19 +23,39 @@ if not GROQ_API_KEY or GROQ_API_KEY == 'your_groq_api_key_here':
 # define connectivity to the llm
 try:
     llm = ChatGroq(
-        model="groq/compound-mini",
         api_key=GROQ_API_KEY,
         temperature=0
     )
 except Exception as e:
     raise ValueError(f"Failed to initialize ChatGroq: {str(e)}")
-'''Before creating any visualizations, ensure that any rows with NaN or missing values in the relevant columns are removed. Additionally,
-            handle missing values appropriately based on the context, ensuring cleaner visualizations.
-            For example, use df.dropna(subset=[column_name]) for data cleaning. Never use this statement: df.dropna(inplace=True).'''
 def get_prompt_text() -> str:
-    return """You are a data visualization expert and you only use the graphing library Plotly.
             Ensure that before performing any data manipulation or plotting, the code checks for column data types and converts them if necessary.
             For example, numeric columns should be converted to floats or integers using pd.to_numeric(), and non-numeric columns should be excluded from numeric operations.
             Before creating any visualizations, ensure that any rows with NaN or missing values in the relevant columns are removed. Additionally,
@@ -84,50 +104,80 @@ def get_response(user_input: str, data_top5_csv_string: str, file_name: str) ->
         file_name: Name of the data file
     Returns:
-        LLM response content
     Raises:
-        Exception: If API call fails
     """
     try:
         prompt = ChatPromptTemplate.from_messages(
-                [
-                    (
-                        "system",
-                        get_prompt_text()
-                    ),
-                    MessagesPlaceholder(variable_name="messages")
-                ]
-            )
         chain = prompt | llm
         response = chain.invoke(
             {
                 "messages": [HumanMessage(content=user_input)],
-                "data_visualization_best_practices": helpers.read_doc(helpers.get_app_file_path("assets", "data_viz_best_practices.txt")),
-                "example_subplots1": helpers.read_doc(helpers.get_app_file_path("assets", "example_subplots1.txt")),
-                "example_subplots2": helpers.read_doc(helpers.get_app_file_path("assets", "example_subplots2.txt")),
-                "example_subplots3": helpers.read_doc(helpers.get_app_file_path("assets", "example_subplots3.txt")),
                 "data": data_top5_csv_string,
                 "name_of_file": file_name
             }
         )
-        return response.content
     except Exception as e:
         error_msg = str(e)
         if "rate_limit" in error_msg.lower() or "429" in error_msg:
             raise Exception("Rate limit exceeded. Please wait a moment and try again.")
         elif "authentication" in error_msg.lower() or "401" in error_msg or "api_key" in error_msg.lower():
-            raise Exception("Authentication failed. Please check your GROQ_API_KEY in the .env file.")
         elif "timeout" in error_msg.lower():
             raise Exception("Request timed out. Please try again.")
         else:
-            raise Exception(f"Error communicating with Groq API: {error_msg}")
 def get_python_exception_prompt_text() -> str:
     return """The Python code you provided {code} has an error {exception}"""
 def get_python_exception_response(code: str, exception: str) -> str:
@@ -146,34 +196,42 @@ def get_python_exception_response(code: str, exception: str) -> str:
     """
     try:
         prompt = ChatPromptTemplate.from_messages(
-                [
-                    (
-                        "system",
-                        get_python_exception_prompt_text()
-                    ),
-                    MessagesPlaceholder(variable_name="messages")
-                ]
-            )
         chain = prompt | llm
         response = chain.invoke(
             {
-                "messages": [HumanMessage(content="Rewrite the entire Python code so that it does not contain any errors. The code should be able to run without any errors.")],
                 "code": code,
                 "exception": exception
             }
         )
         return response.content
     except Exception as e:
         error_msg = str(e)
         if "rate_limit" in error_msg.lower() or "429" in error_msg:
             raise Exception("Rate limit exceeded. Please wait a moment and try again.")
         elif "authentication" in error_msg.lower() or "401" in error_msg or "api_key" in error_msg.lower():
-            raise Exception("Authentication failed. Please check your GROQ_API_KEY in the .env file.")
         elif "timeout" in error_msg.lower():
             raise Exception("Request timed out. Please try again.")
         else:
-            raise Exception(f"Error communicating with Groq API: {error_msg}")

 # define connectivity to the llm
 try:
     llm = ChatGroq(
+        model="llama-3.3-70b-versatile",
         api_key=GROQ_API_KEY,
         temperature=0
     )
 except Exception as e:
     raise ValueError(f"Failed to initialize ChatGroq: {str(e)}")
 def get_prompt_text() -> str:
+    """
+    Get the system prompt for data visualization generation.
+    Returns:
+        str: The system prompt template
+    """
+    return """You are a data visualization expert and you only use the graphing library Plotly.
+            CRITICAL VALIDATION RULES - EXECUTE BEFORE GENERATING ANY CODE:
+            1. RELEVANCE CHECK: Before generating any code, you MUST verify that the user's request is relevant to the provided dataset.
+            2. COLUMN VERIFICATION: Analyze the first 5 rows of data provided. If the user explicitly mentions column names that do NOT exist in the dataset, you MUST return an error message instead of code.
+            3. DATA CONTEXT VERIFICATION: If the user's request asks about metrics, categories, or data points that are clearly incompatible with the dataset columns shown, you MUST return an error message instead of code.
+            4. NON-VISUALIZATION REQUESTS: If the user's request is not about data visualization (e.g., asking for text generation, general questions, unrelated tasks), you MUST return an error message instead of code.
+            ERROR MESSAGE FORMAT - Use this EXACT format when validation fails:
+            ERROR: The request appears to be unrelated to the provided dataset. Please rephrase your request to refer to the actual columns and data available in your file. Available columns are: [list the column names from the data provided].
+            IMPORTANT: Only generate Python code if ALL of the following are true:
+            - The request is about creating a data visualization
+            - The request refers to columns, metrics, or patterns that could reasonably exist in the provided dataset
+            - The user has not explicitly mentioned column names that don't exist in the dataset
+            If any validation rule fails, return ONLY the error message in the format specified above. Do NOT generate any Python code.
+            IF VALIDATION PASSES, PROCEED WITH CODE GENERATION:
             Ensure that before performing any data manipulation or plotting, the code checks for column data types and converts them if necessary.
             For example, numeric columns should be converted to floats or integers using pd.to_numeric(), and non-numeric columns should be excluded from numeric operations.
             Before creating any visualizations, ensure that any rows with NaN or missing values in the relevant columns are removed. Additionally,
         file_name: Name of the data file
     Returns:
+        LLM response content containing Python code or error message
     Raises:
+        Exception: If API call fails or validation fails
     """
     try:
         prompt = ChatPromptTemplate.from_messages(
+            [
+                ("system", get_prompt_text()),
+                MessagesPlaceholder(variable_name="messages")
+            ]
+        )
         chain = prompt | llm
         response = chain.invoke(
             {
                 "messages": [HumanMessage(content=user_input)],
+                "data_visualization_best_practices": helpers.read_doc(
+                    helpers.get_app_file_path("assets", "data_viz_best_practices.txt")
+                ),
+                "example_subplots1": helpers.read_doc(
+                    helpers.get_app_file_path("assets", "example_subplots1.txt")
+                ),
+                "example_subplots2": helpers.read_doc(
+                    helpers.get_app_file_path("assets", "example_subplots2.txt")
+                ),
+                "example_subplots3": helpers.read_doc(
+                    helpers.get_app_file_path("assets", "example_subplots3.txt")
+                ),
                 "data": data_top5_csv_string,
                 "name_of_file": file_name
             }
         )
+        # Check if the response is an error message instead of code
+        response_text = response.content.strip()
+        if response_text.startswith("ERROR:"):
+            # Extract the error message and raise validation error
+            error_message = response_text.replace("ERROR:", "").strip()
+            raise ValueError(error_message)
+        return response_text
+    except ValueError as ve:
+        # This is our custom validation error from the LLM
+        # Re-raise with user-friendly message
+        raise Exception(f"Unable to process your request: {str(ve)}")
     except Exception as e:
         error_msg = str(e)
+        # DEBUG: Print the actual error to understand what's happening
+        print(f"DEBUG - Caught exception type: {type(e).__name__}")
+        print(f"DEBUG - Error message: {error_msg}")
+        # Check for specific API errors (these are real API issues, not validation errors)
         if "rate_limit" in error_msg.lower() or "429" in error_msg:
             raise Exception("Rate limit exceeded. Please wait a moment and try again.")
         elif "authentication" in error_msg.lower() or "401" in error_msg or "api_key" in error_msg.lower():
+            raise Exception("We're having trouble generating your visualization.")
         elif "timeout" in error_msg.lower():
             raise Exception("Request timed out. Please try again.")
         else:
+            raise Exception(f"Unable to process your request: {error_msg}")
 def get_python_exception_prompt_text() -> str:
+    """
+    Get the system prompt for fixing Python code errors.
+    Returns:
+        str: The system prompt for error fixing
+    """
     return """The Python code you provided {code} has an error {exception}"""
 def get_python_exception_response(code: str, exception: str) -> str:
     """
     try:
         prompt = ChatPromptTemplate.from_messages(
+            [
+                ("system", get_python_exception_prompt_text()),
+                MessagesPlaceholder(variable_name="messages")
+            ]
+        )
         chain = prompt | llm
         response = chain.invoke(
             {
+                "messages": [HumanMessage(
+                    content="Rewrite the entire Python code so that it does not contain any errors. "
+                            "The code should be able to run without any errors."
+                )],
                 "code": code,
                 "exception": exception
             }
         )
+        response_text = response.content.strip()
+        print(f"DEBUG - Fixed code response: {response_text[:200]}...")  # Print first 200 chars
         return response.content
     except Exception as e:
         error_msg = str(e)
+        # DEBUG: Print the actual error to understand what's happening
+        print(f"DEBUG - Exception fixing failed - Exception type: {type(e).__name__}")
+        print(f"DEBUG - Exception fixing failed - Error message: {error_msg}")
         if "rate_limit" in error_msg.lower() or "429" in error_msg:
             raise Exception("Rate limit exceeded. Please wait a moment and try again.")
         elif "authentication" in error_msg.lower() or "401" in error_msg or "api_key" in error_msg.lower():
+            raise Exception("We're having trouble generating your visualization.")
         elif "timeout" in error_msg.lower():
             raise Exception("Request timed out. Please try again.")
         else:
+            raise Exception(f"Unable to process your request: {error_msg}")