Spaces:

Vashishta-S-2141
/

LLM_Powered_Database_Chatbot

Sleeping

App Files Community

SVashishta1 committed on Apr 24

Commit

e770679

1 Parent(s): b8b94fc

Restore: Visualization prompt and current_plot for better LLM guidance and SQL generation

Browse files

Files changed (1) hide show

app.py +41 -36

app.py CHANGED Viewed

@@ -13,9 +13,7 @@ import plotly.io as pio
 import traceback
 import base64
 from io import BytesIO
-# I am commenting out voice libraries because we are not using them right now
-# import speech_recognition as sr
-# from gtts import gTTS
 import re
 import importlib.util
@@ -64,7 +62,7 @@ current_context = {
 }
 # Add a global variable to store the current plot
-# current_plot = None
 # Define the prompt with examples for SQL query generation
 query_prompt = ChatPromptTemplate.from_template("""
@@ -83,6 +81,38 @@ Important guidelines:
 Question: {question}
 """)
 # Define the prompt for interpreting the SQL query result
 interpret_prompt = ChatPromptTemplate.from_messages(
     [
@@ -91,38 +121,6 @@ interpret_prompt = ChatPromptTemplate.from_messages(
     ]
 )
-# Add this after the query_prompt definition
-# visualization_prompt = ChatPromptTemplate.from_template("""
-# You are a data visualization expert. Given a question about visualizing data, write a SQLite-compatible SQL query that will retrieve the appropriate data for the visualization.
-#
-# Important guidelines for SQLite syntax:
-# 1. Use strftime() for date functions:
-#    - Year: strftime('%Y', date_column)
-#    - Month: strftime('%m', date_column)
-#    - Day: strftime('%d', date_column)
-#    - Hour: strftime('%H', date_column)
-#
-# 2. For histograms and binning:
-#    - Use: CAST((column / bin_size) AS INT) * bin_size
-#    - Example: CAST((trip_distance / 0.5) AS INT) * 0.5 AS distance_bin
-#
-# 3. For box plots:
-#    - SQLite doesn't support PERCENTILE_CONT or window functions
-#    - Simply return the raw data column: SELECT column_name FROM data_tab
-#    - The application will calculate quartiles and outliers
-#
-# 4. For heatmaps:
-#    - Return raw data for correlation analysis
-#    - Example: SELECT numeric_col1, numeric_col2, numeric_col3 FROM data_tab
-#
-# 5. Always use 'data_tab' as the table name
-#
-# 6. IMPORTANT: Return ONLY the SQL query without any markdown formatting, explanations, or code blocks
-#
-# Question: {question}
-# Visualization type: {viz_type}
-# """)
 # Add this helper function to clean SQL queries
 def clean_sql_query(query_text):
     """Clean SQL query text by removing markdown formatting and comments"""
@@ -260,6 +258,13 @@ def process_text_query(query, history):
                         sql_query = f"SELECT {cols_str} FROM data_tab WHERE {numeric_cols[0]} IS NOT NULL LIMIT 1000;"
                     else:
                         sql_query = "SELECT * FROM data_tab LIMIT 10;"
             else:
                 # For other queries, use the LLM to generate SQL
                 sql_query = llm.invoke(query_prompt.format(question=question_with_context)).content

 import traceback
 import base64
 from io import BytesIO
 import re
 import importlib.util
 }
 # Add a global variable to store the current plot
+current_plot = None
 # Define the prompt with examples for SQL query generation
 query_prompt = ChatPromptTemplate.from_template("""
 Question: {question}
 """)
+# Add this after the query_prompt definition
+visualization_prompt = ChatPromptTemplate.from_template("""
+You are a data visualization expert. Given a question about visualizing data, write a SQLite-compatible SQL query that will retrieve the appropriate data for the visualization.
+Important guidelines for SQLite syntax:
+1. Use strftime() for date functions:
+   - Year: strftime('%Y', date_column)
+   - Month: strftime('%m', date_column)
+   - Day: strftime('%d', date_column)
+   - Hour: strftime('%H', date_column)
+2. For histograms and binning:
+   - Use: CAST((column / bin_size) AS INT) * bin_size
+   - Example: CAST((trip_distance / 0.5) AS INT) * 0.5 AS distance_bin
+3. For box plots:
+   - SQLite doesn't support PERCENTILE_CONT or window functions
+   - Simply return the raw data column: SELECT column_name FROM data_tab
+   - The application will calculate quartiles and outliers
+4. For heatmaps:
+   - Return raw data for correlation analysis
+   - Example: SELECT numeric_col1, numeric_col2, numeric_col3 FROM data_tab
+5. Always use 'data_tab' as the table name
+6. IMPORTANT: Return ONLY the SQL query without any markdown formatting, explanations, or code blocks
+Question: {question}
+Visualization type: {viz_type}
+""")
 # Define the prompt for interpreting the SQL query result
 interpret_prompt = ChatPromptTemplate.from_messages(
     [
     ]
 )
 # Add this helper function to clean SQL queries
 def clean_sql_query(query_text):
     """Clean SQL query text by removing markdown formatting and comments"""
                         sql_query = f"SELECT {cols_str} FROM data_tab WHERE {numeric_cols[0]} IS NOT NULL LIMIT 1000;"
                     else:
                         sql_query = "SELECT * FROM data_tab LIMIT 10;"
+            elif is_visualization:
+                # For visualization queries, use the specialized visualization prompt
+                sql_query = llm.invoke(visualization_prompt.format(
+                    question=question_with_context,
+                    viz_type=viz_type or "bar"
+                )).content
+                sql_query = clean_sql_query(sql_query)
             else:
                 # For other queries, use the LLM to generate SQL
                 sql_query = llm.invoke(query_prompt.format(question=question_with_context)).content