Spaces:

Vashishta-S-2141
/

LLM_Powered_Database_Chatbot

Sleeping

App Files Community

SVashishta1 committed on Mar 10

Commit

80b8363

1 Parent(s): 5421c65

Error Fix

Browse files

Files changed (1) hide show

app.py +63 -49

app.py CHANGED Viewed

@@ -168,6 +168,28 @@ def process_text_query(query, history):
     start_time = time.time()
     # Check if we're in CSV context
     if current_context["file_type"] == "csv" and current_context["table_name"]:
         try:
@@ -184,53 +206,48 @@ def process_text_query(query, history):
             question_with_context = f"The table 'data_tab' has columns: {columns_str}. {query}"
             # Special handling for visualization types that need raw data
-            if is_visualization:
-                viz_type = None
-                for vtype, keywords in viz_keywords.items():
-                    if any(keyword in query.lower() for keyword in keywords):
-                        viz_type = vtype
-                        break
-                if viz_type in ['box', 'heatmap']:
-                    # For box plots and heatmaps, we need raw data
-                    if viz_type == 'box':
-                        # For box plots, we need a single numeric column
-                        numeric_cols_query = "SELECT name FROM pragma_table_info('data_tab') WHERE type LIKE '%INT%' OR type LIKE '%REAL%' OR type LIKE '%FLOA%' OR type LIKE '%NUM%';"
-                        cursor.execute(numeric_cols_query)
-                        numeric_cols = [row[0] for row in cursor.fetchall()]
-                        if numeric_cols:
-                            # Find the relevant numeric column based on the query
-                            target_col = None
-                            for col in numeric_cols:
-                                if col.lower() in query.lower():
-                                    target_col = col
-                                    break
-                            # If no specific column is mentioned, use the first numeric column
-                            if not target_col and numeric_cols:
-                                target_col = numeric_cols[0]
-                            # Generate a simple query to get the raw data
-                            sql_query = f"SELECT {target_col} FROM data_tab WHERE {target_col} IS NOT NULL;"
-                        else:
-                            # No numeric columns found
-                            sql_query = "SELECT * FROM data_tab LIMIT 10;"
-                    elif viz_type == 'heatmap':
-                        # For heatmaps, we need multiple numeric columns
-                        numeric_cols_query = "SELECT name FROM pragma_table_info('data_tab') WHERE type LIKE '%INT%' OR type LIKE '%REAL%' OR type LIKE '%FLOA%' OR type LIKE '%NUM%';"
-                        cursor.execute(numeric_cols_query)
-                        numeric_cols = [row[0] for row in cursor.fetchall()]
-                        if len(numeric_cols) >= 2:
-                            # Use all numeric columns (up to a reasonable limit)
-                            cols_to_use = numeric_cols[:10]  # Limit to 10 columns for performance
-                            cols_str = ", ".join(cols_to_use)
-                            sql_query = f"SELECT {cols_str} FROM data_tab WHERE {numeric_cols[0]} IS NOT NULL LIMIT 1000;"
-                        else:
-                            # Not enough numeric columns
-                            sql_query = "SELECT * FROM data_tab LIMIT 10;"
             else:
                 # Generate SQL query using LLM
                 ai_msg = query_prompt | llm
@@ -238,11 +255,8 @@ def process_text_query(query, history):
                 # Clean the SQL query
                 sql_query = clean_sql_query(raw_sql_query)
-                print(f"Generated SQL Query: {sql_query}")
-            # Check if this is a visualization request
-            is_visualization = any(word in query.lower() for word in ['plot', 'graph', 'chart', 'visualize', 'visualization', 'trend'])
             try:
                 # Execute the query

     start_time = time.time()
+    # Define visualization keywords at the beginning
+    viz_keywords = {
+        'bar': ['bar chart', 'bar graph', 'bar plot', 'barchart', 'bargraph'],
+        'line': ['line chart', 'line graph', 'line plot', 'linechart', 'trend', 'trends', 'time series'],
+        'pie': ['pie chart', 'pie graph', 'pie plot', 'piechart', 'distribution', 'proportion'],
+        'histogram': ['histogram', 'distribution of', 'frequency distribution'],
+        'box': ['box plot', 'boxplot', 'box and whisker', 'outliers', 'quartiles'],
+        'heatmap': ['heatmap', 'heat map', 'correlation matrix', 'correlation heatmap'],
+        'scatter': ['scatter', 'scatter plot', 'relationship between', 'correlation between']
+    }
+    # Check if this is a visualization request
+    is_visualization = any(word in query.lower() for word in ['plot', 'graph', 'chart', 'visualize', 'visualization', 'trend', 'show me'])
+    # Determine visualization type from query
+    viz_type = None
+    if is_visualization:
+        for vtype, keywords in viz_keywords.items():
+            if any(keyword in query.lower() for keyword in keywords):
+                viz_type = vtype
+                break
     # Check if we're in CSV context
     if current_context["file_type"] == "csv" and current_context["table_name"]:
         try:
             question_with_context = f"The table 'data_tab' has columns: {columns_str}. {query}"
             # Special handling for visualization types that need raw data
+            if is_visualization and viz_type in ['box', 'heatmap']:
+                # For box plots and heatmaps, we need raw data
+                if viz_type == 'box':
+                    # For box plots, we need a single numeric column
+                    numeric_cols_query = "SELECT name FROM pragma_table_info('data_tab') WHERE type LIKE '%INT%' OR type LIKE '%REAL%' OR type LIKE '%FLOA%' OR type LIKE '%NUM%';"
+                    cursor = conn.cursor()
+                    cursor.execute(numeric_cols_query)
+                    numeric_cols = [row[0] for row in cursor.fetchall()]
+                    if numeric_cols:
+                        # Find the relevant numeric column based on the query
+                        target_col = None
+                        for col in numeric_cols:
+                            if col.lower() in query.lower():
+                                target_col = col
+                                break
+                        # If no specific column is mentioned, use the first numeric column
+                        if not target_col and numeric_cols:
+                            target_col = numeric_cols[0]
+                        # Generate a simple query to get the raw data
+                        sql_query = f"SELECT {target_col} FROM data_tab WHERE {target_col} IS NOT NULL;"
+                    else:
+                        # No numeric columns found
+                        sql_query = "SELECT * FROM data_tab LIMIT 10;"
+                elif viz_type == 'heatmap':
+                    # For heatmaps, we need multiple numeric columns
+                    numeric_cols_query = "SELECT name FROM pragma_table_info('data_tab') WHERE type LIKE '%INT%' OR type LIKE '%REAL%' OR type LIKE '%FLOA%' OR type LIKE '%NUM%';"
+                    cursor = conn.cursor()
+                    cursor.execute(numeric_cols_query)
+                    numeric_cols = [row[0] for row in cursor.fetchall()]
+                    if len(numeric_cols) >= 2:
+                        # Use all numeric columns (up to a reasonable limit)
+                        cols_to_use = numeric_cols[:10]  # Limit to 10 columns for performance
+                        cols_str = ", ".join(cols_to_use)
+                        sql_query = f"SELECT {cols_str} FROM data_tab WHERE {numeric_cols[0]} IS NOT NULL LIMIT 1000;"
+                    else:
+                        # Not enough numeric columns
+                        sql_query = "SELECT * FROM data_tab LIMIT 10;"
             else:
                 # Generate SQL query using LLM
                 ai_msg = query_prompt | llm
                 # Clean the SQL query
                 sql_query = clean_sql_query(raw_sql_query)
+            print(f"Generated SQL Query: {sql_query}")
             try:
                 # Execute the query