SVashishta1 committed on
Commit
8de36f9
·
1 Parent(s): fbbf665
Files changed (1) hide show
  1. app.py +110 -23
app.py CHANGED
@@ -6,7 +6,12 @@ import tempfile
6
  import pandas as pd
7
  import sqlite3
8
  from langchain_core.prompts import ChatPromptTemplate
9
- #test
 
 
 
 
 
10
  # Add parent directory to path to import backend modules
11
  sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
12
 
@@ -27,8 +32,16 @@ document_parser = SimpleDocumentParser()
27
  # Initialize DocumentAssistant
28
  document_assistant = DocumentAssistant()
29
 
30
- # Load environment variables
31
- load_dotenv()
 
 
 
 
 
 
 
 
32
 
33
  # Database path for CSV data
34
  DB_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data", "csv_data.db")
@@ -94,15 +107,23 @@ query_prompt = ChatPromptTemplate.from_messages(
94
  ]
95
  )
96
 
 
 
 
 
 
 
 
 
97
  def process_text_query(query, history):
98
  """Process a text query and update chat history"""
99
  if not query:
100
  return "", history
101
 
102
  # Check if this looks like an SQL query for CSV data
103
- if any(keyword in query.lower() for keyword in ['sql', 'query', 'table', 'select', 'from', 'where', 'group by']):
104
  try:
105
- # Try to execute as SQL query against CSV data
106
  conn = sqlite3.connect(DB_PATH)
107
  cursor = conn.cursor()
108
 
@@ -111,36 +132,81 @@ def process_text_query(query, history):
111
  tables = [row[0] for row in cursor.fetchall()]
112
 
113
  if tables:
114
- # Generate a response that includes table info
115
  table_info = []
116
  for table in tables:
117
  cursor.execute(f"PRAGMA table_info({table});")
118
  columns = [f"{col[1]} ({col[2]})" for col in cursor.fetchall()]
119
  table_info.append(f"Table '{table}' has columns: {', '.join(columns)}")
120
 
121
- # Use the assistant to generate a response that includes SQL info
122
- context = f"The database contains the following tables:\n" + "\n".join(table_info)
123
- response = document_assistant.process_query(f"{context}\n\nUser query: {query}")
124
 
125
- # Update history with message format
126
- history.append({"role": "user", "content": query})
127
- history.append({"role": "assistant", "content": response})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
  else:
129
- # No tables found
130
- history.append({"role": "user", "content": query})
131
- history.append({"role": "assistant", "content": "No CSV data has been uploaded yet. Please upload a CSV file first."})
132
 
133
  conn.close()
134
  except Exception as e:
135
  # Fall back to regular document query
136
  response = document_assistant.process_query(query)
137
- history.append({"role": "user", "content": query})
138
- history.append({"role": "assistant", "content": response})
139
  else:
140
  # Process regular document query
141
  response = document_assistant.process_query(query)
142
- history.append({"role": "user", "content": query})
143
- history.append({"role": "assistant", "content": response})
 
 
144
 
145
  return "", history
146
 
@@ -164,9 +230,21 @@ def process_file_upload(files):
164
  # Load CSV into SQLite
165
  conn = sqlite3.connect(DB_PATH)
166
  load_csv_to_sqlite(file_path, conn, table_name)
 
 
 
 
 
 
 
 
 
 
167
  conn.close()
168
 
169
  file_info.append(f"CSV data loaded into table: {table_name}")
 
 
170
 
171
  # Also index with document assistant for text search
172
  result = document_assistant.upload_document(file_path)
@@ -239,14 +317,23 @@ def list_documents():
239
  cursor = conn.cursor()
240
  cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
241
  tables = cursor.fetchall()
242
- conn.close()
243
 
244
  if tables:
245
  doc_list.append("\nCSV data tables:")
246
  for table in tables:
247
- doc_list.append(f"- {table[0]}")
248
- except:
249
- pass
 
 
 
 
 
 
 
 
 
 
250
 
251
  return "\n".join(doc_list)
252
 
 
6
  import pandas as pd
7
  import sqlite3
8
  from langchain_core.prompts import ChatPromptTemplate
9
+ from langchain_groq import ChatGroq
10
+ import plotly.express as px
11
+
12
+ # Load environment variables
13
+ load_dotenv()
14
+
15
  # Add parent directory to path to import backend modules
16
  sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
17
 
 
32
# Assistant that handles plain (non-SQL) document queries.
document_assistant = DocumentAssistant()

# LLM backend: Groq-hosted llama3-8b-8192, deterministic output (temperature=0),
# retried up to twice on transient failures. The key is read from the
# environment (populated earlier by load_dotenv()).
_groq_api_key = os.getenv("GROQ_API_KEY")
llm = ChatGroq(
    model="llama3-8b-8192",
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
    verbose=True,
    api_key=_groq_api_key,
)

# SQLite file that holds uploaded CSV data, kept next to this script.
_APP_DIR = os.path.dirname(os.path.abspath(__file__))
DB_PATH = os.path.join(_APP_DIR, "data", "csv_data.db")
 
107
  ]
108
  )
109
 
110
# Prompt used to turn an executed SQL result set into a natural-language
# analysis. Fills three slots: the user's question, the generated SQL, and a
# (possibly truncated) text dump of the result rows.
_INTERPRET_SYSTEM = (
    "You are an experienced data analyst. Examine the following data and "
    "provide a clear analysis. Base your analysis solely on the provided data."
)
_INTERPRET_HUMAN = "Question: {question}\n\nSQL Query: {sql_query}\n\nData:\n{data}"
interpret_prompt = ChatPromptTemplate.from_messages(
    [("system", _INTERPRET_SYSTEM), ("human", _INTERPRET_HUMAN)]
)
117
+
118
def process_text_query(query, history):
    """Process a text query and append the exchange to the chat history.

    If the query looks data-related (contains SQL/analytics keywords), an LLM
    generates a SQL query against the uploaded CSV tables; the query is
    executed, optionally charted, and its results interpreted by the LLM.
    Otherwise — or if anything in the SQL path fails — the query falls back
    to the regular document assistant.

    Args:
        query: Raw user input; an empty string is a no-op.
        history: Chat history as a list of {"role", "content"} dicts;
            mutated in place.

    Returns:
        Tuple of ("", history) — the empty string clears the input box.
    """
    if not query:
        return "", history

    # Heuristic: treat the query as data-related if it mentions any
    # SQL/analytics keyword. Deliberately broad; misfires are caught by the
    # outer except and fall back to the document assistant.
    sql_keywords = ('sql', 'query', 'table', 'select', 'from', 'where',
                    'group by', 'data', 'csv', 'average', 'count', 'sum',
                    'max', 'min')
    if any(keyword in query.lower() for keyword in sql_keywords):
        conn = None
        try:
            conn = sqlite3.connect(DB_PATH)
            cursor = conn.cursor()

            # Discover which CSV-backed tables exist.
            cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
            tables = [row[0] for row in cursor.fetchall()]

            if tables:
                # Describe each table's schema so the LLM can write valid SQL.
                table_info = []
                for table in tables:
                    cursor.execute(f"PRAGMA table_info({table});")
                    columns = [f"{col[1]} ({col[2]})" for col in cursor.fetchall()]
                    table_info.append(f"Table '{table}' has columns: {', '.join(columns)}")

                schema_text = "\n".join(table_info)
                question_with_context = (
                    f"The database contains the following tables:\n{schema_text}\n\n{query}"
                )

                # Ask the LLM to translate the question into SQL.
                sql_query = (query_prompt | llm).invoke(
                    {"question": question_with_context}
                ).content.strip()

                try:
                    result_df = pd.read_sql_query(sql_query, conn)

                    # Render a bar chart only for simple two-column results
                    # with at least one numeric column.
                    plot_html = None
                    if not result_df.empty and len(result_df.columns) == 2:
                        numeric_cols = result_df.select_dtypes(include=['number']).columns.tolist()
                        if numeric_cols:
                            x_col = (result_df.columns[0]
                                     if result_df.columns[0] not in numeric_cols
                                     else result_df.columns[1])
                            fig = px.bar(result_df, x=x_col, y=numeric_cols[0],
                                         title="Query Results")
                            plot_html = fig.to_html(full_html=False)

                    # Cap the data shown to the LLM (and the user) at 10 rows.
                    if len(result_df) > 10:
                        data_str = f"{result_df.head(10).to_string()}\n... (showing 10 of {len(result_df)} rows)"
                    else:
                        data_str = result_df.to_string()

                    # Ask the LLM to interpret the result set.
                    interpretation = (interpret_prompt | llm).invoke({
                        "question": query,
                        "sql_query": sql_query,
                        "data": data_str
                    }).content.strip()

                    # Assemble the markdown response.
                    response = f"**SQL Query:**\n```sql\n{sql_query}\n```\n\n"
                    if not result_df.empty:
                        response += f"**Results:**\n```\n{data_str}\n```\n\n"
                    else:
                        response += "**No results found.**\n\n"
                    response += f"**Analysis:**\n{interpretation}"
                    if plot_html:
                        response += f"\n\n<div>{plot_html}</div>"

                except Exception as e:
                    # The generated SQL failed; surface the query and error.
                    response = f"**SQL Query:**\n```sql\n{sql_query}\n```\n\n**Error executing query:** {str(e)}"
            else:
                response = "No CSV data has been uploaded yet. Please upload a CSV file first."
        except Exception:
            # DB open or LLM call failed — fall back to a plain document query.
            response = document_assistant.process_query(query)
        finally:
            # BUGFIX: previously conn.close() sat on the success path only, so
            # the connection leaked whenever an exception reached the outer
            # handler. Always close it.
            if conn is not None:
                conn.close()
    else:
        # Not data-related: regular document query.
        response = document_assistant.process_query(query)

    # Record the exchange in {"role", "content"} message format.
    history.append({"role": "user", "content": query})
    history.append({"role": "assistant", "content": response})

    return "", history
212
 
 
230
  # Load CSV into SQLite
231
  conn = sqlite3.connect(DB_PATH)
232
  load_csv_to_sqlite(file_path, conn, table_name)
233
+
234
+ # Get column info for the table
235
+ cursor = conn.cursor()
236
+ cursor.execute(f"PRAGMA table_info({table_name});")
237
+ columns = [f"{col[1]} ({col[2]})" for col in cursor.fetchall()]
238
+
239
+ # Get row count
240
+ cursor.execute(f"SELECT COUNT(*) FROM {table_name};")
241
+ row_count = cursor.fetchone()[0]
242
+
243
  conn.close()
244
 
245
  file_info.append(f"CSV data loaded into table: {table_name}")
246
+ file_info.append(f"Columns: {', '.join(columns)}")
247
+ file_info.append(f"Rows: {row_count}")
248
 
249
  # Also index with document assistant for text search
250
  result = document_assistant.upload_document(file_path)
 
317
  cursor = conn.cursor()
318
  cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
319
  tables = cursor.fetchall()
 
320
 
321
  if tables:
322
  doc_list.append("\nCSV data tables:")
323
  for table in tables:
324
+ # Get column info
325
+ cursor.execute(f"PRAGMA table_info({table[0]});")
326
+ columns = [col[1] for col in cursor.fetchall()]
327
+
328
+ # Get row count
329
+ cursor.execute(f"SELECT COUNT(*) FROM {table[0]};")
330
+ row_count = cursor.fetchone()[0]
331
+
332
+ doc_list.append(f"- {table[0]} ({row_count} rows, {len(columns)} columns)")
333
+
334
+ conn.close()
335
+ except Exception as e:
336
+ doc_list.append(f"Error listing CSV tables: {str(e)}")
337
 
338
  return "\n".join(doc_list)
339