SVashishta1 committed on
Commit
61ce4a6
·
1 Parent(s): d33fd46

Error Fix

Browse files
Files changed (1) hide show
  1. app.py +67 -103
app.py CHANGED
@@ -35,64 +35,31 @@ DB_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data", "csv_
35
  os.makedirs(os.path.dirname(DB_PATH), exist_ok=True)
36
 
37
  # Define the prompt with examples
38
- query_prompt = ChatPromptTemplate.from_messages(
39
- [
40
- ("system", """
41
- You are an SQL and data analysis expert. Generate an appropriate SQL query using SQLite syntax for the question provided, without any explanations or code comments.
42
- Follow SQLite-specific conventions, as shown in the examples below:
43
-
44
- Example 1:
45
- Question: "What is the average fare for trips over 10 miles?"
46
- SQL Query: SELECT AVG(fare_amount) FROM taxi_data WHERE trip_distance > 10;
47
-
48
- Example 2:
49
- Question: "How many trips were taken in each month?"
50
- SQL Query: SELECT strftime('%m', pickup_datetime) AS month, COUNT(*) AS trip_count FROM taxi_data GROUP BY month;
51
 
52
- Example 3:
53
- Question: "What is the total fare amount for each driver (medallion) per day?"
54
- SQL Query: SELECT DATE(pickup_datetime) AS date, medallion, SUM(fare_amount) AS total_fare FROM taxi_data GROUP BY date, medallion;
55
-
56
- SQLite-Specific Conventions:
57
-
58
- 1. Date and Time Extraction:
59
- - Instead of `EXTRACT(YEAR FROM column)`, use `strftime('%Y', column)` to extract the year.
60
- - Example: `SELECT strftime('%Y', pickup_datetime) FROM taxi_data;`
61
 
62
- 2. String Length:
63
- - Instead of `CHAR_LENGTH(column)`, use `LENGTH(column)`.
64
- - Example: `SELECT LENGTH(passenger_name) FROM taxi_data;`
65
 
66
- 3. Regular Expressions:
67
- - SQLite does not support `REGEXP`. Use `LIKE` for simple patterns or avoid regular expressions.
68
- - Example: `SELECT * FROM taxi_data WHERE passenger_name LIKE 'A%';`
69
 
70
- 4. Window Functions:
71
- - For row numbering, use `ROW_NUMBER()` if supported, or simulate with joins.
72
- - Example: `SELECT id, ROW_NUMBER() OVER (ORDER BY pickup_datetime) AS row_num FROM taxi_data;`
73
 
74
- 5. Data Type Casting:
75
- - Use `CAST(column AS TYPE)`, but note that SQLite supports limited types.
76
- - Example: `SELECT CAST(fare_amount AS INTEGER) FROM taxi_data;`
77
 
78
- 6. Full Outer Join Workaround:
79
- - SQLite doesn't support `FULL OUTER JOIN`. Combine `LEFT JOIN` and `UNION` for a similar effect.
80
- - Example:
81
- ```
82
- SELECT a.*, b.*
83
- FROM table_a a
84
- LEFT JOIN table_b b ON a.id = b.id
85
- UNION
86
- SELECT a.*, b.*
87
- FROM table_a a
88
- RIGHT JOIN table_b b ON a.id = b.id;
89
- ```
90
-
91
- Use these examples and guidelines to generate an SQL query compatible with SQLite syntax for the question provided.
92
- """),
93
- ("human", "{question}"),
94
- ]
95
- )
96
 
97
  # Define the prompt for interpreting the SQL query result
98
  interpret_prompt = ChatPromptTemplate.from_messages(
@@ -107,86 +74,83 @@ def process_text_query(query, history):
107
  if not query:
108
  return "", history
109
 
110
- # More specific SQL detection - look for actual SQL-like patterns or explicit SQL requests
111
- sql_keywords = ['select', 'from', 'where', 'group by', 'order by', 'having', 'join']
112
- data_analysis_keywords = ['average', 'count', 'sum', 'maximum', 'minimum', 'mean', 'analyze', 'calculate']
113
-
114
- # Check if this is explicitly about the CSV/database data
115
- is_sql_query = (
116
- any(keyword in query.lower() for keyword in sql_keywords) or
117
- ('csv' in query.lower() and any(keyword in query.lower() for keyword in data_analysis_keywords)) or
118
- 'database' in query.lower() or
119
- 'table' in query.lower()
120
- )
121
-
122
  try:
123
- # Connect to the SQLite database to check if we have any tables
124
  conn = sqlite3.connect(DB_PATH)
125
  cursor = conn.cursor()
126
  cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
127
  tables = [row[0] for row in cursor.fetchall()]
128
- conn.close()
129
 
130
- if is_sql_query and tables:
131
- try:
132
- conn = sqlite3.connect(DB_PATH)
133
- cursor = conn.cursor()
134
-
135
- # Build context with table information
136
- table_info = []
137
- for table in tables:
138
- cursor.execute(f"PRAGMA table_info({table});")
139
- columns = [f"{col[1]} ({col[2]})" for col in cursor.fetchall()]
140
- table_info.append(f"Table '{table}' has columns: {', '.join(columns)}")
141
-
142
- # Create question with context
143
- question_with_context = f"The database contains the following tables:\n{chr(10).join(table_info)}\n\n{query}"
144
-
145
- # Generate SQL query using the query engine
146
- sql_query = query_engine.generate_response(query_prompt.format(question=question_with_context))
147
-
148
- # Verify the response is actually a SQL query
149
- if not any(keyword in sql_query.lower() for keyword in ['select', 'from']):
150
- raise ValueError("Generated response is not a valid SQL query")
151
-
152
  try:
153
- # Execute the query
 
 
 
 
154
  result_df = pd.read_sql_query(sql_query, conn)
155
 
156
- # Format the data for the interpretation
157
  if len(result_df) > 10:
158
  data_str = f"{result_df.head(10).to_string()}\n... (showing 10 of {len(result_df)} rows)"
159
  else:
160
  data_str = result_df.to_string()
161
 
162
- # Create the response
163
  response = f"**SQL Query:**\n```sql\n{sql_query}\n```\n\n"
164
-
165
  if not result_df.empty:
166
  response += f"**Results:**\n```\n{data_str}\n```\n\n"
 
 
 
 
 
 
 
 
 
 
 
167
  else:
168
- response += "**No results found.**\n\n"
 
 
 
 
169
 
170
  except Exception as e:
171
- response = f"**SQL Query:**\n```sql\n{sql_query}\n```\n\n**Error executing query:** {str(e)}"
172
-
173
- conn.close()
174
-
175
- except Exception as e:
176
- # If there's an error with SQL processing, fall back to document query
177
  response = document_assistant.process_query(query)
178
  else:
179
- # Process regular document query
180
  response = document_assistant.process_query(query)
181
 
 
 
182
  except Exception as e:
183
- # If there's any database connection error, fall back to document query
 
184
  response = document_assistant.process_query(query)
185
 
186
- # Update history with message format
187
  history.append({"role": "user", "content": query})
188
  history.append({"role": "assistant", "content": response})
189
-
190
  return "", history
191
 
192
  def process_file_upload(files):
 
35
  os.makedirs(os.path.dirname(DB_PATH), exist_ok=True)
36
 
37
  # Define the prompt with examples
38
+ query_prompt = ChatPromptTemplate.from_messages([
39
+ ("system", """You are an SQL expert. Generate an appropriate SQL query using SQLite syntax for the question provided. The query should be executable and return exactly what was asked for.
 
 
 
 
 
 
 
 
 
 
 
40
 
41
+ For questions about maximum/highest values, use MAX().
42
+ For minimum/lowest values, use MIN().
43
+ For averages, use AVG().
44
+ For counts, use COUNT().
45
+ For sums, use SUM().
 
 
 
 
46
 
47
+ Examples:
48
+ 1. Question: "What is the highest tip_amount in the dataset?"
49
+ SQL: SELECT MAX(tip_amount) as highest_tip FROM data_tab;
50
 
51
+ 2. Question: "What is the average fare amount?"
52
+ SQL: SELECT AVG(fare_amount) as average_fare FROM data_tab;
 
53
 
54
+ 3. Question: "How many trips are there?"
55
+ SQL: SELECT COUNT(*) as trip_count FROM data_tab;
 
56
 
57
+ 4. Question: "What are the top 5 highest tip amounts?"
58
+ SQL: SELECT * FROM data_tab ORDER BY tip_amount DESC LIMIT 5;
 
59
 
60
+ Generate only the SQL query, nothing else. Make sure to use the correct table name from the context provided."""),
61
+ ("human", "{question}")
62
+ ])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
 
64
  # Define the prompt for interpreting the SQL query result
65
  interpret_prompt = ChatPromptTemplate.from_messages(
 
74
  if not query:
75
  return "", history
76
 
77
+ # First, check if we have any CSV data loaded
 
 
 
 
 
 
 
 
 
 
 
78
  try:
 
79
  conn = sqlite3.connect(DB_PATH)
80
  cursor = conn.cursor()
81
  cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
82
  tables = [row[0] for row in cursor.fetchall()]
 
83
 
84
+ if tables:
85
+ # Get table schema information
86
+ table_info = []
87
+ for table in tables:
88
+ cursor.execute(f"PRAGMA table_info({table});")
89
+ columns = [f"{col[1]} ({col[2]})" for col in cursor.fetchall()]
90
+ table_info.append(f"Table '{table}' has columns: {', '.join(columns)}")
91
+
92
+ # For questions about specific values, aggregations, or data analysis
93
+ if any(word in query.lower() for word in [
94
+ 'what is', 'how many', 'highest', 'lowest', 'maximum', 'minimum',
95
+ 'average', 'mean', 'sum', 'total', 'count', 'tip', 'fare', 'amount'
96
+ ]):
 
 
 
 
 
 
 
 
 
97
  try:
98
+ # Generate SQL query
99
+ context = f"The database contains the following tables:\n{chr(10).join(table_info)}\n\nQuestion: {query}"
100
+ sql_query = query_engine.generate_response(query_prompt.format(question=context))
101
+
102
+ # Execute query
103
  result_df = pd.read_sql_query(sql_query, conn)
104
 
105
+ # Format results
106
  if len(result_df) > 10:
107
  data_str = f"{result_df.head(10).to_string()}\n... (showing 10 of {len(result_df)} rows)"
108
  else:
109
  data_str = result_df.to_string()
110
 
111
+ # Generate response
112
  response = f"**SQL Query:**\n```sql\n{sql_query}\n```\n\n"
 
113
  if not result_df.empty:
114
  response += f"**Results:**\n```\n{data_str}\n```\n\n"
115
+
116
+ # Add interpretation
117
+ interpret_prompt = f"""
118
+ Question: {query}
119
+ SQL Query: {sql_query}
120
+ Results: {data_str}
121
+
122
+ Please provide a clear, concise answer to the question based on these results.
123
+ """
124
+ interpretation = query_engine.generate_response(interpret_prompt)
125
+ response += f"**Answer:**\n{interpretation}"
126
  else:
127
+ response += "No results found."
128
+
129
+ history.append({"role": "user", "content": query})
130
+ history.append({"role": "assistant", "content": response})
131
+ return "", history
132
 
133
  except Exception as e:
134
+ print(f"SQL Error: {str(e)}")
135
+ # Fall back to document query if SQL fails
136
+ response = document_assistant.process_query(query)
137
+ else:
138
+ # For non-data analysis questions, use document query
 
139
  response = document_assistant.process_query(query)
140
  else:
141
+ # No tables found, use document query
142
  response = document_assistant.process_query(query)
143
 
144
+ conn.close()
145
+
146
  except Exception as e:
147
+ print(f"Database Error: {str(e)}")
148
+ # Fall back to document query if database access fails
149
  response = document_assistant.process_query(query)
150
 
151
+ # Update history
152
  history.append({"role": "user", "content": query})
153
  history.append({"role": "assistant", "content": response})
 
154
  return "", history
155
 
156
  def process_file_upload(files):