SVashishta1 committed on
Commit
6e54ca7
·
1 Parent(s): f35c7b5
Files changed (1) hide show
  1. app.py +216 -240
app.py CHANGED
@@ -6,10 +6,9 @@ import tempfile
6
  import pandas as pd
7
  import sqlite3
8
  from langchain_core.prompts import ChatPromptTemplate
 
9
  import plotly.express as px
10
- import plotly.io as pio
11
  import time
12
- from functools import lru_cache
13
 
14
  # Load environment variables
15
  load_dotenv()
@@ -18,164 +17,210 @@ load_dotenv()
18
  sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
19
 
20
  from backend.main import DocumentAssistant
21
- from backend.db import SimpleDB
22
- from backend.vector_db import ChromaVectorDB
23
- from backend.query_engine import QueryEngine
24
- from backend.document_parser import SimpleDocumentParser
25
-
26
- # Initialize components
27
- db = SimpleDB()
28
- vector_db = ChromaVectorDB(os.getenv("CHROMA_DB_PATH", "./data/chroma_db"))
29
- query_engine = QueryEngine()
30
 
31
- # Initialize the document parser
32
- document_parser = SimpleDocumentParser()
33
-
34
- # Initialize DocumentAssistant
35
  document_assistant = DocumentAssistant()
36
 
 
 
 
 
 
 
 
 
 
 
 
37
  # Database path for CSV data
38
  DB_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data", "csv_data.db")
39
  os.makedirs(os.path.dirname(DB_PATH), exist_ok=True)
40
 
41
- # Define the prompt with examples
42
- query_prompt = ChatPromptTemplate.from_messages([
43
- ("system", """You are an SQL expert. Generate an appropriate SQL query using SQLite syntax for the question provided. The query should be executable and return exactly what was asked for.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
 
45
- For questions about maximum/highest values, use MAX().
46
- For minimum/lowest values, use MIN().
47
- For averages, use AVG().
48
- For counts, use COUNT().
49
- For sums, use SUM().
50
 
51
- For visualization queries:
52
- 1. For trends over time:
53
- - Group by appropriate time unit (day, month, year)
54
- - Include relevant aggregations (AVG, COUNT, SUM)
55
- 2. For distributions:
56
- - Group by the value being distributed
57
- - Include COUNT or frequency
58
- 3. For comparisons:
59
- - Include multiple measures
60
- - Order appropriately
61
 
62
- Examples:
63
- 1. Question: "Plot tip amount trends by month"
64
- SQL: SELECT strftime('%Y-%m', pickup_datetime) as month, AVG(tip_amount) as avg_tip, COUNT(*) as count FROM data_tab GROUP BY month ORDER BY month;
65
 
66
- 2. Question: "Show distribution of fare amounts"
67
- SQL: SELECT fare_amount, COUNT(*) as frequency FROM data_tab GROUP BY fare_amount ORDER BY fare_amount;
 
68
 
69
- 3. Question: "What is the highest tip_amount in the dataset?"
70
- SQL: SELECT MAX(tip_amount) as highest_tip FROM data_tab;
 
 
 
 
 
 
 
 
 
 
71
 
72
- Generate only the SQL query, nothing else. Make sure to use the correct table name from the context provided."""),
73
- ("human", "{question}")
74
- ])
 
 
 
75
 
76
  # Define the prompt for interpreting the SQL query result
77
  interpret_prompt = ChatPromptTemplate.from_messages(
78
  [
79
- ("system", "You are an experienced data analyst. Examine the following data and provide a clear analysis. Base your analysis solely on the provided data."),
80
- ("human", "Question: {question}\n\nSQL Query: {sql_query}\n\nData:\n{data}")
81
  ]
82
  )
83
 
84
- # Add this as a global variable to track current context
85
- current_context = {
86
- "file_type": None, # 'csv' or 'pdf' or None
87
- "file_name": None,
88
- "table_name": None
89
- }
90
-
91
- # Add a simple cache for database schema information
92
- @lru_cache(maxsize=32)
93
- def get_table_info(table_name):
94
- """Get cached table information"""
95
- conn = sqlite3.connect(DB_PATH)
96
- cursor = conn.cursor()
97
-
98
- # Get column info
99
- cursor.execute(f"PRAGMA table_info({table_name});")
100
- columns = [f"{col[1]} ({col[2]})" for col in cursor.fetchall()]
101
-
102
- # Get row count - use approximate count for large tables
103
- cursor.execute(f"SELECT COUNT(*) FROM {table_name} LIMIT 1;")
104
- row_count = cursor.fetchone()[0]
105
-
106
- conn.close()
107
- return columns, row_count
108
-
109
- # Optimize the process_text_query function
110
  def process_text_query(query, history):
111
  """Process a text query and update chat history"""
112
  if not query:
113
  return "", history
114
 
115
- start_time = time.time()
116
-
117
- # Add a loading message to the history
118
  history.append({"role": "user", "content": query})
119
- history.append({"role": "assistant", "content": "Processing your query..."})
120
 
121
- # Use the current context to determine how to process the query
 
 
122
  if current_context["file_type"] == "csv" and current_context["table_name"]:
123
- table_name = current_context["table_name"]
124
-
125
  try:
126
- # Generate SQL query - simplified for performance
127
- sql_query = f"""
128
- SELECT * FROM {table_name} LIMIT 10;
129
- """
130
 
131
- # For specific types of queries, use optimized SQL
132
- if "highest" in query.lower() or "maximum" in query.lower() or "max" in query.lower():
133
- # Extract the column name from the query
134
- for col in ["tip_amount", "fare_amount", "total_amount"]:
135
- if col in query.lower():
136
- sql_query = f"SELECT MAX({col}) as max_value FROM {table_name};"
137
- break
138
 
139
- elif "average" in query.lower() or "mean" in query.lower() or "avg" in query.lower():
140
- # Extract the column name from the query
141
- for col in ["tip_amount", "fare_amount", "total_amount"]:
142
- if col in query.lower():
143
- sql_query = f"SELECT AVG({col}) as avg_value FROM {table_name};"
144
- break
145
 
146
- # Execute the query with timeout
147
- conn = sqlite3.connect(DB_PATH, timeout=10)
148
- conn.execute("PRAGMA temp_store = MEMORY;") # Store temp tables in memory
149
- conn.execute("PRAGMA journal_mode = OFF;") # Disable journaling
150
- conn.execute("PRAGMA synchronous = OFF;") # Disable synchronous writes
151
 
152
- # Use pandas with a small chunk size for large tables
153
- result_df = pd.read_sql_query(sql_query, conn, chunksize=1000)
154
 
155
- # Process the first chunk only for performance
156
- if hasattr(result_df, '__next__'):
157
- result_df = next(result_df)
158
 
159
- # Format the response
160
- if len(result_df) > 0:
161
- data_str = result_df.to_string(max_rows=5)
162
- response = f"**Results:**\n```\n{data_str}\n```\n\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
 
164
- # Add a simple interpretation based on the query type
165
- if "highest" in query.lower() or "maximum" in query.lower():
166
- for col in result_df.columns:
167
- if "max" in col.lower():
168
- response += f"The highest value is {result_df[col].iloc[0]}."
169
- break
170
- elif "average" in query.lower() or "mean" in query.lower():
171
- for col in result_df.columns:
172
- if "avg" in col.lower():
173
- response += f"The average value is {result_df[col].iloc[0]:.2f}."
174
- break
175
  else:
176
- response += f"Here are the first {len(result_df)} results from the table."
177
- else:
178
- response = "No results found for your query."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
 
180
  conn.close()
181
 
@@ -193,8 +238,8 @@ def process_text_query(query, history):
193
  processing_time = time.time() - start_time
194
  response += f"\n\n(Query processed in {processing_time:.2f} seconds)"
195
 
196
- # Update the last message with the actual response
197
- history[-1] = {"role": "assistant", "content": response}
198
 
199
  return "", history
200
 
@@ -223,40 +268,43 @@ def process_file_upload(files):
223
  # Create table name from filename
224
  table_name = os.path.splitext(file_name)[0].replace(' ', '_').lower()
225
 
226
- # Create a new connection for each file
227
  conn = sqlite3.connect(DB_PATH)
228
 
229
- # Load CSV into SQLite
230
- file_info.append(f"Loading CSV file: {file_name}...")
231
- load_csv_to_sqlite(file_path, conn, table_name)
 
 
 
 
232
 
233
  # Update current context
234
  current_context = {
235
  "file_type": "csv",
236
  "file_name": file_name,
237
- "table_name": table_name
238
  }
239
 
240
- # Get basic info about the table
241
  cursor = conn.cursor()
242
- cursor.execute(f"SELECT COUNT(*) FROM {table_name};")
243
- row_count = cursor.fetchone()[0]
244
 
245
- cursor.execute(f"PRAGMA table_info({table_name});")
246
- columns = [col[1] for col in cursor.fetchall()]
 
247
 
248
  conn.close()
249
 
250
  file_info.append("✅ CSV File Successfully Loaded")
251
- file_info.append(f"📊 Table Name: {table_name}")
 
252
  file_info.append(f"📈 Total Rows: {row_count:,}")
253
- file_info.append(f"📋 Columns: {len(columns)}")
254
 
255
  except Exception as e:
256
  file_info.append(f"❌ Error loading CSV {file_name}: {str(e)}")
257
- # Print the full error for debugging
258
- import traceback
259
- print(traceback.format_exc())
260
 
261
  else:
262
  # Process PDF or other document types
@@ -279,83 +327,6 @@ def process_file_upload(files):
279
 
280
  return "\n".join(file_info)
281
 
282
- def process_voice_input(audio_path):
283
- """Process voice input and return transcribed text"""
284
- if audio_path is None:
285
- return "No audio recorded"
286
-
287
- # Since we don't have VoiceAssistant, return a placeholder message
288
- return "Voice transcription is not available"
289
-
290
- def text_to_speech_output(text):
291
- """Convert text to speech"""
292
- if not text or len(text) == 0:
293
- return None
294
-
295
- # Extract the last assistant message
296
- last_message = None
297
- for msg in reversed(text):
298
- if msg["role"] == "assistant":
299
- last_message = msg["content"]
300
- break
301
-
302
- if not last_message:
303
- return None
304
-
305
- # Since we don't have VoiceAssistant, return None
306
- return None
307
-
308
- # Optimize the load_csv_to_sqlite function
309
- def load_csv_to_sqlite(file_path, conn, table_name):
310
- """Load CSV data into SQLite database with optimizations"""
311
- # Use larger chunk size for faster loading
312
- chunksize = 10000
313
-
314
- # Configure SQLite for faster imports
315
- conn.execute("PRAGMA synchronous = OFF")
316
- conn.execute("PRAGMA journal_mode = MEMORY")
317
- conn.execute("PRAGMA temp_store = MEMORY")
318
- conn.execute("PRAGMA cache_size = 10000")
319
-
320
- try:
321
- # Start transaction manually
322
- conn.execute("BEGIN TRANSACTION")
323
-
324
- # Read the CSV in chunks
325
- for i, chunk in enumerate(pd.read_csv(file_path, chunksize=chunksize)):
326
- # Optimize column types
327
- for col in chunk.columns:
328
- # Convert date columns to datetime
329
- if 'date' in col.lower() or 'time' in col.lower():
330
- try:
331
- chunk[col] = pd.to_datetime(chunk[col], errors='coerce')
332
- except:
333
- pass
334
-
335
- # Load the chunk into the SQLite database
336
- if_exists = 'replace' if i == 0 else 'append'
337
- chunk.to_sql(table_name, conn, if_exists=if_exists, index=False, method='multi')
338
-
339
- # Create indices for common query columns
340
- for col in ['pickup_datetime', 'dropoff_datetime', 'tip_amount', 'fare_amount', 'total_amount']:
341
- try:
342
- if col in chunk.columns: # Only create index if column exists
343
- conn.execute(f"CREATE INDEX IF NOT EXISTS idx_{table_name}_{col} ON {table_name}({col})")
344
- except Exception as idx_error:
345
- print(f"Warning: Could not create index on {col}: {str(idx_error)}")
346
-
347
- # Commit the transaction
348
- conn.commit()
349
- print(f"Successfully loaded {table_name} into database")
350
-
351
- except Exception as e:
352
- # Only try to rollback if we're in a transaction
353
- try:
354
- conn.rollback()
355
- except:
356
- pass # If rollback fails, just continue
357
- raise e
358
-
359
  def list_documents():
360
  """List all indexed documents"""
361
  info_list = []
@@ -378,22 +349,7 @@ def list_documents():
378
  cursor.execute(f"SELECT COUNT(*) FROM {table[0]};")
379
  row_count = cursor.fetchone()[0]
380
 
381
- # Get sample of unique values for some interesting columns
382
- sample_info = []
383
- for col in ['vendor_id', 'rate_code', 'payment_type']:
384
- if col in columns:
385
- cursor.execute(f"SELECT DISTINCT {col} FROM {table[0]} LIMIT 5;")
386
- unique_vals = [str(row[0]) for row in cursor.fetchall()]
387
- if unique_vals:
388
- sample_info.append(f"{col}: {', '.join(unique_vals)}")
389
-
390
- info_list.append(f"\n🔹 Table: {table[0]}")
391
- info_list.append(f" - Rows: {row_count:,}")
392
- info_list.append(f" - Columns: {len(columns)}")
393
- if sample_info:
394
- info_list.append(" - Sample values:")
395
- for info in sample_info:
396
- info_list.append(f" • {info}")
397
 
398
  conn.close()
399
  except Exception as e:
@@ -421,6 +377,32 @@ def clear_context():
421
  }
422
  return None
423
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
424
  # Create Gradio interface
425
  with gr.Blocks(title="AI Document Analysis & Voice Assistant") as demo:
426
  gr.Markdown("# 🤖 AI Document Analysis & Voice Assistant")
@@ -466,6 +448,7 @@ with gr.Blocks(title="AI Document Analysis & Voice Assistant") as demo:
466
  )
467
 
468
  clear_btn.click(lambda: None, None, chatbot, queue=False)
 
469
 
470
  voice_btn.click(
471
  lambda: gr.update(visible=True),
@@ -486,13 +469,6 @@ with gr.Blocks(title="AI Document Analysis & Voice Assistant") as demo:
486
  inputs=[chatbot],
487
  outputs=[audio_output]
488
  )
489
-
490
- # Add event handler for clear context button
491
- clear_context_btn.click(
492
- clear_context,
493
- inputs=[],
494
- outputs=[chatbot]
495
- )
496
 
497
  with gr.Tab("Document Upload"):
498
  file_upload = gr.File(
 
6
  import pandas as pd
7
  import sqlite3
8
  from langchain_core.prompts import ChatPromptTemplate
9
+ from langchain_groq import ChatGroq
10
  import plotly.express as px
 
11
  import time
 
12
 
13
  # Load environment variables
14
  load_dotenv()
 
17
  sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
18
 
19
  from backend.main import DocumentAssistant
 
 
 
 
 
 
 
 
 
20
 
21
+ # Initialize the document assistant
 
 
 
22
  document_assistant = DocumentAssistant()
23
 
24
# Groq-hosted chat model (llama3-8b-8192) used by the prompt chains in this
# module. The API key is read from the GROQ_API_KEY environment variable.
llm = ChatGroq(
    api_key=os.getenv("GROQ_API_KEY"),
    model="llama3-8b-8192",
    temperature=0,
    max_retries=2,
    max_tokens=None,  # no explicit cap on response length
    timeout=None,  # no explicit request timeout
    verbose=True,
)
34
+
35
  # Database path for CSV data
36
  DB_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data", "csv_data.db")
37
  os.makedirs(os.path.dirname(DB_PATH), exist_ok=True)
38
 
39
# Module-level record of the file that chat queries currently target.
# Every field starts unset (None) until a file has been loaded.
current_context = dict.fromkeys(("file_type", "file_name", "table_name"))
45
+
46
# Prompt that turns a natural-language question into a single SQLite query.
# The few-shot examples and the convention list steer the model away from
# syntax SQLite does not accept.
# NOTE: keep the literal text free of curly braces other than the {question}
# placeholder; ChatPromptTemplate interprets braces as template variables.
query_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", """
    You are an SQL and data analysis expert. Generate an appropriate SQL query using SQLite syntax for the question provided, without any explanations or code comments.
    Follow SQLite-specific conventions, as shown in the examples below:

    Example 1:
    Question: "What is the average fare for trips over 10 miles?"
    SQL Query: SELECT AVG(fare_amount) FROM data_tab WHERE trip_distance > 10;

    Example 2:
    Question: "How many trips were taken in each month?"
    SQL Query: SELECT strftime('%m', pickup_datetime) AS month, COUNT(*) AS trip_count FROM data_tab GROUP BY month;

    Example 3:
    Question: "What is the total fare amount for each driver (medallion) per day?"
    SQL Query: SELECT DATE(pickup_datetime) AS date, medallion, SUM(fare_amount) AS total_fare FROM data_tab GROUP BY date, medallion;

    Example 4:
    Question: "What is the highest tip amount in the dataset?"
    SQL Query: SELECT MAX(tip_amount) as highest_tip FROM data_tab;

    Example 5:
    Question: "Plot a bar graph for tip trends by month"
    SQL Query: SELECT strftime('%Y-%m', pickup_datetime) as month, AVG(tip_amount) as avg_tip, COUNT(*) as count FROM data_tab GROUP BY month ORDER BY month;

    SQLite-Specific Conventions:

    1. Date and Time Extraction:
       - Instead of `EXTRACT(YEAR FROM column)`, use `strftime('%Y', column)` to extract the year.
       - Example: `SELECT strftime('%Y', pickup_datetime) FROM data_tab;`

    2. String Length:
       - Instead of `CHAR_LENGTH(column)`, use `LENGTH(column)`.
       - Example: `SELECT LENGTH(passenger_name) FROM data_tab;`

    3. Regular Expressions:
       - SQLite does not support `REGEXP`. Use `LIKE` for simple patterns or avoid regular expressions.
       - Example: `SELECT * FROM data_tab WHERE passenger_name LIKE 'A%';`

    4. Window Functions:
       - For row numbering, use `ROW_NUMBER()` if supported, or simulate with joins.
       - Example: `SELECT id, ROW_NUMBER() OVER (ORDER BY pickup_datetime) AS row_num FROM data_tab;`

    5. Data Type Casting:
       - Use `CAST(column AS TYPE)`, but note that SQLite supports limited types.
       - Example: `SELECT CAST(fare_amount AS INTEGER) FROM data_tab;`

    6. Full Outer Join Workaround:
       - SQLite versions before 3.39 support neither `FULL OUTER JOIN` nor `RIGHT JOIN`. Combine two `LEFT JOIN`s with `UNION ALL` for the same effect.
       - Example:
         ```
         SELECT a.*, b.*
         FROM table_a a
         LEFT JOIN table_b b ON a.id = b.id
         UNION ALL
         SELECT a.*, b.*
         FROM table_b b
         LEFT JOIN table_a a ON a.id = b.id
         WHERE a.id IS NULL;
         ```

    Use these examples and guidelines to generate an SQL query compatible with SQLite syntax for the question provided.
    Always use 'data_tab' as the table name.
    """),
        ("human", "{question}"),
    ]
)
114
 
115
# Prompt that asks the model to explain a query result in plain language.
# Template variables: question, sql_query, data_summary.
interpret_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are an experienced data analyst. "
            "Provide a concise, natural language answer based on the given data summary. "
            "If relevant, give key statistics, trends, or patterns.",
        ),
        (
            "human",
            "Question: {question}\n"
            "SQL Query: {sql_query}\n"
            "Data Summary:\n"
            "{data_summary}",
        ),
    ]
)
122
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
  def process_text_query(query, history):
124
  """Process a text query and update chat history"""
125
  if not query:
126
  return "", history
127
 
128
+ # Add the user's query to history
 
 
129
  history.append({"role": "user", "content": query})
 
130
 
131
+ start_time = time.time()
132
+
133
+ # Check if we're in CSV context
134
  if current_context["file_type"] == "csv" and current_context["table_name"]:
 
 
135
  try:
136
+ # Connect to the database
137
+ conn = sqlite3.connect(DB_PATH)
 
 
138
 
139
+ # Get column information for context
140
+ cursor = conn.cursor()
141
+ cursor.execute(f"PRAGMA table_info({current_context['table_name']});")
142
+ columns = [info[1] for info in cursor.fetchall()]
143
+ columns_str = ", ".join(columns)
 
 
144
 
145
+ # Create question with context
146
+ question_with_context = f"The table 'data_tab' has columns: {columns_str}. {query}"
 
 
 
 
147
 
148
+ # Generate SQL query using LLM
149
+ ai_msg = query_prompt | llm
150
+ sql_query = ai_msg.invoke({"question": question_with_context}).content.strip()
 
 
151
 
152
+ print(f"Generated SQL Query: {sql_query}")
 
153
 
154
+ # Check if this is a visualization request
155
+ is_visualization = any(word in query.lower() for word in ['plot', 'graph', 'chart', 'visualize', 'visualization', 'trend'])
 
156
 
157
+ try:
158
+ # Execute the query
159
+ result_df = pd.read_sql_query(sql_query, conn)
160
+
161
+ # Generate data summary
162
+ if not result_df.empty:
163
+ data_summary = result_df.describe(include='all').to_string()
164
+
165
+ # For small result sets, include the actual data
166
+ if len(result_df) <= 10:
167
+ data_summary += f"\n\nFull Results:\n{result_df.to_string()}"
168
+ else:
169
+ data_summary += f"\n\nFirst 5 rows:\n{result_df.head(5).to_string()}"
170
+ else:
171
+ data_summary = "No relevant data found."
172
+
173
+ # Generate interpretation
174
+ answer_chain = interpret_prompt | llm
175
+ interpretation = answer_chain.invoke({
176
+ "question": query,
177
+ "sql_query": sql_query,
178
+ "data_summary": data_summary
179
+ }).content.strip()
180
 
181
+ # Create the response
182
+ response = f"**SQL Query:**\n```sql\n{sql_query}\n```\n\n"
183
+
184
+ if not result_df.empty:
185
+ if len(result_df) > 10:
186
+ response += f"**Results (first 5 of {len(result_df)} rows):**\n```\n{result_df.head(5).to_string()}\n```\n\n"
187
+ else:
188
+ response += f"**Results:**\n```\n{result_df.to_string()}\n```\n\n"
 
 
 
189
  else:
190
+ response += "**No results found.**\n\n"
191
+
192
+ response += f"**Analysis:**\n{interpretation}"
193
+
194
+ # Add visualization if requested
195
+ if is_visualization and not result_df.empty:
196
+ try:
197
+ # Determine the type of visualization based on the data
198
+ if len(result_df.columns) >= 2:
199
+ # Find numeric columns for y-axis
200
+ numeric_cols = result_df.select_dtypes(include=['number']).columns.tolist()
201
+
202
+ if len(numeric_cols) >= 1 and len(result_df) > 1:
203
+ # Use the first column as x and first numeric column as y
204
+ x_col = result_df.columns[0]
205
+ y_col = numeric_cols[0]
206
+
207
+ # Create appropriate plot based on data characteristics
208
+ if 'month' in result_df.columns or 'date' in result_df.columns or 'year' in result_df.columns:
209
+ # Time series data - use line chart
210
+ fig = px.line(result_df, x=x_col, y=numeric_cols, title="Time Series Analysis")
211
+ else:
212
+ # Regular data - use bar chart
213
+ fig = px.bar(result_df, x=x_col, y=y_col, title="Data Visualization")
214
+
215
+ # Convert to HTML and add to response
216
+ plot_html = fig.to_html(full_html=False, include_plotlyjs='cdn')
217
+ response += f"\n\n**Visualization:**\n<div>{plot_html}</div>"
218
+ except Exception as viz_error:
219
+ print(f"Visualization error: {str(viz_error)}")
220
+ # Continue without visualization if there's an error
221
+
222
+ except Exception as e:
223
+ response = f"**SQL Query:**\n```sql\n{sql_query}\n```\n\n**Error executing query:** {str(e)}"
224
 
225
  conn.close()
226
 
 
238
  processing_time = time.time() - start_time
239
  response += f"\n\n(Query processed in {processing_time:.2f} seconds)"
240
 
241
+ # Add the response to history
242
+ history.append({"role": "assistant", "content": response})
243
 
244
  return "", history
245
 
 
268
  # Create table name from filename
269
  table_name = os.path.splitext(file_name)[0].replace(' ', '_').lower()
270
 
271
+ # Load CSV into SQLite
272
  conn = sqlite3.connect(DB_PATH)
273
 
274
+ # Configure SQLite for faster imports
275
+ conn.execute("PRAGMA synchronous = OFF")
276
+ conn.execute("PRAGMA journal_mode = MEMORY")
277
+
278
+ # Read the CSV and load it into SQLite
279
+ df = pd.read_csv(file_path)
280
+ df.to_sql('data_tab', conn, if_exists='replace', index=False)
281
 
282
  # Update current context
283
  current_context = {
284
  "file_type": "csv",
285
  "file_name": file_name,
286
+ "table_name": "data_tab" # Always use data_tab as the table name
287
  }
288
 
289
+ # Get column info
290
  cursor = conn.cursor()
291
+ cursor.execute("PRAGMA table_info(data_tab);")
292
+ columns = [f"{col[1]} ({col[2]})" for col in cursor.fetchall()]
293
 
294
+ # Get row count
295
+ cursor.execute("SELECT COUNT(*) FROM data_tab;")
296
+ row_count = cursor.fetchone()[0]
297
 
298
  conn.close()
299
 
300
  file_info.append("✅ CSV File Successfully Loaded")
301
+ file_info.append(f"📊 Table Name: data_tab")
302
+ file_info.append(f"📄 Source File: {file_name}")
303
  file_info.append(f"📈 Total Rows: {row_count:,}")
304
+ file_info.append(f"📋 Columns: {', '.join(columns)}")
305
 
306
  except Exception as e:
307
  file_info.append(f"❌ Error loading CSV {file_name}: {str(e)}")
 
 
 
308
 
309
  else:
310
  # Process PDF or other document types
 
327
 
328
  return "\n".join(file_info)
329
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
330
  def list_documents():
331
  """List all indexed documents"""
332
  info_list = []
 
349
  cursor.execute(f"SELECT COUNT(*) FROM {table[0]};")
350
  row_count = cursor.fetchone()[0]
351
 
352
+ info_list.append(f"- {table[0]} ({row_count:,} rows, {len(columns)} columns)")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
353
 
354
  conn.close()
355
  except Exception as e:
 
377
  }
378
  return None
379
 
380
def process_voice_input(audio_path):
    """Return the text to display for a recorded audio clip.

    Speech-to-text is not wired up (no VoiceAssistant is available), so this
    only reports whether a recording was supplied.

    Args:
        audio_path: Path of the recorded audio, or None when nothing was recorded.

    Returns:
        A fixed status message; no transcription is performed.
    """
    if audio_path is not None:
        # Placeholder until a transcription backend exists.
        return "Voice transcription is not available"
    return "No audio recorded"
387
+
388
def text_to_speech_output(text):
    """Convert the latest assistant reply to speech (currently a stub).

    Args:
        text: Chat history, a sequence of message dicts carrying "role" and
            "content" keys. May be None or empty.

    Returns:
        Always None: no text-to-speech backend (VoiceAssistant) is available,
        so no audio is produced.
    """
    if not text:
        return None

    # Most recent assistant message. Entries that are not dicts or that lack
    # a "role" key are skipped instead of raising KeyError.
    last_message = next(
        (
            msg.get("content")
            for msg in reversed(text)
            if isinstance(msg, dict) and msg.get("role") == "assistant"
        ),
        None,
    )
    if not last_message:
        return None

    # Since we don't have VoiceAssistant, there is nothing to synthesize.
    return None
405
+
406
  # Create Gradio interface
407
  with gr.Blocks(title="AI Document Analysis & Voice Assistant") as demo:
408
  gr.Markdown("# 🤖 AI Document Analysis & Voice Assistant")
 
448
  )
449
 
450
  clear_btn.click(lambda: None, None, chatbot, queue=False)
451
+ clear_context_btn.click(clear_context, inputs=[], outputs=[chatbot])
452
 
453
  voice_btn.click(
454
  lambda: gr.update(visible=True),
 
469
  inputs=[chatbot],
470
  outputs=[audio_output]
471
  )
 
 
 
 
 
 
 
472
 
473
  with gr.Tab("Document Upload"):
474
  file_upload = gr.File(