SVashishta1 committed on
Commit
f35c7b5
·
1 Parent(s): 9c8c8b2
Files changed (1) hide show
  1. app.py +27 -23
app.py CHANGED
@@ -223,8 +223,11 @@ def process_file_upload(files):
223
  # Create table name from filename
224
  table_name = os.path.splitext(file_name)[0].replace(' ', '_').lower()
225
 
226
- # Load CSV into SQLite
227
  conn = sqlite3.connect(DB_PATH)
 
 
 
228
  load_csv_to_sqlite(file_path, conn, table_name)
229
 
230
  # Update current context
@@ -234,35 +237,26 @@ def process_file_upload(files):
234
  "table_name": table_name
235
  }
236
 
237
- # Get column info
238
  cursor = conn.cursor()
239
- cursor.execute(f"PRAGMA table_info({table_name});")
240
- columns = [f"{col[1]} ({col[2]})" for col in cursor.fetchall()]
241
-
242
- # Get row count
243
  cursor.execute(f"SELECT COUNT(*) FROM {table_name};")
244
  row_count = cursor.fetchone()[0]
245
 
246
- # Get sample of data
247
- cursor.execute(f"SELECT * FROM {table_name} LIMIT 5;")
248
- sample_rows = cursor.fetchall()
249
 
250
  conn.close()
251
 
252
  file_info.append("βœ… CSV File Successfully Loaded")
253
  file_info.append(f"πŸ“Š Table Name: {table_name}")
254
  file_info.append(f"πŸ“ˆ Total Rows: {row_count:,}")
255
- file_info.append(f"\nπŸ“‹ Columns:")
256
- for col in columns:
257
- file_info.append(f" β€’ {col}")
258
-
259
- if sample_rows:
260
- file_info.append("\nπŸ” Sample Data (first 5 rows):")
261
- sample_df = pd.DataFrame(sample_rows, columns=[col.split(' ')[0] for col in columns])
262
- file_info.append(f"```\n{sample_df.to_string()}\n```")
263
 
264
  except Exception as e:
265
  file_info.append(f"❌ Error loading CSV {file_name}: {str(e)}")
 
 
 
266
 
267
  else:
268
  # Process PDF or other document types
@@ -322,9 +316,11 @@ def load_csv_to_sqlite(file_path, conn, table_name):
322
  conn.execute("PRAGMA journal_mode = MEMORY")
323
  conn.execute("PRAGMA temp_store = MEMORY")
324
  conn.execute("PRAGMA cache_size = 10000")
325
- conn.execute("BEGIN TRANSACTION")
326
 
327
  try:
 
 
 
328
  # Read the CSV in chunks
329
  for i, chunk in enumerate(pd.read_csv(file_path, chunksize=chunksize)):
330
  # Optimize column types
@@ -343,13 +339,21 @@ def load_csv_to_sqlite(file_path, conn, table_name):
343
  # Create indices for common query columns
344
  for col in ['pickup_datetime', 'dropoff_datetime', 'tip_amount', 'fare_amount', 'total_amount']:
345
  try:
346
- conn.execute(f"CREATE INDEX IF NOT EXISTS idx_{table_name}_{col} ON {table_name}({col})")
347
- except:
348
- pass
 
 
 
 
 
349
 
350
- conn.execute("COMMIT")
351
  except Exception as e:
352
- conn.execute("ROLLBACK")
 
 
 
 
353
  raise e
354
 
355
  def list_documents():
 
223
  # Create table name from filename
224
  table_name = os.path.splitext(file_name)[0].replace(' ', '_').lower()
225
 
226
+ # Create a new connection for each file
227
  conn = sqlite3.connect(DB_PATH)
228
+
229
+ # Load CSV into SQLite
230
+ file_info.append(f"Loading CSV file: {file_name}...")
231
  load_csv_to_sqlite(file_path, conn, table_name)
232
 
233
  # Update current context
 
237
  "table_name": table_name
238
  }
239
 
240
+ # Get basic info about the table
241
  cursor = conn.cursor()
 
 
 
 
242
  cursor.execute(f"SELECT COUNT(*) FROM {table_name};")
243
  row_count = cursor.fetchone()[0]
244
 
245
+ cursor.execute(f"PRAGMA table_info({table_name});")
246
+ columns = [col[1] for col in cursor.fetchall()]
 
247
 
248
  conn.close()
249
 
250
  file_info.append("βœ… CSV File Successfully Loaded")
251
  file_info.append(f"πŸ“Š Table Name: {table_name}")
252
  file_info.append(f"πŸ“ˆ Total Rows: {row_count:,}")
253
+ file_info.append(f"πŸ“‹ Columns: {len(columns)}")
 
 
 
 
 
 
 
254
 
255
  except Exception as e:
256
  file_info.append(f"❌ Error loading CSV {file_name}: {str(e)}")
257
+ # Print the full error for debugging
258
+ import traceback
259
+ print(traceback.format_exc())
260
 
261
  else:
262
  # Process PDF or other document types
 
316
  conn.execute("PRAGMA journal_mode = MEMORY")
317
  conn.execute("PRAGMA temp_store = MEMORY")
318
  conn.execute("PRAGMA cache_size = 10000")
 
319
 
320
  try:
321
+ # Start transaction manually
322
+ conn.execute("BEGIN TRANSACTION")
323
+
324
  # Read the CSV in chunks
325
  for i, chunk in enumerate(pd.read_csv(file_path, chunksize=chunksize)):
326
  # Optimize column types
 
339
  # Create indices for common query columns
340
  for col in ['pickup_datetime', 'dropoff_datetime', 'tip_amount', 'fare_amount', 'total_amount']:
341
  try:
342
+ if col in chunk.columns: # Only create index if column exists
343
+ conn.execute(f"CREATE INDEX IF NOT EXISTS idx_{table_name}_{col} ON {table_name}({col})")
344
+ except Exception as idx_error:
345
+ print(f"Warning: Could not create index on {col}: {str(idx_error)}")
346
+
347
+ # Commit the transaction
348
+ conn.commit()
349
+ print(f"Successfully loaded {table_name} into database")
350
 
 
351
  except Exception as e:
352
+ # Only try to rollback if we're in a transaction
353
+ try:
354
+ conn.rollback()
355
+ except:
356
+ pass # If rollback fails, just continue
357
  raise e
358
 
359
  def list_documents():