SVashishta1
committed on
Commit
·
f35c7b5
1
Parent(s):
9c8c8b2
Error Fix
Browse files
app.py
CHANGED
|
@@ -223,8 +223,11 @@ def process_file_upload(files):
|
|
| 223 |
# Create table name from filename
|
| 224 |
table_name = os.path.splitext(file_name)[0].replace(' ', '_').lower()
|
| 225 |
|
| 226 |
-
#
|
| 227 |
conn = sqlite3.connect(DB_PATH)
|
|
|
|
|
|
|
|
|
|
| 228 |
load_csv_to_sqlite(file_path, conn, table_name)
|
| 229 |
|
| 230 |
# Update current context
|
|
@@ -234,35 +237,26 @@ def process_file_upload(files):
|
|
| 234 |
"table_name": table_name
|
| 235 |
}
|
| 236 |
|
| 237 |
-
# Get
|
| 238 |
cursor = conn.cursor()
|
| 239 |
-
cursor.execute(f"PRAGMA table_info({table_name});")
|
| 240 |
-
columns = [f"{col[1]} ({col[2]})" for col in cursor.fetchall()]
|
| 241 |
-
|
| 242 |
-
# Get row count
|
| 243 |
cursor.execute(f"SELECT COUNT(*) FROM {table_name};")
|
| 244 |
row_count = cursor.fetchone()[0]
|
| 245 |
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
sample_rows = cursor.fetchall()
|
| 249 |
|
| 250 |
conn.close()
|
| 251 |
|
| 252 |
file_info.append("✅ CSV File Successfully Loaded")
|
| 253 |
file_info.append(f"π Table Name: {table_name}")
|
| 254 |
file_info.append(f"π Total Rows: {row_count:,}")
|
| 255 |
-
file_info.append(f"
|
| 256 |
-
for col in columns:
|
| 257 |
-
file_info.append(f" • {col}")
|
| 258 |
-
|
| 259 |
-
if sample_rows:
|
| 260 |
-
file_info.append("\nπ Sample Data (first 5 rows):")
|
| 261 |
-
sample_df = pd.DataFrame(sample_rows, columns=[col.split(' ')[0] for col in columns])
|
| 262 |
-
file_info.append(f"```\n{sample_df.to_string()}\n```")
|
| 263 |
|
| 264 |
except Exception as e:
|
| 265 |
file_info.append(f"β Error loading CSV {file_name}: {str(e)}")
|
|
|
|
|
|
|
|
|
|
| 266 |
|
| 267 |
else:
|
| 268 |
# Process PDF or other document types
|
|
@@ -322,9 +316,11 @@ def load_csv_to_sqlite(file_path, conn, table_name):
|
|
| 322 |
conn.execute("PRAGMA journal_mode = MEMORY")
|
| 323 |
conn.execute("PRAGMA temp_store = MEMORY")
|
| 324 |
conn.execute("PRAGMA cache_size = 10000")
|
| 325 |
-
conn.execute("BEGIN TRANSACTION")
|
| 326 |
|
| 327 |
try:
|
|
|
|
|
|
|
|
|
|
| 328 |
# Read the CSV in chunks
|
| 329 |
for i, chunk in enumerate(pd.read_csv(file_path, chunksize=chunksize)):
|
| 330 |
# Optimize column types
|
|
@@ -343,13 +339,21 @@ def load_csv_to_sqlite(file_path, conn, table_name):
|
|
| 343 |
# Create indices for common query columns
|
| 344 |
for col in ['pickup_datetime', 'dropoff_datetime', 'tip_amount', 'fare_amount', 'total_amount']:
|
| 345 |
try:
|
| 346 |
-
|
| 347 |
-
|
| 348 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 349 |
|
| 350 |
-
conn.execute("COMMIT")
|
| 351 |
except Exception as e:
|
| 352 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 353 |
raise e
|
| 354 |
|
| 355 |
def list_documents():
|
|
|
|
| 223 |
# Create table name from filename
|
| 224 |
table_name = os.path.splitext(file_name)[0].replace(' ', '_').lower()
|
| 225 |
|
| 226 |
+
# Create a new connection for each file
|
| 227 |
conn = sqlite3.connect(DB_PATH)
|
| 228 |
+
|
| 229 |
+
# Load CSV into SQLite
|
| 230 |
+
file_info.append(f"Loading CSV file: {file_name}...")
|
| 231 |
load_csv_to_sqlite(file_path, conn, table_name)
|
| 232 |
|
| 233 |
# Update current context
|
|
|
|
| 237 |
"table_name": table_name
|
| 238 |
}
|
| 239 |
|
| 240 |
+
# Get basic info about the table
|
| 241 |
cursor = conn.cursor()
|
|
|
|
|
|
|
|
|
|
|
|
|
| 242 |
cursor.execute(f"SELECT COUNT(*) FROM {table_name};")
|
| 243 |
row_count = cursor.fetchone()[0]
|
| 244 |
|
| 245 |
+
cursor.execute(f"PRAGMA table_info({table_name});")
|
| 246 |
+
columns = [col[1] for col in cursor.fetchall()]
|
|
|
|
| 247 |
|
| 248 |
conn.close()
|
| 249 |
|
| 250 |
file_info.append("✅ CSV File Successfully Loaded")
|
| 251 |
file_info.append(f"π Table Name: {table_name}")
|
| 252 |
file_info.append(f"π Total Rows: {row_count:,}")
|
| 253 |
+
file_info.append(f"π Columns: {len(columns)}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 254 |
|
| 255 |
except Exception as e:
|
| 256 |
file_info.append(f"β Error loading CSV {file_name}: {str(e)}")
|
| 257 |
+
# Print the full error for debugging
|
| 258 |
+
import traceback
|
| 259 |
+
print(traceback.format_exc())
|
| 260 |
|
| 261 |
else:
|
| 262 |
# Process PDF or other document types
|
|
|
|
| 316 |
conn.execute("PRAGMA journal_mode = MEMORY")
|
| 317 |
conn.execute("PRAGMA temp_store = MEMORY")
|
| 318 |
conn.execute("PRAGMA cache_size = 10000")
|
|
|
|
| 319 |
|
| 320 |
try:
|
| 321 |
+
# Start transaction manually
|
| 322 |
+
conn.execute("BEGIN TRANSACTION")
|
| 323 |
+
|
| 324 |
# Read the CSV in chunks
|
| 325 |
for i, chunk in enumerate(pd.read_csv(file_path, chunksize=chunksize)):
|
| 326 |
# Optimize column types
|
|
|
|
| 339 |
# Create indices for common query columns
|
| 340 |
for col in ['pickup_datetime', 'dropoff_datetime', 'tip_amount', 'fare_amount', 'total_amount']:
|
| 341 |
try:
|
| 342 |
+
if col in chunk.columns: # Only create index if column exists
|
| 343 |
+
conn.execute(f"CREATE INDEX IF NOT EXISTS idx_{table_name}_{col} ON {table_name}({col})")
|
| 344 |
+
except Exception as idx_error:
|
| 345 |
+
print(f"Warning: Could not create index on {col}: {str(idx_error)}")
|
| 346 |
+
|
| 347 |
+
# Commit the transaction
|
| 348 |
+
conn.commit()
|
| 349 |
+
print(f"Successfully loaded {table_name} into database")
|
| 350 |
|
|
|
|
| 351 |
except Exception as e:
|
| 352 |
+
# Only try to rollback if we're in a transaction
|
| 353 |
+
try:
|
| 354 |
+
conn.rollback()
|
| 355 |
+
except:
|
| 356 |
+
pass # If rollback fails, just continue
|
| 357 |
raise e
|
| 358 |
|
| 359 |
def list_documents():
|