LakshmiHarika committed on
Commit
783613c
·
verified ·
1 Parent(s): be6ba10

Update pages/Data Collection.py

Browse files
Files changed (1) hide show
  1. pages/Data Collection.py +29 -17
pages/Data Collection.py CHANGED
@@ -412,43 +412,55 @@ if st.session_state.current_page == "explore_excel":
412
  for chunk in pd.read_excel('large_file.xlsx', chunksize=chunk_size):
413
  print(chunk.head())
414
  """, language="python")
415
-
416
  # 5. Sheet Name Selection
417
- st.write("#### 5. Sheet Name Selection")
 
 
418
  st.write("Excel files may have multiple sheets, and reading the wrong one can lead to incorrect analysis.")
419
  st.code("""
 
420
  data = pd.read_excel('file.xlsx', sheet_name='Sheet1')
421
  print(data.head())
422
  """, language="python")
423
 
424
  # 6. Data Type Conversion
425
- st.write("#### 6. Data Type Conversion")
 
 
426
  st.write("Excel files may have columns with inconsistent or incorrect data types.")
427
  st.code("""
 
428
  data = pd.read_excel('file.xlsx')
429
- data['column_name'] = data['column_name'].astype(int)
430
  print(data.dtypes)
431
  """, language="python")
432
-
433
- # 7. Merged Cells
434
- st.write("#### 7. Merged Cells")
435
- st.write("Merged cells in Excel can lead to missing or misaligned data.")
 
 
436
  st.code("""
437
- data = pd.read_excel('file.xlsx', merge_cells=False)
 
 
 
438
  print(data.head())
439
  """, language="python")
440
-
441
 
442
- # 8. Date Parsing
443
- st.write("#### 8. Date Parsing")
444
- st.write("Dates in Excel files may not be interpreted correctly.")
 
 
445
  st.code("""
446
- data = pd.read_excel('file.xlsx', parse_dates=['date_column'])
447
- print(data.dtypes)
 
448
  """, language="python")
449
 
450
- # Back Button
451
- col1 = st.columns(1)
452
  with col1:
453
  if st.button("⬅️ Back to Previous Page"):
454
  navigate_to("main")
 
412
  for chunk in pd.read_excel('large_file.xlsx', chunksize=chunk_size):
413
  print(chunk.head())
414
  """, language="python")
415
+
416
  # 5. Sheet Name Selection
417
+ st.markdown("""
418
+ <h4 style="color: #5b2c6f;">5. Sheet Name Selection</h4>
419
+ """, unsafe_allow_html=True)
420
  st.write("Excel files may have multiple sheets, and reading the wrong one can lead to incorrect analysis.")
421
  st.code("""
422
+ # Specify the sheet name explicitly
423
  data = pd.read_excel('file.xlsx', sheet_name='Sheet1')
424
  print(data.head())
425
  """, language="python")
426
 
427
  # 6. Data Type Conversion
428
+ st.markdown("""
429
+ <h4 style="color: #5b2c6f;">6. Data Type Conversion</h4>
430
+ """, unsafe_allow_html=True)
431
  st.write("Excel files may have columns with inconsistent or incorrect data types.")
432
  st.code("""
433
+ # Convert columns to appropriate data types
434
  data = pd.read_excel('file.xlsx')
435
+ data['column_name'] = data['column_name'].astype(int) # Replace 'column_name' with your column
436
  print(data.dtypes)
437
  """, language="python")
438
+
439
+ # 7. Hidden Characters or Whitespace
440
+ st.markdown("""
441
+ <h4 style="color: #5b2c6f;">7. Hidden Characters or Whitespace</h4>
442
+ """, unsafe_allow_html=True)
443
+ st.write("Whitespace or hidden characters in the data can cause parsing issues.")
444
  st.code("""
445
+ # Remove leading/trailing whitespaces
446
+ data = pd.read_excel('file.xlsx')
447
+ data.columns = data.columns.str.strip() # Remove whitespace from column names
448
+ data['column_name'] = data['column_name'].str.strip() # Clean specific column
449
  print(data.head())
450
  """, language="python")
 
451
 
452
+ # 8. Merged Cells
453
+ st.markdown("""
454
+ <h4 style="color: #5b2c6f;">8. Merged Cells</h4>
455
+ """, unsafe_allow_html=True)
456
+ st.write("Merged cells in Excel can lead to missing or misaligned data.")
457
  st.code("""
458
+ # Handle merged cells by filling forward
459
+ data = pd.read_excel('file.xlsx').ffill() # Forward-fill the blanks left behind by merged cells
460
+ print(data.head())
461
  """, language="python")
462
 
463
+ col1 = st.columns(1)[0] # Access the first (and only) column from the list of columns
 
464
  with col1:
465
  if st.button("⬅️ Back to Previous Page"):
466
  navigate_to("main")