LakshmiHarika committed on
Commit
be6ba10
·
verified ·
1 Parent(s): e4e65de

Update pages/Data Collection.py

Browse files
Files changed (1) hide show
  1. pages/Data Collection.py +46 -29
pages/Data Collection.py CHANGED
@@ -267,13 +267,15 @@ if st.session_state.current_page == "main":
267
  if st.button("🌐 HTML"):
268
  navigate_to("explore_html")
269
 
270
- # Pages for Each Format
271
- elif st.session_state.current_page == "explore_excel":
272
- # Section about Excel
 
273
  st.markdown("""
274
  <h2 style="color: #BB3385;">Excel</h2>
275
  """, unsafe_allow_html=True)
276
-
 
277
  st.write("""
278
  - **Excel** is a powerful spreadsheet software developed by Microsoft.
279
  - It is widely used for:
@@ -288,29 +290,30 @@ elif st.session_state.current_page == "explore_excel":
288
  - Excel is an essential tool for managing and analyzing structured data in various industries.
289
  """)
290
 
 
291
  st.markdown("""
292
  <h3 style="color: #5b2c6f;">Reading Excel Files in Python</h3>
293
  """, unsafe_allow_html=True)
294
-
295
- # Code example
296
  st.code("""
297
  import pandas as pd
298
 
299
  # Read the Excel file
300
  data = pd.read_excel('path_to_file.xlsx')
 
 
301
 
302
- print(data.head()) # displays first 5 rows in excel file
303
- """, language="python")
 
304
 
305
- st.write("### Working with Sheets in Excel")
306
-
307
  # Importing a Single Sheet
308
  st.write("#### Importing a Single Excel Sheet")
309
  st.code("""
310
  df = pd.read_excel('path_to_file.xlsx', sheet_name=0)
311
  print(df)
312
  """, language="python")
313
-
314
  # Importing Multiple Sheets
315
  st.write("#### Importing Multiple Sheets from Excel")
316
  st.code("""
@@ -320,9 +323,12 @@ elif st.session_state.current_page == "explore_excel":
320
  print(data.head())
321
  """, language="python")
322
 
323
- st.write("### Exporting Data to Excel Files")
324
-
325
- # Exporting a Single DataFrame to Excel
 
 
 
326
  st.write("#### Exporting a Single DataFrame")
327
  st.code("""
328
  data = pd.DataFrame({
@@ -333,8 +339,8 @@ elif st.session_state.current_page == "explore_excel":
333
  # Export the DataFrame to an Excel file
334
  data.to_excel('single_sheet_output.xlsx', index=False)
335
  """, language="python")
336
-
337
- # Exporting Multiple DataFrames to Multiple Sheets
338
  st.write("#### Exporting Multiple DataFrames to Different Sheets")
339
  st.code("""
340
  data1 = pd.DataFrame({
@@ -359,43 +365,54 @@ elif st.session_state.current_page == "explore_excel":
359
  data3.to_excel(writer, sheet_name='Language Scores', index=False)
360
  """, language="python")
361
 
362
- st.write("### Common Issues with Excel Files")
 
 
 
363
 
364
  # 1. File Format Compatibility
365
- st.write("#### 1. File Format Compatibility")
 
 
366
  st.write("Excel files may come in different formats like `.xls` and `.xlsx`, which can lead to compatibility issues.")
367
  st.code("""
368
  data = pd.read_excel('file.xls', engine='xlrd') # For .xls files
369
  data = pd.read_excel('file.xlsx', engine='openpyxl') # For .xlsx files
370
  print(data.head())
371
  """, language="python")
372
-
373
  # 2. Encoding Issues
374
- st.write("#### 2. Encoding Issues")
 
 
375
  st.write("Sometimes Excel files might have special characters that cause encoding problems.")
376
  st.code("""
377
  data = pd.read_excel('file.xlsx', encoding='utf-8') # Replace with the correct encoding
378
  print(data.head())
379
  """, language="python")
380
-
381
  # 3. Missing or Incomplete Data
382
- st.write("#### 3. Missing or Incomplete Data")
 
 
383
  st.write("Missing values can lead to errors during data processing.")
384
  st.code("""
385
  data = pd.read_excel('file.xlsx')
386
  data.fillna(0, inplace=True) # Replace NaN values with 0 or other defaults
387
  print(data.head())
388
  """, language="python")
389
-
390
  # 4. Large File Sizes
391
- st.write("#### 4. Large File Sizes")
 
 
392
  st.write("Large Excel files may cause performance issues or run out of memory.")
393
  st.code("""
394
  chunk_size = 1000
395
  for chunk in pd.read_excel('large_file.xlsx', chunksize=chunk_size):
396
  print(chunk.head())
397
  """, language="python")
398
-
399
  # 5. Sheet Name Selection
400
  st.write("#### 5. Sheet Name Selection")
401
  st.write("Excel files may have multiple sheets, and reading the wrong one can lead to incorrect analysis.")
@@ -413,8 +430,7 @@ elif st.session_state.current_page == "explore_excel":
413
  print(data.dtypes)
414
  """, language="python")
415
 
416
-
417
- # 8. Merged Cells
418
  st.write("#### 7. Merged Cells")
419
  st.write("Merged cells in Excel can lead to missing or misaligned data.")
420
  st.code("""
@@ -423,7 +439,7 @@ elif st.session_state.current_page == "explore_excel":
423
  """, language="python")
424
 
425
 
426
- # 10. Date Parsing
427
  st.write("#### 8. Date Parsing")
428
  st.write("Dates in Excel files may not be interpreted correctly.")
429
  st.code("""
@@ -431,12 +447,13 @@ elif st.session_state.current_page == "explore_excel":
431
  print(data.dtypes)
432
  """, language="python")
433
 
 
434
  col1 = st.columns(1)
435
-
436
  with col1:
437
  if st.button("⬅️ Back to Previous Page"):
438
  navigate_to("main")
439
 
 
440
 
441
 
442
  elif st.session_state.current_page == "explore_images_video":
 
267
  if st.button("🌐 HTML"):
268
  navigate_to("explore_html")
269
 
270
+
271
+ # Page for Explore Excel
272
+ if st.session_state.current_page == "explore_excel":
273
+ # Main Heading
274
  st.markdown("""
275
  <h2 style="color: #BB3385;">Excel</h2>
276
  """, unsafe_allow_html=True)
277
+
278
+ # Overview Section
279
  st.write("""
280
  - **Excel** is a powerful spreadsheet software developed by Microsoft.
281
  - It is widely used for:
 
290
  - Excel is an essential tool for managing and analyzing structured data in various industries.
291
  """)
292
 
293
+ # Reading Excel Files Section
294
  st.markdown("""
295
  <h3 style="color: #5b2c6f;">Reading Excel Files in Python</h3>
296
  """, unsafe_allow_html=True)
297
+
 
298
  st.code("""
299
  import pandas as pd
300
 
301
  # Read the Excel file
302
  data = pd.read_excel('path_to_file.xlsx')
303
+ print(data.head()) # Displays first 5 rows in Excel file
304
+ """, language="python")
305
 
306
+ st.markdown("""
307
+ <h3 style="color: #5b2c6f;">Working with Sheets in Excel</h3>
308
+ """, unsafe_allow_html=True)
309
 
 
 
310
  # Importing a Single Sheet
311
  st.write("#### Importing a Single Excel Sheet")
312
  st.code("""
313
  df = pd.read_excel('path_to_file.xlsx', sheet_name=0)
314
  print(df)
315
  """, language="python")
316
+
317
  # Importing Multiple Sheets
318
  st.write("#### Importing Multiple Sheets from Excel")
319
  st.code("""
 
323
  print(data.head())
324
  """, language="python")
325
 
326
+ # Exporting Data Section
327
+ st.markdown("""
328
+ <h3 style="color: #5b2c6f;">Exporting Data to Excel Files</h3>
329
+ """, unsafe_allow_html=True)
330
+
331
+ # Exporting a Single DataFrame
332
  st.write("#### Exporting a Single DataFrame")
333
  st.code("""
334
  data = pd.DataFrame({
 
339
  # Export the DataFrame to an Excel file
340
  data.to_excel('single_sheet_output.xlsx', index=False)
341
  """, language="python")
342
+
343
+ # Exporting Multiple DataFrames
344
  st.write("#### Exporting Multiple DataFrames to Different Sheets")
345
  st.code("""
346
  data1 = pd.DataFrame({
 
365
  data3.to_excel(writer, sheet_name='Language Scores', index=False)
366
  """, language="python")
367
 
368
+ # Issues Section
369
+ st.markdown("""
370
+ <h3 style="color: #BB3385;">Common Issues with Excel Files</h3>
371
+ """, unsafe_allow_html=True)
372
 
373
  # 1. File Format Compatibility
374
+ st.markdown("""
375
+ <h4 style="color: #5b2c6f;">1. File Format Compatibility</h4>
376
+ """, unsafe_allow_html=True)
377
  st.write("Excel files may come in different formats like `.xls` and `.xlsx`, which can lead to compatibility issues.")
378
  st.code("""
379
  data = pd.read_excel('file.xls', engine='xlrd') # For .xls files
380
  data = pd.read_excel('file.xlsx', engine='openpyxl') # For .xlsx files
381
  print(data.head())
382
  """, language="python")
383
+
384
  # 2. Encoding Issues
385
+ st.markdown("""
386
+ <h4 style="color: #5b2c6f;">2. Encoding Issues</h4>
387
+ """, unsafe_allow_html=True)
388
  st.write("Sometimes Excel files might have special characters that cause encoding problems.")
389
  st.code("""
390
  data = pd.read_excel('file.xlsx', encoding='utf-8') # Replace with the correct encoding
391
  print(data.head())
392
  """, language="python")
393
+
394
  # 3. Missing or Incomplete Data
395
+ st.markdown("""
396
+ <h4 style="color: #5b2c6f;">3. Missing or Incomplete Data</h4>
397
+ """, unsafe_allow_html=True)
398
  st.write("Missing values can lead to errors during data processing.")
399
  st.code("""
400
  data = pd.read_excel('file.xlsx')
401
  data.fillna(0, inplace=True) # Replace NaN values with 0 or other defaults
402
  print(data.head())
403
  """, language="python")
404
+
405
  # 4. Large File Sizes
406
+ st.markdown("""
407
+ <h4 style="color: #5b2c6f;">4. Large File Sizes</h4>
408
+ """, unsafe_allow_html=True)
409
  st.write("Large Excel files may cause performance issues or run out of memory.")
410
  st.code("""
411
  chunk_size = 1000
412
  for chunk in pd.read_excel('large_file.xlsx', chunksize=chunk_size):
413
  print(chunk.head())
414
  """, language="python")
415
+
416
  # 5. Sheet Name Selection
417
  st.write("#### 5. Sheet Name Selection")
418
  st.write("Excel files may have multiple sheets, and reading the wrong one can lead to incorrect analysis.")
 
430
  print(data.dtypes)
431
  """, language="python")
432
 
433
+ # 7. Merged Cells
 
434
  st.write("#### 7. Merged Cells")
435
  st.write("Merged cells in Excel can lead to missing or misaligned data.")
436
  st.code("""
 
439
  """, language="python")
440
 
441
 
442
+ # 8. Date Parsing
443
  st.write("#### 8. Date Parsing")
444
  st.write("Dates in Excel files may not be interpreted correctly.")
445
  st.code("""
 
447
  print(data.dtypes)
448
  """, language="python")
449
 
450
+ # Back Button
451
  col1 = st.columns(1)
 
452
  with col1:
453
  if st.button("⬅️ Back to Previous Page"):
454
  navigate_to("main")
455
 
456
+
457
 
458
 
459
  elif st.session_state.current_page == "explore_images_video":