LakshmiHarika committed on
Commit
e4e65de
·
verified ·
1 Parent(s): d8c327a

Update pages/Data Collection.py

Browse files
Files changed (1) hide show
  1. pages/Data Collection.py +165 -4
pages/Data Collection.py CHANGED
@@ -269,14 +269,175 @@ if st.session_state.current_page == "main":
269
 
270
  # Pages for Each Format
271
  elif st.session_state.current_page == "explore_excel":
 
272
  st.markdown("""
273
- <h3 style="color: #e25822;">Exploring Excel</h3>
274
  """, unsafe_allow_html=True)
 
275
  st.write("""
276
- Excel is a structured data format used to store and analyze data in tabular form. It supports features like formulas, charts, and pivot tables.
 
 
 
 
 
 
 
 
 
 
277
  """)
278
- if st.button("Go Back"):
279
- navigate_to("main")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
280
 
281
  elif st.session_state.current_page == "explore_images_video":
282
  st.markdown("""
 
269
 
270
  # Pages for Each Format
271
  elif st.session_state.current_page == "explore_excel":
272
+ # Section about Excel
273
  st.markdown("""
274
+ <h2 style="color: #BB3385;">Excel</h2>
275
  """, unsafe_allow_html=True)
276
+
277
  st.write("""
278
+ - **Excel** is a powerful spreadsheet software developed by Microsoft.
279
+ - It is widely used for:
280
+ - Data organization
281
+ - Analysis
282
+ - Visualization
283
+ - Key features include:
284
+ - Storing data in tabular format
285
+ - Performing complex calculations
286
+ - Creating charts
287
+ - Applying various data manipulation techniques
288
+ - Excel is an essential tool for managing and analyzing structured data in various industries.
289
  """)
290
+
291
+ st.markdown("""
292
+ <h3 style="color: #5b2c6f;">Reading Excel Files in Python</h3>
293
+ """, unsafe_allow_html=True)
294
+
295
+ # Code example
296
+ st.code("""
297
+ import pandas as pd
298
+
299
+ # Read the Excel file
300
+ data = pd.read_excel('path_to_file.xlsx')
301
+
302
+ print(data.head()) # displays first 5 rows in excel file
303
+ """, language="python")
304
+
305
+ st.write("### Working with Sheets in Excel")
306
+
307
+ # Importing a Single Sheet
308
+ st.write("#### Importing a Single Excel Sheet")
309
+ st.code("""
310
+ df = pd.read_excel('path_to_file.xlsx', sheet_name=0)
311
+ print(df)
312
+ """, language="python")
313
+
314
+ # Importing Multiple Sheets
315
+ st.write("#### Importing Multiple Sheets from Excel")
316
+ st.code("""
317
+ df_dict = pd.read_excel('path_to_file.xlsx', sheet_name=[0, 1, 2])
318
+ for sheet, data in df_dict.items():
319
+ print(f"Sheet: {sheet}")
320
+ print(data.head())
321
+ """, language="python")
322
+
323
+ st.write("### Exporting Data to Excel Files")
324
+
325
+ # Exporting a Single DataFrame to Excel
326
+ st.write("#### Exporting a Single DataFrame")
327
+ st.code("""
328
+ data = pd.DataFrame({
329
+ 'name': ['a', 'b', 'c', 'd'],
330
+ 'age': [12, 23, 44, 43]
331
+ })
332
+
333
+ # Export the DataFrame to an Excel file
334
+ data.to_excel('single_sheet_output.xlsx', index=False)
335
+ """, language="python")
336
+
337
+ # Exporting Multiple DataFrames to Multiple Sheets
338
+ st.write("#### Exporting Multiple DataFrames to Different Sheets")
339
+ st.code("""
340
+ data1 = pd.DataFrame({
341
+ 'name': ['a', 'b', 'c', 'd'],
342
+ 'age': [12, 23, 44, 43]
343
+ })
344
+
345
+ data2 = pd.DataFrame({
346
+ 'maths': [43, 32, 45, 45],
347
+ 'science': [32, 54, 45, 13]
348
+ })
349
+
350
+ data3 = pd.DataFrame({
351
+ 'hindi': [32, 45, 53, 53],
352
+ 'english': [53, 32, 24, 65]
353
+ })
354
+
355
+ # Export multiple DataFrames to an Excel file with multiple sheets
356
+ with pd.ExcelWriter('multi_sheet_output.xlsx') as writer:
357
+ data1.to_excel(writer, sheet_name='Personal Info', index=False)
358
+ data2.to_excel(writer, sheet_name='Academic Scores', index=False)
359
+ data3.to_excel(writer, sheet_name='Language Scores', index=False)
360
+ """, language="python")
361
+
362
+ st.write("### Common Issues with Excel Files")
363
+
364
+ # 1. File Format Compatibility
365
+ st.write("#### 1. File Format Compatibility")
366
+ st.write("Excel files may come in different formats like `.xls` and `.xlsx`, which can lead to compatibility issues.")
367
+ st.code("""
368
+ data = pd.read_excel('file.xls', engine='xlrd') # For .xls files
369
+ data = pd.read_excel('file.xlsx', engine='openpyxl') # For .xlsx files
370
+ print(data.head())
371
+ """, language="python")
372
+
373
+ # 2. Encoding Issues
374
+ st.write("#### 2. Encoding Issues")
375
+ st.write("Sometimes Excel files might have special characters that cause encoding problems.")
376
+ st.code("""
377
+ data = pd.read_excel('file.xlsx', engine='openpyxl') # .xlsx text is already Unicode; read_excel has no encoding argument
378
+ print(data.head())
379
+ """, language="python")
380
+
381
+ # 3. Missing or Incomplete Data
382
+ st.write("#### 3. Missing or Incomplete Data")
383
+ st.write("Missing values can lead to errors during data processing.")
384
+ st.code("""
385
+ data = pd.read_excel('file.xlsx')
386
+ data.fillna(0, inplace=True) # Replace NaN values with 0 or other defaults
387
+ print(data.head())
388
+ """, language="python")
389
+
390
+ # 4. Large File Sizes
391
+ st.write("#### 4. Large File Sizes")
392
+ st.write("Large Excel files may cause performance issues or run out of memory.")
393
+ st.code("""
394
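+ chunk_size = 1000 # read_excel has no chunksize argument; page through rows with skiprows/nrows
394
+ for start in range(0, 5 * chunk_size, chunk_size): # adjust the upper bound to the sheet's row count
395
+ print(pd.read_excel('large_file.xlsx', skiprows=range(1, start + 1), nrows=chunk_size).head())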
397
+ """, language="python")
398
+
399
+ # 5. Sheet Name Selection
400
+ st.write("#### 5. Sheet Name Selection")
401
+ st.write("Excel files may have multiple sheets, and reading the wrong one can lead to incorrect analysis.")
402
+ st.code("""
403
+ data = pd.read_excel('file.xlsx', sheet_name='Sheet1')
404
+ print(data.head())
405
+ """, language="python")
406
+
407
+ # 6. Data Type Conversion
408
+ st.write("#### 6. Data Type Conversion")
409
+ st.write("Excel files may have columns with inconsistent or incorrect data types.")
410
+ st.code("""
411
+ data = pd.read_excel('file.xlsx')
412
+ data['column_name'] = data['column_name'].astype(int)
413
+ print(data.dtypes)
414
+ """, language="python")
415
+
416
+
417
+ # 7. Merged Cells
418
+ st.write("#### 7. Merged Cells")
419
+ st.write("Merged cells in Excel can lead to missing or misaligned data.")
420
+ st.code("""
421
+ data = pd.read_excel('file.xlsx').ffill() # merged cells load as NaN after the first cell; forward-fill them
422
+ print(data.head())
423
+ """, language="python")
424
+
425
+
426
+ # 8. Date Parsing
427
+ st.write("#### 8. Date Parsing")
428
+ st.write("Dates in Excel files may not be interpreted correctly.")
429
+ st.code("""
430
+ data = pd.read_excel('file.xlsx', parse_dates=['date_column'])
431
+ print(data.dtypes)
432
+ """, language="python")
433
+
434
+ col1 = st.columns(1)[0]
435
+
436
+ with col1:
437
+ if st.button("⬅️ Back to Previous Page"):
438
+ navigate_to("main")
439
+
440
+
441
 
442
  elif st.session_state.current_page == "explore_images_video":
443
  st.markdown("""