Spaces:
Sleeping
Sleeping
Update pages/Data Collection.py
Browse files - pages/Data Collection.py +46 -29
pages/Data Collection.py
CHANGED
|
@@ -267,13 +267,15 @@ if st.session_state.current_page == "main":
|
|
| 267 |
if st.button("🌐 HTML"):
|
| 268 |
navigate_to("explore_html")
|
| 269 |
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
|
|
|
|
| 273 |
st.markdown("""
|
| 274 |
<h2 style="color: #BB3385;">Excel</h2>
|
| 275 |
""", unsafe_allow_html=True)
|
| 276 |
-
|
|
|
|
| 277 |
st.write("""
|
| 278 |
- **Excel** is a powerful spreadsheet software developed by Microsoft.
|
| 279 |
- It is widely used for:
|
|
@@ -288,29 +290,30 @@ elif st.session_state.current_page == "explore_excel":
|
|
| 288 |
- Excel is an essential tool for managing and analyzing structured data in various industries.
|
| 289 |
""")
|
| 290 |
|
|
|
|
| 291 |
st.markdown("""
|
| 292 |
<h3 style="color: #5b2c6f;">Reading Excel Files in Python</h3>
|
| 293 |
""", unsafe_allow_html=True)
|
| 294 |
-
|
| 295 |
-
# Code example
|
| 296 |
st.code("""
|
| 297 |
import pandas as pd
|
| 298 |
|
| 299 |
# Read the Excel file
|
| 300 |
data = pd.read_excel('path_to_file.xlsx')
|
|
|
|
|
|
|
| 301 |
|
| 302 |
-
|
| 303 |
-
|
|
|
|
| 304 |
|
| 305 |
-
st.write("### Working with Sheets in Excel")
|
| 306 |
-
|
| 307 |
# Importing a Single Sheet
|
| 308 |
st.write("#### Importing a Single Excel Sheet")
|
| 309 |
st.code("""
|
| 310 |
df = pd.read_excel('path_to_file.xlsx', sheet_name=0)
|
| 311 |
print(df)
|
| 312 |
""", language="python")
|
| 313 |
-
|
| 314 |
# Importing Multiple Sheets
|
| 315 |
st.write("#### Importing Multiple Sheets from Excel")
|
| 316 |
st.code("""
|
|
@@ -320,9 +323,12 @@ elif st.session_state.current_page == "explore_excel":
|
|
| 320 |
print(data.head())
|
| 321 |
""", language="python")
|
| 322 |
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
#
|
|
|
|
|
|
|
|
|
|
| 326 |
st.write("#### Exporting a Single DataFrame")
|
| 327 |
st.code("""
|
| 328 |
data = pd.DataFrame({
|
|
@@ -333,8 +339,8 @@ elif st.session_state.current_page == "explore_excel":
|
|
| 333 |
# Export the DataFrame to an Excel file
|
| 334 |
data.to_excel('single_sheet_output.xlsx', index=False)
|
| 335 |
""", language="python")
|
| 336 |
-
|
| 337 |
-
# Exporting Multiple DataFrames
|
| 338 |
st.write("#### Exporting Multiple DataFrames to Different Sheets")
|
| 339 |
st.code("""
|
| 340 |
data1 = pd.DataFrame({
|
|
@@ -359,43 +365,54 @@ elif st.session_state.current_page == "explore_excel":
|
|
| 359 |
data3.to_excel(writer, sheet_name='Language Scores', index=False)
|
| 360 |
""", language="python")
|
| 361 |
|
| 362 |
-
|
|
|
|
|
|
|
|
|
|
| 363 |
|
| 364 |
# 1. File Format Compatibility
|
| 365 |
-
st.
|
|
|
|
|
|
|
| 366 |
st.write("Excel files may come in different formats like `.xls` and `.xlsx`, which can lead to compatibility issues.")
|
| 367 |
st.code("""
|
| 368 |
data = pd.read_excel('file.xls', engine='xlrd') # For .xls files
|
| 369 |
data = pd.read_excel('file.xlsx', engine='openpyxl') # For .xlsx files
|
| 370 |
print(data.head())
|
| 371 |
""", language="python")
|
| 372 |
-
|
| 373 |
# 2. Encoding Issues
|
| 374 |
-
st.
|
|
|
|
|
|
|
| 375 |
st.write("Sometimes Excel files might have special characters that cause encoding problems.")
|
| 376 |
st.code("""
|
| 377 |
data = pd.read_excel('file.xlsx', engine='openpyxl') # read_excel takes no encoding argument; the engine decodes .xlsx text as Unicode
|
| 378 |
print(data.head())
|
| 379 |
""", language="python")
|
| 380 |
-
|
| 381 |
# 3. Missing or Incomplete Data
|
| 382 |
-
st.
|
|
|
|
|
|
|
| 383 |
st.write("Missing values can lead to errors during data processing.")
|
| 384 |
st.code("""
|
| 385 |
data = pd.read_excel('file.xlsx')
|
| 386 |
data.fillna(0, inplace=True) # Replace NaN values with 0 or other defaults
|
| 387 |
print(data.head())
|
| 388 |
""", language="python")
|
| 389 |
-
|
| 390 |
# 4. Large File Sizes
|
| 391 |
-
st.
|
|
|
|
|
|
|
| 392 |
st.write("Large Excel files may cause performance issues or run out of memory.")
|
| 393 |
st.code("""
|
| 394 |
chunk_size = 1000
|
| 395 |
for chunk in pd.read_excel('large_file.xlsx', chunksize=chunk_size):
|
| 396 |
print(chunk.head())
|
| 397 |
""", language="python")
|
| 398 |
-
|
| 399 |
# 5. Sheet Name Selection
|
| 400 |
st.write("#### 5. Sheet Name Selection")
|
| 401 |
st.write("Excel files may have multiple sheets, and reading the wrong one can lead to incorrect analysis.")
|
|
@@ -413,8 +430,7 @@ elif st.session_state.current_page == "explore_excel":
|
|
| 413 |
print(data.dtypes)
|
| 414 |
""", language="python")
|
| 415 |
|
| 416 |
-
|
| 417 |
-
# 8. Merged Cells
|
| 418 |
st.write("#### 7. Merged Cells")
|
| 419 |
st.write("Merged cells in Excel can lead to missing or misaligned data.")
|
| 420 |
st.code("""
|
|
@@ -423,7 +439,7 @@ elif st.session_state.current_page == "explore_excel":
|
|
| 423 |
""", language="python")
|
| 424 |
|
| 425 |
|
| 426 |
-
#
|
| 427 |
st.write("#### 8. Date Parsing")
|
| 428 |
st.write("Dates in Excel files may not be interpreted correctly.")
|
| 429 |
st.code("""
|
|
@@ -431,12 +447,13 @@ elif st.session_state.current_page == "explore_excel":
|
|
| 431 |
print(data.dtypes)
|
| 432 |
""", language="python")
|
| 433 |
|
|
|
|
| 434 |
(col1,) = st.columns(1)  # st.columns returns a list; unpack the single column so `with col1:` works
|
| 435 |
-
|
| 436 |
with col1:
|
| 437 |
if st.button("⬅️ Back to Previous Page"):
|
| 438 |
navigate_to("main")
|
| 439 |
|
|
|
|
| 440 |
|
| 441 |
|
| 442 |
elif st.session_state.current_page == "explore_images_video":
|
|
|
|
| 267 |
if st.button("🌐 HTML"):
|
| 268 |
navigate_to("explore_html")
|
| 269 |
|
| 270 |
+
|
| 271 |
+
# Page for Explore Excel
|
| 272 |
+
if st.session_state.current_page == "explore_excel":
|
| 273 |
+
# Main Heading
|
| 274 |
st.markdown("""
|
| 275 |
<h2 style="color: #BB3385;">Excel</h2>
|
| 276 |
""", unsafe_allow_html=True)
|
| 277 |
+
|
| 278 |
+
# Overview Section
|
| 279 |
st.write("""
|
| 280 |
- **Excel** is a powerful spreadsheet software developed by Microsoft.
|
| 281 |
- It is widely used for:
|
|
|
|
| 290 |
- Excel is an essential tool for managing and analyzing structured data in various industries.
|
| 291 |
""")
|
| 292 |
|
| 293 |
+
# Reading Excel Files Section
|
| 294 |
st.markdown("""
|
| 295 |
<h3 style="color: #5b2c6f;">Reading Excel Files in Python</h3>
|
| 296 |
""", unsafe_allow_html=True)
|
| 297 |
+
|
|
|
|
| 298 |
st.code("""
|
| 299 |
import pandas as pd
|
| 300 |
|
| 301 |
# Read the Excel file
|
| 302 |
data = pd.read_excel('path_to_file.xlsx')
|
| 303 |
+
print(data.head()) # Displays first 5 rows in Excel file
|
| 304 |
+
""", language="python")
|
| 305 |
|
| 306 |
+
st.markdown("""
|
| 307 |
+
<h3 style="color: #5b2c6f;">Working with Sheets in Excel</h3>
|
| 308 |
+
""", unsafe_allow_html=True)
|
| 309 |
|
|
|
|
|
|
|
| 310 |
# Importing a Single Sheet
|
| 311 |
st.write("#### Importing a Single Excel Sheet")
|
| 312 |
st.code("""
|
| 313 |
df = pd.read_excel('path_to_file.xlsx', sheet_name=0)
|
| 314 |
print(df)
|
| 315 |
""", language="python")
|
| 316 |
+
|
| 317 |
# Importing Multiple Sheets
|
| 318 |
st.write("#### Importing Multiple Sheets from Excel")
|
| 319 |
st.code("""
|
|
|
|
| 323 |
print(data.head())
|
| 324 |
""", language="python")
|
| 325 |
|
| 326 |
+
# Exporting Data Section
|
| 327 |
+
st.markdown("""
|
| 328 |
+
<h3 style="color: #5b2c6f;">Exporting Data to Excel Files</h3>
|
| 329 |
+
""", unsafe_allow_html=True)
|
| 330 |
+
|
| 331 |
+
# Exporting a Single DataFrame
|
| 332 |
st.write("#### Exporting a Single DataFrame")
|
| 333 |
st.code("""
|
| 334 |
data = pd.DataFrame({
|
|
|
|
| 339 |
# Export the DataFrame to an Excel file
|
| 340 |
data.to_excel('single_sheet_output.xlsx', index=False)
|
| 341 |
""", language="python")
|
| 342 |
+
|
| 343 |
+
# Exporting Multiple DataFrames
|
| 344 |
st.write("#### Exporting Multiple DataFrames to Different Sheets")
|
| 345 |
st.code("""
|
| 346 |
data1 = pd.DataFrame({
|
|
|
|
| 365 |
data3.to_excel(writer, sheet_name='Language Scores', index=False)
|
| 366 |
""", language="python")
|
| 367 |
|
| 368 |
+
# Issues Section
|
| 369 |
+
st.markdown("""
|
| 370 |
+
<h3 style="color: #BB3385;">Common Issues with Excel Files</h3>
|
| 371 |
+
""", unsafe_allow_html=True)
|
| 372 |
|
| 373 |
# 1. File Format Compatibility
|
| 374 |
+
st.markdown("""
|
| 375 |
+
<h4 style="color: #5b2c6f;">1. File Format Compatibility</h4>
|
| 376 |
+
""", unsafe_allow_html=True)
|
| 377 |
st.write("Excel files may come in different formats like `.xls` and `.xlsx`, which can lead to compatibility issues.")
|
| 378 |
st.code("""
|
| 379 |
data = pd.read_excel('file.xls', engine='xlrd') # For .xls files
|
| 380 |
data = pd.read_excel('file.xlsx', engine='openpyxl') # For .xlsx files
|
| 381 |
print(data.head())
|
| 382 |
""", language="python")
|
| 383 |
+
|
| 384 |
# 2. Encoding Issues
|
| 385 |
+
st.markdown("""
|
| 386 |
+
<h4 style="color: #5b2c6f;">2. Encoding Issues</h4>
|
| 387 |
+
""", unsafe_allow_html=True)
|
| 388 |
st.write("Sometimes Excel files might have special characters that cause encoding problems.")
|
| 389 |
st.code("""
|
| 390 |
data = pd.read_excel('file.xlsx', engine='openpyxl') # read_excel takes no encoding argument; the engine decodes .xlsx text as Unicode
|
| 391 |
print(data.head())
|
| 392 |
""", language="python")
|
| 393 |
+
|
| 394 |
# 3. Missing or Incomplete Data
|
| 395 |
+
st.markdown("""
|
| 396 |
+
<h4 style="color: #5b2c6f;">3. Missing or Incomplete Data</h4>
|
| 397 |
+
""", unsafe_allow_html=True)
|
| 398 |
st.write("Missing values can lead to errors during data processing.")
|
| 399 |
st.code("""
|
| 400 |
data = pd.read_excel('file.xlsx')
|
| 401 |
data.fillna(0, inplace=True) # Replace NaN values with 0 or other defaults
|
| 402 |
print(data.head())
|
| 403 |
""", language="python")
|
| 404 |
+
|
| 405 |
# 4. Large File Sizes
|
| 406 |
+
st.markdown("""
|
| 407 |
+
<h4 style="color: #5b2c6f;">4. Large File Sizes</h4>
|
| 408 |
+
""", unsafe_allow_html=True)
|
| 409 |
st.write("Large Excel files may cause performance issues or run out of memory.")
|
| 410 |
st.code("""
|
| 411 |
chunk_size = 1000
|
| 412 |
for chunk in pd.read_excel('large_file.xlsx', chunksize=chunk_size):
|
| 413 |
print(chunk.head())
|
| 414 |
""", language="python")
|
| 415 |
+
|
| 416 |
# 5. Sheet Name Selection
|
| 417 |
st.write("#### 5. Sheet Name Selection")
|
| 418 |
st.write("Excel files may have multiple sheets, and reading the wrong one can lead to incorrect analysis.")
|
|
|
|
| 430 |
print(data.dtypes)
|
| 431 |
""", language="python")
|
| 432 |
|
| 433 |
+
# 7. Merged Cells
|
|
|
|
| 434 |
st.write("#### 7. Merged Cells")
|
| 435 |
st.write("Merged cells in Excel can lead to missing or misaligned data.")
|
| 436 |
st.code("""
|
|
|
|
| 439 |
""", language="python")
|
| 440 |
|
| 441 |
|
| 442 |
+
# 8. Date Parsing
|
| 443 |
st.write("#### 8. Date Parsing")
|
| 444 |
st.write("Dates in Excel files may not be interpreted correctly.")
|
| 445 |
st.code("""
|
|
|
|
| 447 |
print(data.dtypes)
|
| 448 |
""", language="python")
|
| 449 |
|
| 450 |
+
# Back Button
|
| 451 |
(col1,) = st.columns(1)  # st.columns returns a list; unpack the single column so `with col1:` works
|
|
|
|
| 452 |
with col1:
|
| 453 |
if st.button("⬅️ Back to Previous Page"):
|
| 454 |
navigate_to("main")
|
| 455 |
|
| 456 |
+
|
| 457 |
|
| 458 |
|
| 459 |
elif st.session_state.current_page == "explore_images_video":
|