Spaces:
Sleeping
Sleeping
Update pages/Data Collection.py
Browse files
- pages/Data Collection.py +29 -17
pages/Data Collection.py
CHANGED
|
@@ -412,43 +412,55 @@ if st.session_state.current_page == "explore_excel":
|
|
| 412 |
for chunk in pd.read_excel('large_file.xlsx', chunksize=chunk_size):
|
| 413 |
print(chunk.head())
|
| 414 |
""", language="python")
|
| 415 |
-
|
| 416 |
# 5. Sheet Name Selection
|
| 417 |
-
st.
|
|
|
|
|
|
|
| 418 |
st.write("Excel files may have multiple sheets, and reading the wrong one can lead to incorrect analysis.")
|
| 419 |
st.code("""
|
|
|
|
| 420 |
data = pd.read_excel('file.xlsx', sheet_name='Sheet1')
|
| 421 |
print(data.head())
|
| 422 |
""", language="python")
|
| 423 |
|
| 424 |
# 6. Data Type Conversion
|
| 425 |
-
st.
|
|
|
|
|
|
|
| 426 |
st.write("Excel files may have columns with inconsistent or incorrect data types.")
|
| 427 |
st.code("""
|
|
|
|
| 428 |
data = pd.read_excel('file.xlsx')
|
| 429 |
-
data['column_name'] = data['column_name'].astype(int)
|
| 430 |
print(data.dtypes)
|
| 431 |
""", language="python")
|
| 432 |
-
|
| 433 |
-
# 7.
|
| 434 |
-
st.
|
| 435 |
-
|
|
|
|
|
|
|
| 436 |
st.code("""
|
| 437 |
-
|
|
|
|
|
|
|
|
|
|
| 438 |
print(data.head())
|
| 439 |
""", language="python")
|
| 440 |
-
|
| 441 |
|
| 442 |
-
# 8.
|
| 443 |
-
st.
|
| 444 |
-
|
|
|
|
|
|
|
| 445 |
st.code("""
|
| 446 |
-
|
| 447 |
-
|
|
|
|
| 448 |
""", language="python")
|
| 449 |
|
| 450 |
-
#
|
| 451 |
-
col1 = st.columns(1)
|
| 452 |
with col1:
|
| 453 |
if st.button("⬅️ Back to Previous Page"):
|
| 454 |
navigate_to("main")
|
|
|
|
| 412 |
for chunk in pd.read_excel('large_file.xlsx', chunksize=chunk_size):
|
| 413 |
print(chunk.head())
|
| 414 |
""", language="python")
|
| 415 |
+
|
| 416 |
# 5. Sheet Name Selection
|
| 417 |
+
st.markdown("""
|
| 418 |
+
<h4 style="color: #5b2c6f;">5. Sheet Name Selection</h4>
|
| 419 |
+
""", unsafe_allow_html=True)
|
| 420 |
st.write("Excel files may have multiple sheets, and reading the wrong one can lead to incorrect analysis.")
|
| 421 |
st.code("""
|
| 422 |
+
# Specify the sheet name explicitly
|
| 423 |
data = pd.read_excel('file.xlsx', sheet_name='Sheet1')
|
| 424 |
print(data.head())
|
| 425 |
""", language="python")
|
| 426 |
|
| 427 |
# 6. Data Type Conversion
|
| 428 |
+
st.markdown("""
|
| 429 |
+
<h4 style="color: #5b2c6f;">6. Data Type Conversion</h4>
|
| 430 |
+
""", unsafe_allow_html=True)
|
| 431 |
st.write("Excel files may have columns with inconsistent or incorrect data types.")
|
| 432 |
st.code("""
|
| 433 |
+
# Convert columns to appropriate data types
|
| 434 |
data = pd.read_excel('file.xlsx')
|
| 435 |
+
data['column_name'] = data['column_name'].astype(int) # Replace 'column_name' with your column
|
| 436 |
print(data.dtypes)
|
| 437 |
""", language="python")
|
| 438 |
+
|
| 439 |
+
# 7. Hidden Characters or Whitespace
|
| 440 |
+
st.markdown("""
|
| 441 |
+
<h4 style="color: #5b2c6f;">7. Hidden Characters or Whitespace</h4>
|
| 442 |
+
""", unsafe_allow_html=True)
|
| 443 |
+
st.write("Whitespace or hidden characters in the data can cause parsing issues.")
|
| 444 |
st.code("""
|
| 445 |
+
# Remove leading/trailing whitespaces
|
| 446 |
+
data = pd.read_excel('file.xlsx')
|
| 447 |
+
data.columns = data.columns.str.strip() # Remove whitespace from column names
|
| 448 |
+
data['column_name'] = data['column_name'].str.strip() # Clean specific column
|
| 449 |
print(data.head())
|
| 450 |
""", language="python")
|
|
|
|
| 451 |
|
| 452 |
+
# 8. Merged Cells
|
| 453 |
+
st.markdown("""
|
| 454 |
+
<h4 style="color: #5b2c6f;">8. Merged Cells</h4>
|
| 455 |
+
""", unsafe_allow_html=True)
|
| 456 |
+
st.write("Merged cells in Excel can lead to missing or misaligned data.")
|
| 457 |
st.code("""
|
| 458 |
+
# Handle merged cells by filling forward
|
| 459 |
+
data = pd.read_excel('file.xlsx').ffill()  # Merged cells load as NaN except the top-left cell; forward-fill them
|
| 460 |
+
print(data.head())
|
| 461 |
""", language="python")
|
| 462 |
|
| 463 |
+
col1 = st.columns(1)[0] # Access the first (and only) column from the list of columns
|
|
|
|
| 464 |
with col1:
|
| 465 |
if st.button("⬅️ Back to Previous Page"):
|
| 466 |
navigate_to("main")
|