Harika22 committed on
Commit
c234d65
·
verified ·
1 Parent(s): 09c6f7f

Update pages/5_Structured_data.py

Browse files
Files changed (1) hide show
  1. pages/5_Structured_data.py +48 -11
pages/5_Structured_data.py CHANGED
@@ -71,15 +71,52 @@ st.markdown(
71
  """, unsafe_allow_html=True)
72
 
73
  st.title("📂Handling Excel files📂")
74
- st.markdown("Excel is a powerful spreadsheet application used for data analysis, visualization, and performing calculations with a grid of cells organized in rows and columns.")
75
- st.subheader("How to read excel files?..📃")
76
- rcode = '''
77
- df = pd.read_excel(r"C:\Users\Harika\Downloads\f12.xlsx")
78
- '''
79
- st.code(rcode, language="python")
80
 
81
- st.subheader("📑How to read an excel files with multiple sheets?...")
82
- mcode = '''
83
- df = pd.read_excel(r"C:\Users\Harika\Downloads\hari.xlsx",sheet_name=[0, 1, 2])
84
- '''
85
- st.code(mcode, language="python")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  """, unsafe_allow_html=True)
72
 
73
  st.title("📂Handling Excel files📂")
74
+ st.markdown(''' - Excel is a widely used software application for organizing,
75
+ storing, and analyzing data in tabular format.
76
+
77
+ - It is a spreadsheet tool that allows users to work with rows, columns, and cells to manage numerical or textual data.
78
+ - Excel files are typically saved with extensions like .xls or .xlsx.''')
 
79
 
80
+ st.header('**How to Read These Files:**')
81
+ st.subheader('''**Using Python Libraries:**''')
82
+ st.code('''
83
+ import pandas as pd
84
+ # Reading an Excel file
85
+ df = pd.read_excel('file.xlsx')
86
+ print(df.head())''')
87
+ st.header('**Issues in Excel:**')
88
+ st.markdown('''
89
+ 1. **File Format Issues:**
90
+ - `.xls` and `.xlsx` are different formats.
91
+ 2. **Corrupted Files:**
92
+ - Files may get corrupted during transfer or storage, making them unreadable.
93
+ 3. **Encoding Issues:**
94
+ - Data with special characters or non-`UTF-8` encoding can cause errors.''')
95
+ st.write('**Solution:**')
96
+ st.code('''
97
+ df = pd.read_excel('file.xlsx', engine='openpyxl')
98
+ ''')
99
+ st.markdown('''
100
+ 4. **Missing Values:**
101
+ - Cells with missing or `NaN` values may disrupt data processing.''')
102
+ st.write('**Solution:**')
103
+ st.code('''
104
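+ df.fillna(0, inplace=True)  # option 1: replace missing values with 0
105
+ df.dropna(inplace=True)  # option 2 (alternative): drop rows that contain missing values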
106
+ ''')
107
+ st.markdown('''
108
+ 5. **Large File Size:**
109
+ - Handling very large Excel files can result in memory issues.''')
110
+ st.write('**Solution:**')
111
+ st.code('''
112
+ # read_excel has no chunksize parameter; limit rows with nrows (and skiprows for later blocks)
113
+ chunk = pd.read_excel('large_file.xlsx', nrows=10000)
114
+ print(chunk)
115
+ ''')
116
+ st.markdown('''
117
+ 6. **Multiple Sheets:**
118
+ - Huge files may have multiple sheets, making it harder to extract relevant data.''')
119
+ st.write('**Solution:**')
120
+ st.code('''
121
+ df = pd.read_excel('file.xlsx', sheet_name=[0,1,2])
122
+ ''')