Phani1008 committed on
Commit
d36028b
·
verified ·
1 Parent(s): cf5d1c6

Update pages/3_Life cycle of ML project.py

Browse files
Files changed (1) hide show
  1. pages/3_Life cycle of ML project.py +242 -1
pages/3_Life cycle of ML project.py CHANGED
@@ -161,4 +161,245 @@ def data_collection_page():
161
  st.subheader("Manual Collection")
162
  st.write("""
163
  - Collect data manually through surveys, questionnaires, interviews, or direct observations.
164
- """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
  st.subheader("Manual Collection")
162
  st.write("""
163
  - Collect data manually through surveys, questionnaires, interviews, or direct observations.
164
+ """)
165
+
166
+
167
+
168
+
169
+
170
+
171
+ # Buttons for types of data
172
+ st.subheader("Data Types")
173
+ if st.button("Structured Data"):
174
+ st.session_state.previous_page = '2_data_collection'
175
+ st.session_state.page = 'structured_data'
176
+
177
+ if st.button("Semi-Structured Data"):
178
+ st.session_state.previous_page = '2_data_collection'
179
+ st.session_state.page = 'semi_structured_data'
180
+
181
+ if st.button("Unstructured Data"):
182
+ st.session_state.previous_page = '2_data_collection'
183
+ st.session_state.page = 'unstructured_data'
184
+
185
+ # Back button to return to main page
186
+ if st.button("Back to Main"):
187
+ st.session_state.previous_page = '2_data_collection'
188
+ st.session_state.page = 'main'
189
+
190
+
191
def structured_data_page():
    """Render the "Structured Data" page.

    Shows a title and a short description, then one button per structured
    source type. Clicking a button stores the matching page key in
    ``st.session_state.page``; this function does not trigger a rerun itself.
    """
    st.title("Structured Data")
    st.write("""
Structured data refers to data that is organized into a tabular format with rows and columns, such as in a database or spreadsheet.
""")

    # Sub-type buttons, in display order: (button label, target page key).
    st.subheader("Types of Structured Data")
    sub_pages = (
        ("Excel Files", 'excel_data'),
        ("SQL Databases", 'sql_data'),
    )
    for label, target in sub_pages:
        if st.button(label):
            st.session_state.page = target

    # Navigation back to the parent page.
    back_clicked = st.button("Back to Data Collection")
    if back_clicked:
        st.session_state.page = '2_data_collection'
206
+
207
+
208
def semi_structured_data_page():
    """Render the "Semi-Structured Data" page.

    Shows a title and a short description, then one button per
    semi-structured format. Clicking a button stores the matching page key
    in ``st.session_state.page``; this function does not trigger a rerun
    itself.
    """
    st.title("Semi-Structured Data")
    st.write("""
Semi-structured data has some form of organization but is not as rigid as structured data. It may include elements such as tags, metadata, etc.
""")

    # Sub-type buttons, in display order: (button label, target page key).
    st.subheader("Types of Semi-Structured Data")
    sub_pages = (
        ("CSV Files", 'csv_data'),
        ("JSON Files", 'json_data'),
        ("XML Files", 'xml_data'),
        ("HTML Files", 'html_data'),
    )
    for label, target in sub_pages:
        if st.button(label):
            st.session_state.page = target

    # Navigation back to the parent page.
    back_clicked = st.button("Back to Data Collection")
    if back_clicked:
        st.session_state.page = '2_data_collection'
227
+
228
+
229
def unstructured_data_page():
    """Render the "Unstructured Data" page.

    Shows a title and a short description, then one button per unstructured
    media type. Clicking a button stores the matching page key in
    ``st.session_state.page``; this function does not trigger a rerun itself.
    """
    st.title("Unstructured Data")
    st.write("""
Unstructured data does not have a predefined format or structure. It includes data such as images, videos, and text.
""")

    # Sub-type buttons, in display order: (button label, target page key).
    st.subheader("Types of Unstructured Data")
    sub_pages = (
        ("Images", 'image_data'),
        ("Videos", 'video_data'),
        ("Audio", 'audio_data'),
        ("Text", 'text_data'),
    )
    for label, target in sub_pages:
        if st.button(label):
            st.session_state.page = target

    # Navigation back to the parent page.
    back_clicked = st.button("Back to Data Collection")
    if back_clicked:
        st.session_state.page = '2_data_collection'
248
+
249
+
250
+ # Individual Data Pages (Examples)
251
def excel_data_page():
    """Render the "Handling Excel Files" tutorial page.

    Emits, in order: an overview of the XLSX format, the workbook/sheet
    analogy, a comparison with CSV, notes on the default extension, a
    pandas example for reading one sheet, and a two-step example for
    exporting every sheet to its own CSV file. The page ends with a button
    that stores ``'structured_data'`` in ``st.session_state.page``.
    """
    st.title("Handling Excel Files")

    # --- Format overview -------------------------------------------------
    st.header("Understanding Data Format:")
    st.markdown("""- Can only be created using applications like Microsoft Excel.
- It is always structured data because it organizes data in rows and columns.
- XLSX files are also called Workbooks because they can contain multiple sheets.""")

    # --- Workbook / sheet analogy and comparison with CSV ----------------
    st.subheader("Workbook and Sheets")
    st.markdown("""*An XLSX file is similar to a Book:*
- The Workbook acts as the book itself.
- Each Sheet inside the workbook is like a Page.
- Each Sheet can be thought of as an individual CSV file.
""")
    st.markdown("""*Why Use XLSX Instead of CSV?*

- If you have to choose between a CSV file and an XLSX file, always choose XLSX because:
- It does not have parser errors or encoding issues that are common in CSV files.
- It contains pure structured data.
""")

    # --- Extension and sheet handling ------------------------------------
    st.subheader("Default Extension and Handling of XLSX")
    st.markdown("""
- The default extension for Excel files is :blue-background[.xlsx.]
- Multiple Sheets = Workbook.
- Each sheet in an XLSX file can be processed separately.
""")

    # --- Reading a single sheet with pandas ------------------------------
    st.subheader("Reading XLSX Files into a DataFrame Using Pandas")
    st.markdown(""" To work with Excel files in Python, you use the pandas library:
- *Read a Single Sheet*
- Use the :blue-background[pd.read_excel()] function to read an XLSX file into a DataFrame.
- By default, it reads the first sheet :blue-background[(index 0)].
""")

    read_sheet_snippet = '''import pandas as pd
df = pd.read_excel('file.xlsx', sheet_name=0) # Reads the first sheet
print(df)
'''
    st.code(read_sheet_snippet, language="python")
    st.markdown("""
- *Key Notes:*
- Each sheet in the XLSX file can be loaded as a single DataFrame.
- Sheet indices start from 0 (zero-based indexing).
""")

    # --- Exporting every sheet to CSV ------------------------------------
    st.subheader("Converting Multiple Sheets to CSV Files")
    st.write("If you want to save each sheet in an XLSX file as a separate CSV file:")
    st.write("*Step 1:* Read the workbook and load sheets.")
    open_workbook_snippet = '''
xlsx_file = 'file.xlsx'
xls = pd.ExcelFile(xlsx_file)
'''
    st.code(open_workbook_snippet, language="python")
    st.write("*Step 2:* Loop through all the sheets and save them as separate CSV files:")
    export_sheets_snippet = '''
for sheet_name in xls.sheet_names: # Loop through all sheet names
    df = pd.read_excel(xlsx_file, sheet_name=sheet_name)
    df.to_csv(f'{sheet_name}.csv', index=False) # Save each sheet as a CSV
'''
    st.code(export_sheets_snippet, language="python")
    st.write("Result: Each sheet is now saved as an individual CSV file.")

    # --- Navigation back to the parent page ------------------------------
    back_clicked = st.button("Back to Structured Data")
    if back_clicked:
        st.session_state.page = 'structured_data'
319
+
320
def sql_data_page():
    """Render the "SQL Databases" page.

    Shows a title and a one-line description, plus a button that stores
    ``'structured_data'`` in ``st.session_state.page`` when clicked.
    """
    st.title("SQL Databases")
    st.write("SQL databases store data in structured tables, and data is queried using SQL commands.")

    back_clicked = st.button("Back to Structured Data")
    if back_clicked:
        st.session_state.page = 'structured_data'
325
+
326
def csv_data_page():
    """Render the "CSV Files" page.

    Shows a title and a one-line description, plus a button that stores
    ``'semi_structured_data'`` in ``st.session_state.page`` when clicked.
    """
    st.title("CSV Files")
    st.write("CSV files store data in a comma-separated format and are often used for exchanging semi-structured data.")

    back_clicked = st.button("Back to Semi-Structured Data")
    if back_clicked:
        st.session_state.page = 'semi_structured_data'
331
+
332
def json_data_page():
    """Render the "JSON Files" page.

    Shows a title and a one-line description, plus a button that stores
    ``'semi_structured_data'`` in ``st.session_state.page`` when clicked.
    """
    st.title("JSON Files")
    st.write("JSON files store data in a key-value format and are widely used in web development.")

    back_clicked = st.button("Back to Semi-Structured Data")
    if back_clicked:
        st.session_state.page = 'semi_structured_data'
337
+
338
def xml_data_page():
    """Render the "XML Files" page.

    Shows a title and a one-line description, plus a button that stores
    ``'semi_structured_data'`` in ``st.session_state.page`` when clicked.
    """
    st.title("XML Files")
    st.write("XML files store data in a hierarchical format and are often used for semi-structured data.")

    back_clicked = st.button("Back to Semi-Structured Data")
    if back_clicked:
        st.session_state.page = 'semi_structured_data'
343
+
344
def html_data_page():
    """Render the "HTML Files" page.

    Shows a title and a one-line description, plus a button that stores
    ``'semi_structured_data'`` in ``st.session_state.page`` when clicked.
    """
    st.title("HTML Files")
    st.write("HTML files are used to store web page content and are typically semi-structured data.")

    back_clicked = st.button("Back to Semi-Structured Data")
    if back_clicked:
        st.session_state.page = 'semi_structured_data'
349
+
350
def image_data_page():
    """Render the "Images" page.

    Shows a title and a one-line description, plus a button that stores
    ``'unstructured_data'`` in ``st.session_state.page`` when clicked.
    """
    st.title("Images")
    st.write("Images are a form of unstructured data, often stored in formats like JPEG, PNG, and TIFF.")

    back_clicked = st.button("Back to Unstructured Data")
    if back_clicked:
        st.session_state.page = 'unstructured_data'
355
+
356
def video_data_page():
    """Render the "Videos" page.

    Shows a title and a one-line description, plus a button that stores
    ``'unstructured_data'`` in ``st.session_state.page`` when clicked.
    """
    st.title("Videos")
    st.write("Videos are a form of unstructured data, often stored in formats like MP4, AVI, and MKV.")

    back_clicked = st.button("Back to Unstructured Data")
    if back_clicked:
        st.session_state.page = 'unstructured_data'
361
+
362
def audio_data_page():
    """Render the "Audio" page.

    Shows a title and a one-line description, plus a button that stores
    ``'unstructured_data'`` in ``st.session_state.page`` when clicked.
    """
    st.title("Audio")
    st.write("Audio files are a form of unstructured data, often stored in formats like MP3, WAV, and AAC.")

    back_clicked = st.button("Back to Unstructured Data")
    if back_clicked:
        st.session_state.page = 'unstructured_data'
367
+
368
def text_data_page():
    """Render the "Text" page.

    Shows a title and a one-line description, plus a button that stores
    ``'unstructured_data'`` in ``st.session_state.page`` when clicked.
    """
    st.title("Text")
    st.write("Text data is unstructured and can come from sources like emails, documents, and social media.")

    back_clicked = st.button("Back to Unstructured Data")
    if back_clicked:
        st.session_state.page = 'unstructured_data'
373
+
374
+
375
# Main logic to render pages based on session state.
#
# Dispatch table: every value that the page functions store in
# st.session_state.page, mapped to the function that draws that page.
_PAGE_RENDERERS = {
    'main': main_page,
    '2_data_collection': data_collection_page,
    'structured_data': structured_data_page,
    'semi_structured_data': semi_structured_data_page,
    'unstructured_data': unstructured_data_page,
    'excel_data': excel_data_page,
    'sql_data': sql_data_page,
    'csv_data': csv_data_page,
    'json_data': json_data_page,
    'xml_data': xml_data_page,
    'html_data': html_data_page,
    'image_data': image_data_page,
    'video_data': video_data_page,
    'audio_data': audio_data_page,
    'text_data': text_data_page,
}

_active_page = st.session_state.page
_render_active_page = _PAGE_RENDERERS.get(_active_page)

# An unrecognised page key renders nothing, as the if/elif chain did.
if _render_active_page is not None:
    _render_active_page()

    # A clicked navigation button only stores the new target in
    # st.session_state.page; by then the old page has already been chosen
    # and drawn for this run. Rerun immediately so the new page appears on
    # the first click instead of after the next interaction.
    if st.session_state.page != _active_page:
        st.rerun()