sree4411 committed on
Commit
eef0cbc
·
verified ·
1 Parent(s): ee51681

Update pages/3_Life Cycle Of ML Project.py

Browse files
Files changed (1) hide show
  1. pages/3_Life Cycle Of ML Project.py +145 -107
pages/3_Life Cycle Of ML Project.py CHANGED
@@ -1,8 +1,6 @@
1
  import streamlit as st
2
- import pandas as pd
3
  import json
4
  import xml.etree.ElementTree as ET
5
- import html
6
 
7
  # Initialize page navigation state
8
  if 'page' not in st.session_state:
@@ -76,7 +74,7 @@ elif st.session_state.page == "structured_data":
76
  if st.button(":blue[📊 Excel]"):
77
  st.session_state.page = "excel"
78
 
79
- if st.button(":blue[📂 CSV]"):
80
  st.session_state.page = "csv"
81
 
82
  if st.button(":red[Back to Data Collection]"):
@@ -85,13 +83,10 @@ elif st.session_state.page == "structured_data":
85
  # ----------------- CSV Data Page -----------------
86
  elif st.session_state.page == "csv":
87
  st.title(":red[CSV Data Format]")
88
- st.write("### :blue[What is CSV?]")
89
- st.write("""
90
- CSV (Comma Separated Values) is a simple file format used to store tabular data, such as a spreadsheet or database.
91
- It is widely used due to its simplicity and ease of use.
92
  """)
93
-
94
- st.write("### :blue[How to Read CSV ]")
95
  st.code("""
96
  import pandas as pd
97
  # Read a CSV file
@@ -99,27 +94,66 @@ df = pd.read_csv('data.csv')
99
  print(df)
100
  """, language='python')
101
 
102
- st.write("### Issues Encountered")
103
  st.write("""
104
- - *File not found*: Incorrect file path.
105
- - *Malformed CSV*: Incorrect number of fields in rows.
106
- """)
 
107
 
108
- st.write("### Solutions to These Issues")
109
  st.code("""
110
- # Handle missing file error
 
 
111
  try:
112
  df = pd.read_csv('data.csv')
113
  except FileNotFoundError:
114
  print("File not found. Check the file path.")
115
- # Handle malformed CSV error
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  try:
117
- df = pd.read_csv('data.csv', error_bad_lines=False)
118
- except pd.errors.ParserError:
119
- print("Malformed CSV. Check the CSV format.")
 
 
 
120
  """, language='python')
121
 
122
- st.link_button(":blue[Jupyter Notebook(colab)]","https://colab.research.google.com/drive/1sT35x4JH9s_hb31aMoUwtry-w8FE7fQg?usp=sharing")
123
 
124
  if st.button(":red[Back to Structured Data]"):
125
  st.session_state.page = "structured_data"
@@ -127,7 +161,6 @@ except pd.errors.ParserError:
127
  # ----------------- Unstructured Data Page -----------------
128
  elif st.session_state.page == "unstructured_data":
129
  st.title(":blue[Unstructured Data]")
130
-
131
  st.markdown("""
132
  *Unstructured data* does not have a predefined format. It consists of various data types like text, images, videos, and audio files.
133
  Examples include:
@@ -194,143 +227,148 @@ cv2.destroyAllWindows()
194
  """)
195
  st.code("""
196
  import librosa
197
- import librosa.display
 
 
198
  import matplotlib.pyplot as plt
199
- # Load audio file
200
- y, sr = librosa.load('sample_audio.mp3')
201
- librosa.display.waveshow(y, sr=sr)
202
- plt.title('Waveform')
203
  plt.show()
204
  """, language='python')
205
 
206
- st.markdown("### Challenges with Unstructured Data")
207
- st.write("""
208
- - *Noise and Inconsistency*: Data is often incomplete or noisy.
209
- - *Storage Requirements*: Large size and variability in data types.
210
- - *Processing Time*: Analyzing unstructured data is computationally expensive.
211
- """)
212
-
213
- st.markdown("### Solutions")
214
- st.write("""
215
- - *Data Cleaning*: Preprocess data to remove noise.
216
- - *Efficient Storage*: Use NoSQL databases (e.g., MongoDB) or cloud storage.
217
- - *Parallel Processing*: Utilize frameworks like Apache Spark.
218
- """)
219
 
220
- # Back to Data Collection
221
  if st.button(":red[Back to Data Collection]"):
222
- st.session_state.page = "data_collection"
223
 
224
  # ----------------- Semi-Structured Data Page -----------------
225
  elif st.session_state.page == "semi_structured_data":
226
  st.title(":blue[Semi-Structured Data]")
227
-
228
  st.markdown("""
229
- Semi-structured data has some level of organization, but not as rigid as structured data. Examples include:
230
- - JSON files
231
  - XML files
232
- - HTML files
 
233
  """)
234
 
235
- st.markdown("### JSON Example")
236
- if st.button(":blue[JSON Handling]"):
237
- st.session_state.page = "json"
238
-
239
- st.markdown("### XML Example")
240
- if st.button(":blue[XML Handling]"):
241
  st.session_state.page = "xml"
242
 
243
- st.markdown("### HTML Example")
244
- if st.button(":blue[HTML Handling]"):
 
 
245
  st.session_state.page = "html"
246
 
247
  if st.button(":red[Back to Data Collection]"):
248
  st.session_state.page = "data_collection"
249
 
250
- # ----------------- JSON Data Page -----------------
251
- elif st.session_state.page == "json":
252
- st.title(":blue[JSON Data]")
253
-
254
  st.markdown("""
255
- JSON (JavaScript Object Notation) is a lightweight data-interchange format that is easy for humans to read and write, and easy for machines to parse and generate.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
256
  """)
257
 
258
- st.write("### Example of JSON:")
259
  st.code("""
260
- {
261
- "name": "John",
262
- "age": 30,
263
- "city": "New York"
264
- }
265
- """, language='json')
266
-
267
- st.write("### How to Read JSON Data in Python")
 
 
 
 
 
 
 
 
 
 
 
 
268
  st.code("""
269
  import json
270
-
271
- # Load JSON data
272
- with open('data.json', 'r') as file:
273
- data = json.load(file)
274
  print(data)
275
  """, language='python')
276
 
277
- st.write("### Issues with JSON Files")
278
  st.write("""
279
- - *File not found*: Check the file path.
280
- - *Incorrect Format*: Ensure proper JSON formatting.
281
  """)
282
 
283
  st.write("### Solutions")
284
  st.code("""
285
- # Handle JSON file not found error
286
  try:
287
- with open('data.json', 'r') as file:
288
- data = json.load(file)
289
- except FileNotFoundError:
290
- print("File not found.")
291
- # Validate JSON format
292
- import json
293
- json.loads(data)
294
  """, language='python')
295
 
296
- st.link_button(":blue[JSON Example in Jupyter Notebook]","https://colab.research.google.com/drive/1sT35x4JH9s_hb31aMoUwtry-w8FE7fQg?usp=sharing")
297
 
298
  if st.button(":red[Back to Semi-Structured Data]"):
299
  st.session_state.page = "semi_structured_data"
300
 
301
  # ----------------- HTML Data Page -----------------
302
  elif st.session_state.page == "html":
303
- st.title(":blue[HTML Data]")
304
-
305
  st.markdown("""
306
- HTML (HyperText Markup Language) is the standard language for documents designed to be displayed in a web browser.
307
  """)
308
-
309
- st.markdown("""
310
- Here's a simple HTML code example:
311
- """)
312
- st.code("""
313
- <!DOCTYPE html>
314
- <html>
315
- <head><title>Test Page</title></head>
316
- <body><h1>Hello World!</h1></body>
317
- </html>
318
- """, language="html")
319
-
320
- st.markdown("### How to Parse HTML in Python")
321
  st.code("""
322
  from bs4 import BeautifulSoup
323
 
324
- # Parse HTML
325
- html_code = '''<!DOCTYPE html>
326
- <html>
327
- <head><title>Test Page</title></head>
328
- <body><h1>Hello World!</h1></body>
329
- </html>'''
330
-
331
- soup = BeautifulSoup(html_code, 'html.parser')
332
  print(soup.prettify())
333
  """, language='python')
334
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
335
  if st.button(":red[Back to Semi-Structured Data]"):
336
  st.session_state.page = "semi_structured_data"
 
1
  import streamlit as st
 
2
  import json
3
  import xml.etree.ElementTree as ET
 
4
 
5
  # Initialize page navigation state
6
  if 'page' not in st.session_state:
 
74
  if st.button(":blue[📊 Excel]"):
75
  st.session_state.page = "excel"
76
 
77
+ if st.button(":blue[📑 CSV]"):
78
  st.session_state.page = "csv"
79
 
80
  if st.button(":red[Back to Data Collection]"):
 
83
  # ----------------- CSV Data Page -----------------
84
  elif st.session_state.page == "csv":
85
  st.title(":red[CSV Data Format]")
86
+ st.markdown("""
87
+ CSV (Comma-Separated Values) is a simple format used to store tabular data. Each line in the file represents a row, and commas separate the values within the row.
 
 
88
  """)
89
+ st.markdown("### How to Read a CSV file")
 
90
  st.code("""
91
  import pandas as pd
92
  # Read a CSV file
 
94
  print(df)
95
  """, language='python')
96
 
97
+ st.markdown("### Issues Encountered")
98
  st.write("""
99
+ - *File not found*: Incorrect file path.
100
+ - *Wrong delimiter*: The CSV uses a different delimiter (e.g., semicolon).
101
+ - *Missing Libraries*: pandas might be missing.
102
+ """)
103
 
104
+ st.write("### Solutions")
105
  st.code("""
106
+ # Install required libraries
107
+ # pip install pandas
108
+ # Handle file not found
109
  try:
110
  df = pd.read_csv('data.csv')
111
  except FileNotFoundError:
112
  print("File not found. Check the file path.")
113
+ # Handle incorrect delimiter
114
+ df = pd.read_csv('data.csv', delimiter=';')
115
+ """, language='python')
116
+
117
+ st.link_button(":blue[Open Jupyter Notebook](https://colab.research.google.com/drive/1sT35x4JH9s_hb31aMoUwtry-w8FE7fQg?usp=sharing)")
118
+
119
+ if st.button(":red[Back to Structured Data]"):
120
+ st.session_state.page = "structured_data"
121
+
122
+ # ----------------- Excel Data Page -----------------
123
+ elif st.session_state.page == "excel":
124
+ st.title(":red[Excel Data Format]")
125
+ st.write("### :blue[What is Excel?]")
126
+ st.write("Excel is a spreadsheet tool for storing data in tabular format with rows and columns. Common file extensions: .xls, .xlsx.")
127
+ st.write("### :blue[How to Read Excel ]")
128
+ st.code("""
129
+ import pandas as pd
130
+ # Read an Excel file
131
+ df = pd.read_excel('data.xlsx', sheet_name='Sheet1')
132
+ print(df)
133
+ """, language='python')
134
+
135
+ st.write("### Issues Encountered")
136
+ st.write("""
137
+ - *File not found*: Incorrect file path.
138
+ - *Sheet name error*: Specified sheet doesn't exist.
139
+ - *Missing libraries*: openpyxl or xlrd might be missing.
140
+ """)
141
+
142
+ st.write("### Solutions to These Issues")
143
+ st.code("""
144
+ # Install required libraries
145
+ # pip install openpyxl xlrd
146
+ # Handle missing file
147
  try:
148
+ df = pd.read_excel('data.xlsx', sheet_name='Sheet1')
149
+ except FileNotFoundError:
150
+ print("File not found. Check the file path.")
151
+ # List available sheet names
152
+ excel_file = pd.ExcelFile('data.xlsx')
153
+ print(excel_file.sheet_names)
154
  """, language='python')
155
 
156
+ st.link_button(":blue[Open Jupyter Notebook](https://colab.research.google.com/drive/1sT35x4JH9s_hb31aMoUwtry-w8FE7fQg?usp=sharing)")
157
 
158
  if st.button(":red[Back to Structured Data]"):
159
  st.session_state.page = "structured_data"
 
161
  # ----------------- Unstructured Data Page -----------------
162
  elif st.session_state.page == "unstructured_data":
163
  st.title(":blue[Unstructured Data]")
 
164
  st.markdown("""
165
  *Unstructured data* does not have a predefined format. It consists of various data types like text, images, videos, and audio files.
166
  Examples include:
 
227
  """)
228
  st.code("""
229
  import librosa
230
+ # Load an audio file
231
+ y, sr = librosa.load('sample_audio.wav')
232
+ # Display waveform
233
  import matplotlib.pyplot as plt
234
+ plt.figure(figsize=(10, 4))
235
+ plt.plot(y)
236
+ plt.title("Audio Waveform")
 
237
  plt.show()
238
  """, language='python')
239
 
240
+ st.link_button(":blue[Open Jupyter Notebook](https://colab.research.google.com/drive/1sT35x4JH9s_hb31aMoUwtry-w8FE7fQg?usp=sharing)")
 
 
 
 
 
 
 
 
 
 
 
 
241
 
 
242
  if st.button(":red[Back to Data Collection]"):
243
+ st.session_state.page = "data_collection"
244
 
245
  # ----------------- Semi-Structured Data Page -----------------
246
  elif st.session_state.page == "semi_structured_data":
247
  st.title(":blue[Semi-Structured Data]")
 
248
  st.markdown("""
249
+ Semi-structured data is data that does not conform to a rigid schema like structured data but still has some organization, typically with tags or markers to separate elements.
250
+ Examples:
251
  - XML files
252
+ - JSON files
253
+ - HTML documents
254
  """)
255
 
256
+ if st.button(":blue[📜 XML]"):
 
 
 
 
 
257
  st.session_state.page = "xml"
258
 
259
+ if st.button(":blue[📄 JSON]"):
260
+ st.session_state.page = "json"
261
+
262
+ if st.button(":blue[🌐 HTML]"):
263
  st.session_state.page = "html"
264
 
265
  if st.button(":red[Back to Data Collection]"):
266
  st.session_state.page = "data_collection"
267
 
268
+ # ----------------- XML Data Page -----------------
269
+ elif st.session_state.page == "xml":
270
+ st.title(":red[XML Data Format]")
 
271
  st.markdown("""
272
+ XML (Extensible Markup Language) is used to store and transport data. It uses tags to define data elements.
273
+ """)
274
+ st.markdown("### How to Read XML Data")
275
+ st.code("""
276
+ import xml.etree.ElementTree as ET
277
+ tree = ET.parse('data.xml')
278
+ root = tree.getroot()
279
+ print(root.tag, root.attrib)
280
+ for child in root:
281
+ print(child.tag, child.attrib)
282
+ for elem in child.iter():
283
+ print(elem.tag, elem.text)
284
+ """, language='python')
285
+
286
+ st.markdown("### Issues Encountered")
287
+ st.write("""
288
+ - *Invalid XML structure*: Ensure the XML is well-formed.
289
+ - *File not found*: Check the path to the XML file.
290
  """)
291
 
292
+ st.write("### Solutions")
293
  st.code("""
294
+ # Handle invalid XML structure
295
+ try:
296
+ tree = ET.parse('data.xml')
297
+ root = tree.getroot()
298
+ except ET.ParseError:
299
+ print("Error in parsing XML file")
300
+ """, language='python')
301
+
302
+ st.link_button(":blue[Open Jupyter Notebook](https://colab.research.google.com/drive/1sT35x4JH9s_hb31aMoUwtry-w8FE7fQg?usp=sharing)")
303
+
304
+ if st.button(":red[Back to Semi-Structured Data]"):
305
+ st.session_state.page = "semi_structured_data"
306
+
307
+ # ----------------- JSON Data Page -----------------
308
+ elif st.session_state.page == "json":
309
+ st.title(":red[JSON Data Format]")
310
+ st.markdown("""
311
+ JSON (JavaScript Object Notation) is a lightweight format for storing and exchanging data. It is human-readable and easy to parse.
312
+ """)
313
+ st.markdown("### How to Read JSON Data")
314
  st.code("""
315
  import json
316
+ # Open and load the JSON data
317
+ with open('data.json') as json_file:
318
+ data = json.load(json_file)
 
319
  print(data)
320
  """, language='python')
321
 
322
+ st.markdown("### Issues Encountered")
323
  st.write("""
324
+ - *Invalid JSON structure*: Ensure the file is a well-formed JSON.
325
+ - *File not found*: Incorrect path to JSON file.
326
  """)
327
 
328
  st.write("### Solutions")
329
  st.code("""
330
+ # Handle invalid JSON structure
331
  try:
332
+ with open('data.json') as json_file:
333
+ data = json.load(json_file)
334
+ except json.JSONDecodeError:
335
+ print("Error: Invalid JSON format")
 
 
 
336
  """, language='python')
337
 
338
+ st.link_button(":blue[Open Jupyter Notebook](https://colab.research.google.com/drive/1sT35x4JH9s_hb31aMoUwtry-w8FE7fQg?usp=sharing)")
339
 
340
  if st.button(":red[Back to Semi-Structured Data]"):
341
  st.session_state.page = "semi_structured_data"
342
 
343
  # ----------------- HTML Data Page -----------------
344
  elif st.session_state.page == "html":
345
+ st.title(":red[HTML Data Format]")
 
346
  st.markdown("""
347
+ HTML (Hypertext Markup Language) is the standard markup language for documents designed to be displayed in a web browser.
348
  """)
349
+ st.markdown("### How to Handle HTML Data")
 
 
 
 
 
 
 
 
 
 
 
 
350
  st.code("""
351
  from bs4 import BeautifulSoup
352
 
353
+ html_content = '''<html><head><title>Test Page</title></head><body><h1>Welcome</h1></body></html>'''
354
+ soup = BeautifulSoup(html_content, 'html.parser')
 
 
 
 
 
 
355
  print(soup.prettify())
356
  """, language='python')
357
 
358
+ st.markdown("### Issues Encountered")
359
+ st.write("""
360
+ - *Malformed HTML*: HTML content needs to be correctly structured.
361
+ - *Missing libraries*: BeautifulSoup might be missing.
362
+ """)
363
+
364
+ st.write("### Solutions")
365
+ st.code("""
366
+ # Install BeautifulSoup if missing
367
+ # pip install beautifulsoup4
368
+ # Correct malformed HTML
369
+ """, language='python')
370
+
371
+ st.link_button(":blue[Open Jupyter Notebook](https://colab.research.google.com/drive/1sT35x4JH9s_hb31aMoUwtry-w8FE7fQg?usp=sharing)")
372
+
373
  if st.button(":red[Back to Semi-Structured Data]"):
374
  st.session_state.page = "semi_structured_data"