Spaces:

sree4411
/

Zero_To_Hero_In_MachineLearning

Sleeping

App Files Community

sree4411 committed on Dec 14, 2024

Commit

eef0cbc

verified ·

1 Parent(s): ee51681

Update pages/3_Life Cycle Of ML Project.py

Browse files

Files changed (1) hide show

pages/3_Life Cycle Of ML Project.py +145 -107

pages/3_Life Cycle Of ML Project.py CHANGED Viewed

@@ -1,8 +1,6 @@
 import streamlit as st
-import pandas as pd
 import json
 import xml.etree.ElementTree as ET
-import html
 # Initialize page navigation state
 if 'page' not in st.session_state:
@@ -76,7 +74,7 @@ elif st.session_state.page == "structured_data":
     if st.button(":blue[📊 Excel]"):
         st.session_state.page = "excel"
-    if st.button(":blue[📂 CSV]"):
         st.session_state.page = "csv"
     if st.button(":red[Back to Data Collection]"):
@@ -85,13 +83,10 @@ elif st.session_state.page == "structured_data":
 # ----------------- CSV Data Page -----------------
 elif st.session_state.page == "csv":
     st.title(":red[CSV Data Format]")
-    st.write("### :blue[What is CSV?]")
-    st.write("""
-    CSV (Comma Separated Values) is a simple file format used to store tabular data, such as a spreadsheet or database.
-    It is widely used due to its simplicity and ease of use.
     """)
-    st.write("### :blue[How to Read CSV ]")
     st.code("""
 import pandas as pd
 # Read a CSV file
@@ -99,27 +94,66 @@ df = pd.read_csv('data.csv')
 print(df)
     """, language='python')
-    st.write("### Issues Encountered")
     st.write("""
-- *File not found*: Incorrect file path.
-- *Malformed CSV*: Incorrect number of fields in rows.
-""")
-    st.write("### Solutions to These Issues")
     st.code("""
-# Handle missing file error
 try:
     df = pd.read_csv('data.csv')
 except FileNotFoundError:
     print("File not found. Check the file path.")
-# Handle malformed CSV error
 try:
-    df = pd.read_csv('data.csv', error_bad_lines=False)
-except pd.errors.ParserError:
-    print("Malformed CSV. Check the CSV format.")
     """, language='python')
-    st.link_button(":blue[Jupyter Notebook(colab)]","https://colab.research.google.com/drive/1sT35x4JH9s_hb31aMoUwtry-w8FE7fQg?usp=sharing")
     if st.button(":red[Back to Structured Data]"):
         st.session_state.page = "structured_data"
@@ -127,7 +161,6 @@ except pd.errors.ParserError:
 # ----------------- Unstructured Data Page -----------------
 elif st.session_state.page == "unstructured_data":
     st.title(":blue[Unstructured Data]")
     st.markdown("""
     *Unstructured data* does not have a predefined format. It consists of various data types like text, images, videos, and audio files.
     Examples include:
@@ -194,143 +227,148 @@ cv2.destroyAllWindows()
     """)
     st.code("""
 import librosa
-import librosa.display
 import matplotlib.pyplot as plt
-# Load audio file
-y, sr = librosa.load('sample_audio.mp3')
-librosa.display.waveshow(y, sr=sr)
-plt.title('Waveform')
 plt.show()
     """, language='python')
-    st.markdown("### Challenges with Unstructured Data")
-    st.write("""
-    - *Noise and Inconsistency*: Data is often incomplete or noisy.
-    - *Storage Requirements*: Large size and variability in data types.
-    - *Processing Time*: Analyzing unstructured data is computationally expensive.
-    """)
-    st.markdown("### Solutions")
-    st.write("""
-    - *Data Cleaning*: Preprocess data to remove noise.
-    - *Efficient Storage*: Use NoSQL databases (e.g., MongoDB) or cloud storage.
-    - *Parallel Processing*: Utilize frameworks like Apache Spark.
-    """)
-    # Back to Data Collection
     if st.button(":red[Back to Data Collection]"):
-        st.session_state.page = "data_collection"
 # ----------------- Semi-Structured Data Page -----------------
 elif st.session_state.page == "semi_structured_data":
     st.title(":blue[Semi-Structured Data]")
     st.markdown("""
-    Semi-structured data has some level of organization, but not as rigid as structured data. Examples include:
-    - JSON files
     - XML files
-    - HTML files
     """)
-    st.markdown("### JSON Example")
-    if st.button(":blue[JSON Handling]"):
-        st.session_state.page = "json"
-    st.markdown("### XML Example")
-    if st.button(":blue[XML Handling]"):
         st.session_state.page = "xml"
-    st.markdown("### HTML Example")
-    if st.button(":blue[HTML Handling]"):
         st.session_state.page = "html"
     if st.button(":red[Back to Data Collection]"):
         st.session_state.page = "data_collection"
-# ----------------- JSON Data Page -----------------
-elif st.session_state.page == "json":
-    st.title(":blue[JSON Data]")
     st.markdown("""
-    JSON (JavaScript Object Notation) is a lightweight data-interchange format that is easy for humans to read and write, and easy for machines to parse and generate.
     """)
-    st.write("### Example of JSON:")
     st.code("""
-{
-    "name": "John",
-    "age": 30,
-    "city": "New York"
-}
-    """, language='json')
-    st.write("### How to Read JSON Data in Python")
     st.code("""
 import json
-# Load JSON data
-with open('data.json', 'r') as file:
-    data = json.load(file)
     print(data)
     """, language='python')
-    st.write("### Issues with JSON Files")
     st.write("""
-    - *File not found*: Check the file path.
-    - *Incorrect Format*: Ensure proper JSON formatting.
     """)
     st.write("### Solutions")
     st.code("""
-# Handle JSON file not found error
 try:
-    with open('data.json', 'r') as file:
-        data = json.load(file)
-except FileNotFoundError:
-    print("File not found.")
-# Validate JSON format
-import json
-json.loads(data)
     """, language='python')
-    st.link_button(":blue[JSON Example in Jupyter Notebook]","https://colab.research.google.com/drive/1sT35x4JH9s_hb31aMoUwtry-w8FE7fQg?usp=sharing")
     if st.button(":red[Back to Semi-Structured Data]"):
         st.session_state.page = "semi_structured_data"
 # ----------------- HTML Data Page -----------------
 elif st.session_state.page == "html":
-    st.title(":blue[HTML Data]")
     st.markdown("""
-    HTML (HyperText Markup Language) is the standard language for documents designed to be displayed in a web browser.
     """)
-    st.markdown("""
-    Here's a simple HTML code example:
-    """)
-    st.code("""
-<!DOCTYPE html>
-<html>
-<head><title>Test Page</title></head>
-<body><h1>Hello World!</h1></body>
-</html>
-    """, language="html")
-    st.markdown("### How to Parse HTML in Python")
     st.code("""
 from bs4 import BeautifulSoup
-# Parse HTML
-html_code = '''<!DOCTYPE html>
-<html>
-<head><title>Test Page</title></head>
-<body><h1>Hello World!</h1></body>
-</html>'''
-soup = BeautifulSoup(html_code, 'html.parser')
 print(soup.prettify())
     """, language='python')
     if st.button(":red[Back to Semi-Structured Data]"):
         st.session_state.page = "semi_structured_data"

 import streamlit as st
 import json
 import xml.etree.ElementTree as ET
 # Initialize page navigation state
 if 'page' not in st.session_state:
     if st.button(":blue[📊 Excel]"):
         st.session_state.page = "excel"
+    if st.button(":blue[📑 CSV]"):
         st.session_state.page = "csv"
     if st.button(":red[Back to Data Collection]"):
 # ----------------- CSV Data Page -----------------
 elif st.session_state.page == "csv":
     st.title(":red[CSV Data Format]")
+    st.markdown("""
+    CSV (Comma-Separated Values) is a simple format used to store tabular data. Each line in the file represents a row, and commas separate the values within the row.
     """)
+    st.markdown("### How to Read a CSV file")
     st.code("""
 import pandas as pd
 # Read a CSV file
 print(df)
     """, language='python')
+    st.markdown("### Issues Encountered")
     st.write("""
+    - *File not found*: Incorrect file path.
+    - *Wrong delimiter*: The CSV uses a different delimiter (e.g., semicolon).
+    - *Missing Libraries*: pandas might be missing.
+    """)
+    st.write("### Solutions")
     st.code("""
+# Install required libraries
+# pip install pandas
+# Handle file not found
 try:
     df = pd.read_csv('data.csv')
 except FileNotFoundError:
     print("File not found. Check the file path.")
+# Handle incorrect delimiter
+df = pd.read_csv('data.csv', delimiter=';')
+    """, language='python')
+    st.link_button(":blue[Open Jupyter Notebook](https://colab.research.google.com/drive/1sT35x4JH9s_hb31aMoUwtry-w8FE7fQg?usp=sharing)")
+    if st.button(":red[Back to Structured Data]"):
+        st.session_state.page = "structured_data"
+# ----------------- Excel Data Page -----------------
+elif st.session_state.page == "excel":
+    st.title(":red[Excel Data Format]")
+    st.write("### :blue[What is Excel?]")
+    st.write("Excel is a spreadsheet tool for storing data in tabular format with rows and columns. Common file extensions: .xls, .xlsx.")
+    st.write("### :blue[How to Read Excel ]")
+    st.code("""
+import pandas as pd
+# Read an Excel file
+df = pd.read_excel('data.xlsx', sheet_name='Sheet1')
+print(df)
+    """, language='python')
+    st.write("### Issues Encountered")
+    st.write("""
+    - *File not found*: Incorrect file path.
+    - *Sheet name error*: Specified sheet doesn't exist.
+    - *Missing libraries*: openpyxl or xlrd might be missing.
+    """)
+    st.write("### Solutions to These Issues")
+    st.code("""
+# Install required libraries
+# pip install openpyxl xlrd
+# Handle missing file
 try:
+    df = pd.read_excel('data.xlsx', sheet_name='Sheet1')
+except FileNotFoundError:
+    print("File not found. Check the file path.")
+# List available sheet names
+excel_file = pd.ExcelFile('data.xlsx')
+print(excel_file.sheet_names)
     """, language='python')
+    st.link_button(":blue[Open Jupyter Notebook](https://colab.research.google.com/drive/1sT35x4JH9s_hb31aMoUwtry-w8FE7fQg?usp=sharing)")
     if st.button(":red[Back to Structured Data]"):
         st.session_state.page = "structured_data"
 # ----------------- Unstructured Data Page -----------------
 elif st.session_state.page == "unstructured_data":
     st.title(":blue[Unstructured Data]")
     st.markdown("""
     *Unstructured data* does not have a predefined format. It consists of various data types like text, images, videos, and audio files.
     Examples include:
     """)
     st.code("""
 import librosa
+# Load an audio file
+y, sr = librosa.load('sample_audio.wav')
+# Display waveform
 import matplotlib.pyplot as plt
+plt.figure(figsize=(10, 4))
+plt.plot(y)
+plt.title("Audio Waveform")
 plt.show()
     """, language='python')
+    st.link_button(":blue[Open Jupyter Notebook](https://colab.research.google.com/drive/1sT35x4JH9s_hb31aMoUwtry-w8FE7fQg?usp=sharing)")
     if st.button(":red[Back to Data Collection]"):
+        st.session_state.page = "data_collection"
 # ----------------- Semi-Structured Data Page -----------------
 elif st.session_state.page == "semi_structured_data":
     st.title(":blue[Semi-Structured Data]")
     st.markdown("""
+    Semi-structured data is data that does not conform to a rigid schema like structured data but still has some organization, typically with tags or markers to separate elements.
+    Examples:
     - XML files
+    - JSON files
+    - HTML documents
     """)
+    if st.button(":blue[📜 XML]"):
         st.session_state.page = "xml"
+    if st.button(":blue[📄 JSON]"):
+        st.session_state.page = "json"
+    if st.button(":blue[🌐 HTML]"):
         st.session_state.page = "html"
     if st.button(":red[Back to Data Collection]"):
         st.session_state.page = "data_collection"
+# ----------------- XML Data Page -----------------
+elif st.session_state.page == "xml":
+    st.title(":red[XML Data Format]")
     st.markdown("""
+    XML (Extensible Markup Language) is used to store and transport data. It uses tags to define data elements.
+    """)
+    st.markdown("### How to Read XML Data")
+    st.code("""
+import xml.etree.ElementTree as ET
+tree = ET.parse('data.xml')
+root = tree.getroot()
+print(root.tag, root.attrib)
+for child in root:
+    print(child.tag, child.attrib)
+    for elem in child.iter():
+        print(elem.tag, elem.text)
+    """, language='python')
+    st.markdown("### Issues Encountered")
+    st.write("""
+    - *Invalid XML structure*: Ensure the XML is well-formed.
+    - *File not found*: Check the path to the XML file.
     """)
+    st.write("### Solutions")
     st.code("""
+# Handle invalid XML structure
+try:
+    tree = ET.parse('data.xml')
+    root = tree.getroot()
+except ET.ParseError:
+    print("Error in parsing XML file")
+    """, language='python')
+    st.link_button(":blue[Open Jupyter Notebook](https://colab.research.google.com/drive/1sT35x4JH9s_hb31aMoUwtry-w8FE7fQg?usp=sharing)")
+    if st.button(":red[Back to Semi-Structured Data]"):
+        st.session_state.page = "semi_structured_data"
+# ----------------- JSON Data Page -----------------
+elif st.session_state.page == "json":
+    st.title(":red[JSON Data Format]")
+    st.markdown("""
+    JSON (JavaScript Object Notation) is a lightweight format for storing and exchanging data. It is human-readable and easy to parse.
+    """)
+    st.markdown("### How to Read JSON Data")
     st.code("""
 import json
+# Open and load the JSON data
+with open('data.json') as json_file:
+    data = json.load(json_file)
     print(data)
     """, language='python')
+    st.markdown("### Issues Encountered")
     st.write("""
+    - *Invalid JSON structure*: Ensure the file is a well-formed JSON.
+    - *File not found*: Incorrect path to JSON file.
     """)
     st.write("### Solutions")
     st.code("""
+# Handle invalid JSON structure
 try:
+    with open('data.json') as json_file:
+        data = json.load(json_file)
+except json.JSONDecodeError:
+    print("Error: Invalid JSON format")
     """, language='python')
+    st.link_button(":blue[Open Jupyter Notebook](https://colab.research.google.com/drive/1sT35x4JH9s_hb31aMoUwtry-w8FE7fQg?usp=sharing)")
     if st.button(":red[Back to Semi-Structured Data]"):
         st.session_state.page = "semi_structured_data"
 # ----------------- HTML Data Page -----------------
 elif st.session_state.page == "html":
+    st.title(":red[HTML Data Format]")
     st.markdown("""
+    HTML (Hypertext Markup Language) is the standard markup language for documents designed to be displayed in a web browser.
     """)
+    st.markdown("### How to Handle HTML Data")
     st.code("""
 from bs4 import BeautifulSoup
+html_content = '''<html><head><title>Test Page</title></head><body><h1>Welcome</h1></body></html>'''
+soup = BeautifulSoup(html_content, 'html.parser')
 print(soup.prettify())
     """, language='python')
+    st.markdown("### Issues Encountered")
+    st.write("""
+    - *Malformed HTML*: HTML content needs to be correctly structured.
+    - *Missing libraries*: BeautifulSoup might be missing.
+    """)
+    st.write("### Solutions")
+    st.code("""
+# Install BeautifulSoup if missing
+# pip install beautifulsoup4
+# Correct malformed HTML
+    """, language='python')
+    st.link_button(":blue[Open Jupyter Notebook](https://colab.research.google.com/drive/1sT35x4JH9s_hb31aMoUwtry-w8FE7fQg?usp=sharing)")
     if st.button(":red[Back to Semi-Structured Data]"):
         st.session_state.page = "semi_structured_data"