Spaces:
Sleeping
Sleeping
Update pages/3_Life cycle of ML.py
Browse files- pages/3_Life cycle of ML.py +129 -0
pages/3_Life cycle of ML.py
CHANGED
|
@@ -64,3 +64,132 @@ st.markdown("""
|
|
| 64 |
</style>
|
| 65 |
""", unsafe_allow_html=True)
|
| 66 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
</style>
|
| 65 |
""", unsafe_allow_html=True)
|
| 66 |
|
| 67 |
+
import webbrowser
|
| 68 |
+
|
| 69 |
+
# Function to display detailed content for "Data Collection" page
|
| 70 |
+
def data_collection_page():
    """Render the "Data Collection" page of the ML life cycle.

    Writes an introduction to data and its three broad categories, then
    shows a radio selector for the category. Only the structured option
    currently has a detail view; the other two choices render nothing more.
    """
    what_is_data = """
    Data refers to raw facts and figures that are collected and stored for analysis.
    It can be structured or unstructured and comes from various sources like sensors, logs, transactions, and more.
    """
    data_types = """
    1. **Structured Data**: Organized data that follows a strict schema (e.g., rows and columns).
    2. **Unstructured Data**: Data that doesn't follow a predefined model (e.g., images, text).
    3. **Semi-Structured Data**: Data that has some organizational properties but isn't fully structured (e.g., JSON, XML).
    """

    st.write("### What is Data?")
    st.write(what_is_data)

    st.write("### Types of Data")
    st.write(data_types)

    # Radio selector (not a button): exactly one category is always chosen.
    selected_data_type = st.radio("Choose Data Type", ["Structured Data", "Unstructured Data", "Semi-Structured Data"])

    if selected_data_type != "Structured Data":
        return
    display_structured_data_info()
|
| 89 |
+
|
| 90 |
+
# Function to display structured data information and formats
|
| 91 |
+
def display_structured_data_info():
    """Describe structured data and show details for one chosen file format.

    Presents a radio selector over Excel, CSV and XML and delegates to the
    matching ``display_*_info`` function for the selected format.
    """
    st.write("### Structured Data")
    st.write("Structured data is data that is highly organized and stored in a fixed format, like tables, rows, and columns.")

    # Radio selector for the structured data format (Excel, CSV, XML)
    data_formats = st.radio("Choose a Data Format", ["Excel", "CSV", "XML"])

    # Map each radio label to the function that renders its details.
    renderers = {
        "Excel": display_excel_info,
        "CSV": display_csv_info,
        "XML": display_xml_info,
    }
    render = renderers.get(data_formats)
    if render is not None:
        render()
|
| 104 |
+
|
| 105 |
+
# Function to display Excel-related information
|
| 106 |
+
def display_excel_info():
    """Render the Excel section: description, reading tips, pitfalls, fixes.

    Also shows a button that, when clicked, calls
    ``open_code_example("excel")``.
    """
    excel_notes = """
    **What it is**: Excel is a popular spreadsheet format commonly used for storing and analyzing structured data.

    **How to read these files**:
    - Use `pandas.read_excel()` to read Excel files in Python.

    **Issues encountered when handling Excel files**:
    - Large files can cause memory issues.
    - Compatibility problems with different Excel versions.

    **How to overcome these errors**:
    - Break large files into smaller chunks.
    - Use libraries like `openpyxl` for handling newer Excel files and `xlrd` for older ones.
    """

    st.write("### Excel Format")
    st.write(excel_notes)

    # Offer the coding example (notebook or PDF) for this format.
    wants_example = st.button("Open Excel Code Example")
    if wants_example:
        open_code_example("excel")
|
| 126 |
+
|
| 127 |
+
# Function to display CSV-related information
|
| 128 |
+
def display_csv_info():
    """Render the CSV section: description, reading tips, pitfalls, fixes.

    Also shows a button that, when clicked, calls
    ``open_code_example("csv")``.
    """
    csv_notes = """
    **What it is**: CSV (Comma Separated Values) is a text format for representing tabular data, where values are separated by commas.

    **How to read these files**:
    - Use `pandas.read_csv()` to read CSV files in Python.

    **Issues encountered when handling CSV files**:
    - Improper handling of special characters or delimiters.
    - Missing or inconsistent data.

    **How to overcome these errors**:
    - Specify delimiters using the `delimiter` parameter.
    - Handle missing data by using `fillna()` or `dropna()` methods in pandas.
    """

    st.write("### CSV Format")
    st.write(csv_notes)

    # Offer the coding example (notebook or PDF) for this format.
    wants_example = st.button("Open CSV Code Example")
    if wants_example:
        open_code_example("csv")
|
| 148 |
+
|
| 149 |
+
# Function to display XML-related information
|
| 150 |
+
def display_xml_info():
    """Render the XML section: description, reading tips, pitfalls, fixes.

    Also shows a button that, when clicked, calls
    ``open_code_example("xml")``.
    """
    xml_notes = """
    **What it is**: XML (eXtensible Markup Language) is a flexible and structured format used to store data in a hierarchical manner.

    **How to read these files**:
    - Use `pandas.read_xml()` to read XML files or `xml.etree.ElementTree` for more complex parsing.

    **Issues encountered when handling XML files**:
    - Complex nested structures can be hard to parse.
    - Compatibility issues between different XML schemas.

    **How to overcome these errors**:
    - Use XPath or `lxml` for more advanced parsing.
    - Handle encoding issues using the `encoding` parameter while reading the file.
    """

    st.write("### XML Format")
    st.write(xml_notes)

    # Offer the coding example (notebook or PDF) for this format.
    wants_example = st.button("Open XML Code Example")
    if wants_example:
        open_code_example("xml")
|
| 170 |
+
|
| 171 |
+
# Function to show a link to the Jupyter Notebook or PDF with coding examples
def open_code_example(data_format):
    """Render a clickable link to the coding example for ``data_format``.

    Args:
        data_format: One of ``"excel"``, ``"csv"`` or ``"xml"``. Any other
            value is ignored and nothing is rendered.

    The link is written into the page instead of calling
    ``webbrowser.open_new_tab``: ``webbrowser`` launches a browser from the
    Python process running the app, so on a hosted deployment it cannot
    open a tab for the person viewing the page.
    """
    # Placeholder URLs: replace with the real notebook/PDF locations.
    example_links = {
        "excel": "https://yourlinktoexcelcode.com",
        "csv": "https://yourlinktocsvcode.com",
        "xml": "https://yourlinktoxmlcode.com",
    }

    link = example_links.get(data_format)
    if link:
        # A markdown link is rendered in the visitor's own browser.
        st.markdown(f"[Open code example]({link})")
|
| 183 |
+
|
| 184 |
+
# Main Streamlit app
|
| 185 |
+
def main():
    """Entry point: draw the title, the sidebar navigation and the chosen page.

    ``st.sidebar.button`` returns True only during the rerun triggered by
    its own click. The "Data Collection" page contains further widgets, and
    using any of them reruns the script with the button back at False, which
    would make the page disappear. The choice is therefore stored in
    ``st.session_state`` so the page stays visible across reruns.
    """
    st.title("Machine Learning Life Cycle")
    st.sidebar.title("ML Life Cycle Navigation")

    # Remember that the "Data Collection" page was requested.
    if st.sidebar.button("Data Collection"):
        st.session_state["show_data_collection"] = True

    if st.session_state.get("show_data_collection", False):
        data_collection_page()


# Run the main function to start the app
if __name__ == "__main__":
    main()
|