Harika22 committed on
Commit
8db177a
·
verified ·
1 Parent(s): f520b3d

Update pages/3_Life cycle of ML.py

Browse files
Files changed (1) hide show
  1. pages/3_Life cycle of ML.py +129 -0
pages/3_Life cycle of ML.py CHANGED
@@ -64,3 +64,132 @@ st.markdown("""
64
  </style>
65
  """, unsafe_allow_html=True)
66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  </style>
65
  """, unsafe_allow_html=True)
66
 
67
+ import webbrowser
68
+
69
+ # Function to display detailed content for "Data Collection" page
70
def data_collection_page():
    """Render the "Data Collection" page.

    Writes a short introduction to data and its three broad types, then
    shows a radio selector for the data type. Choosing "Structured Data"
    delegates to ``display_structured_data_info``; the other two choices
    have no detail view yet, so a notice is shown instead of rendering
    nothing at all.
    """
    st.write("### What is Data?")
    st.write("""
    Data refers to raw facts and figures that are collected and stored for analysis.
    It can be structured or unstructured and comes from various sources like sensors, logs, transactions, and more.
    """)

    st.write("### Types of Data")
    st.write("""
    1. **Structured Data**: Organized data that follows a strict schema (e.g., rows and columns).
    2. **Unstructured Data**: Data that doesn't follow a predefined model (e.g., images, text).
    3. **Semi-Structured Data**: Data that has some organizational properties but isn't fully structured (e.g., JSON, XML).
    """)

    # Radio selector for the data type; the first option is the one this
    # page currently has a detail view for.
    selected_data_type = st.radio(
        "Choose Data Type",
        ["Structured Data", "Unstructured Data", "Semi-Structured Data"],
    )

    if selected_data_type == "Structured Data":
        display_structured_data_info()
    else:
        # Previously these selections silently produced an empty page.
        st.info(f"Details for {selected_data_type} are not available yet.")
89
+
90
+ # Function to display structured data information and formats
91
def display_structured_data_info():
    """Describe structured data and render the chosen file-format section.

    Shows a radio selector with the formats Excel, CSV and XML and calls
    the matching ``display_*_info`` function for the selected one.
    """
    st.write("### Structured Data")
    st.write("Structured data is data that is highly organized and stored in a fixed format, like tables, rows, and columns.")

    # Radio label -> function that renders that format's section. The dict
    # keeps insertion order, so the options appear as Excel, CSV, XML.
    format_renderers = {
        "Excel": display_excel_info,
        "CSV": display_csv_info,
        "XML": display_xml_info,
    }

    chosen_format = st.radio("Choose a Data Format", list(format_renderers))

    render_section = format_renderers.get(chosen_format)
    if render_section is not None:
        render_section()
104
+
105
+ # Function to display Excel-related information
106
def display_excel_info():
    """Render the Excel section: overview text plus a code-example button.

    When the "Open Excel Code Example" button is pressed,
    ``open_code_example("excel")`` is called.
    """
    excel_overview = """
    **What it is**: Excel is a popular spreadsheet format commonly used for storing and analyzing structured data.

    **How to read these files**:
    - Use `pandas.read_excel()` to read Excel files in Python.

    **Issues encountered when handling Excel files**:
    - Large files can cause memory issues.
    - Compatibility problems with different Excel versions.

    **How to overcome these errors**:
    - Break large files into smaller chunks.
    - Use libraries like `openpyxl` for handling newer Excel files and `xlrd` for older ones.
    """

    st.write("### Excel Format")
    st.write(excel_overview)

    # Nothing more to do unless the example button was pressed on this run.
    if not st.button("Open Excel Code Example"):
        return
    open_code_example("excel")
126
+
127
+ # Function to display CSV-related information
128
def display_csv_info():
    """Render the CSV section: overview text plus a code-example button.

    When the "Open CSV Code Example" button is pressed,
    ``open_code_example("csv")`` is called.
    """
    csv_overview = """
    **What it is**: CSV (Comma Separated Values) is a text format for representing tabular data, where values are separated by commas.

    **How to read these files**:
    - Use `pandas.read_csv()` to read CSV files in Python.

    **Issues encountered when handling CSV files**:
    - Improper handling of special characters or delimiters.
    - Missing or inconsistent data.

    **How to overcome these errors**:
    - Specify delimiters using the `delimiter` parameter.
    - Handle missing data by using `fillna()` or `dropna()` methods in pandas.
    """

    st.write("### CSV Format")
    st.write(csv_overview)

    # Nothing more to do unless the example button was pressed on this run.
    if not st.button("Open CSV Code Example"):
        return
    open_code_example("csv")
148
+
149
+ # Function to display XML-related information
150
def display_xml_info():
    """Render the XML section: overview text plus a code-example button.

    When the "Open XML Code Example" button is pressed,
    ``open_code_example("xml")`` is called.
    """
    xml_overview = """
    **What it is**: XML (eXtensible Markup Language) is a flexible and structured format used to store data in a hierarchical manner.

    **How to read these files**:
    - Use `pandas.read_xml()` to read XML files or `xml.etree.ElementTree` for more complex parsing.

    **Issues encountered when handling XML files**:
    - Complex nested structures can be hard to parse.
    - Compatibility issues between different XML schemas.

    **How to overcome these errors**:
    - Use XPath or `lxml` for more advanced parsing.
    - Handle encoding issues using the `encoding` parameter while reading the file.
    """

    st.write("### XML Format")
    st.write(xml_overview)

    # Nothing more to do unless the example button was pressed on this run.
    if not st.button("Open XML Code Example"):
        return
    open_code_example("xml")
170
+
171
+ # Function to open a Jupyter Notebook or PDF for coding examples
172
def open_code_example(data_format):
    """Show a link to the coding example for the given data format.

    Args:
        data_format: One of ``"excel"``, ``"csv"`` or ``"xml"``.

    The link is rendered in the page as Markdown rather than opened with
    ``webbrowser``: ``webbrowser`` acts on the machine running the Python
    process, so on a hosted deployment it would not open anything in the
    visitor's browser. An unrecognised ``data_format`` produces a warning
    instead of silently doing nothing.
    """
    # TODO: the URLs below are placeholders; replace them with the real
    # notebook/PDF locations.
    example_links = {
        "excel": "https://yourlinktoexcelcode.com",
        "csv": "https://yourlinktocsvcode.com",
        "xml": "https://yourlinktoxmlcode.com",
    }

    link = example_links.get(data_format)
    if not link:
        st.warning(f"No code example is available for format: {data_format!r}")
        return

    st.markdown(f"[Open the {str(data_format).upper()} code example]({link})")
183
+
184
+ # Main Streamlit app
185
def main():
    """Render the page title, the sidebar navigation and the chosen section.

    ``st.sidebar.button`` is only True on the rerun caused by its own
    click. Every interaction with a widget inside the Data Collection page
    triggers another rerun, on which the button is False again, so the
    selection is stored in ``st.session_state`` to keep the page visible
    while the user works with its radios and buttons.
    """
    st.title("Machine Learning Life Cycle")
    st.sidebar.title("ML Life Cycle Navigation")

    # Session state is shared by all pages of the app, hence the prefixed key.
    page_flag = "ml_life_cycle_show_data_collection"

    if st.sidebar.button("Data Collection"):
        st.session_state[page_flag] = True

    if st.session_state.get(page_flag, False):
        data_collection_page()


# Run the main function to start the app
if __name__ == "__main__":
    main()