MachineLearning / pages /life_cycle of_ml.py
bhuvi06's picture
Upload 4 files
47900ee verified
import streamlit as st
import pandas as pd
# Page-level configuration (browser tab title and centered layout).
st.set_page_config(layout="centered", page_title="Your Custom ML Lifecycle")

# Stylesheet injected into the page: background colour, centered rounded
# buttons with a hover effect, centered headers, and an ".arrow" helper class.
# NOTE: the inline CSS comment labels the base button colour "Orange", but
# #588c7e is a muted green; only the hover colour (#E64D00) is an orange tone.
_PAGE_CSS = """
<style>
/* Set full-page background color */
.main {
background-color: #f0f8ff; /* Alice Blue */
}
/* Center the buttons and style as rounded rectangles */
.stButton > button {
display: block;
margin: 10px auto;
width: 80%; /* Adjust button width */
background-color: #588c7e; /* Orange */
color: white;
border: none;
padding: 15px 30px;
text-align: center;
font-size: 16px;
border-radius: 10px; /* Rounded corners */
cursor: pointer;
transition-duration: 0.4s;
}
/* Add hover effect to buttons */
.stButton > button:hover {
background-color: #E64D00; /* Darker orange */
}
/* Style headers */
h1, h2 {
color: #2c3e50; /* Dark blue-grey */
text-align: center;
}
/* Style for arrows */
.arrow {
font-size: 30px;
text-align: center;
display: block;
width: 100%;
margin-top: 10px;
margin-bottom: 10px;
}
</style>
"""

# unsafe_allow_html is needed so the <style> tag is emitted as raw HTML.
st.markdown(_PAGE_CSS, unsafe_allow_html=True)
# Navigation state: the key of the page to render is kept in session state.
# Seed it with the main page the first time the script runs in a session.
if "page" not in st.session_state:
    st.session_state["page"] = "main"
def go_to_main_page():
    """Select the main lifecycle page as the page to render."""
    st.session_state["page"] = "main"
def go_to_data_collection_page():
    """Select the data-collection page as the page to render."""
    st.session_state["page"] = "data_collection"
def go_to_semi_structured_data_page():
    """Select the semi-structured-data page as the page to render."""
    st.session_state["page"] = "semi_structured_data"
def go_to_csv_page():
    """Select the CSV page as the page to render."""
    st.session_state["page"] = "csv"
# Main Lifecycle Steps Page
def main_page():
    """Render the lifecycle overview with one button per step.

    Clicking "2. Data Collection" navigates to the data-collection page.
    Clicking any other step shows that step's description directly below
    its button. A horizontal rule separates consecutive steps.
    """
    st.title("Machine Learning Project Lifecycle")

    # Step label -> description. Dicts keep insertion order, so this mapping
    # also defines the display order and no parallel list of labels is needed.
    descriptions = {
        "1. Problem Statement": "Defines the goal to achieve by the end of the project.",
        "2. Data Collection": "Collect the data based on the problem statement from websites, APIs, web scraping, or manually.",
        "3. Simple EDA": "Simple EDA evaluates data quality by identifying issues like missing values, outliers, and duplicates.",
        "4. Data Preprocessing": "Converts raw data into clean, preprocessed data:",
        "5. EDA": "Exploratory Data Analysis gives a clear understanding of the dataset.",
        "6. Feature Engineering": "Feature engineering improves model performance by creating, transforming, or selecting relevant features.",
        "7. Training the Model": "Train the model on 70% of the data to learn the relationship between input and output features.",
        "8. Testing the Model": "Evaluate the model on 30% of the data to assess its performance.",
        "9. Deployment": "Deploy the model on a web server, app, or platform to make it accessible to users.",
        "10. Monitoring": "Continuously track the model’s performance and retrain it if necessary.",
    }

    for i, (step, description) in enumerate(descriptions.items()):
        if step == "2. Data Collection":
            # Handle this step in its own branch so exactly one button is
            # rendered for it. The previous `label-check and st.button(...)`
            # condition fell through to the generic branch whenever the button
            # was not clicked, drawing a second "2. Data Collection" button.
            # Navigating through on_click updates the page key before the
            # rerun, so the new page appears after a single click.
            st.button(
                step,
                key=f"data_collection_{i}",
                on_click=go_to_data_collection_page,
            )
        elif st.button(step, key=f"step_{i}"):
            st.subheader(step)
            st.write(description)
        st.write("---")
# Data Collection Page
def data_collection_page():
    """Render the data-collection page and its navigation buttons.

    Shows the explanatory text, an illustration loaded from a local image
    file, and buttons leading to the data-type pages and back to the main
    page. All navigation is done through button callbacks so the target
    page is shown after a single click.
    """

    def _open_page(page_key):
        # Button callback: store the key of the page to render next.
        st.session_state.page = page_key

    st.header("Data Collection")
    st.write("### What is Data?")
    st.write("Data refers to information that is processed or stored by a computer. This can include text, numbers, images, audio, or video.")
    st.write("### What is Data Collection?")
    st.write("Data Collection is collection of data from various sources based on the Problem statement.")
    st.write("#### Step 1: Problem-Based Approach")
    st.write("Align data collection with the specific problem statement.")
    st.write("#### Step 2: Data Source Prioritization")
    st.markdown("""
1. **Website:** Check for direct availability.
2. **APIs:** Use for programmatic access.
3. **Web Scraping:** Extract data from websites.
4. **Manual Collection:** As a last resort, collect data manually.
""")
    # Relative path to an image file expected alongside the app.
    image_url = "Modern Square Typographic Fashion Brand Logo.png"
    st.image(image_url)

    # NOTE(review): no renderer for "structured_data" or "unstructured_data"
    # is visible in this file's router; confirm those pages exist before
    # relying on these two buttons.
    st.button(":blue[🌟 Structured Data]", on_click=_open_page, args=("structured_data",))
    st.button(":blue[📷 Unstructured Data]", on_click=_open_page, args=("unstructured_data",))
    st.button(":blue[🗃️ Semi-Structured Data]", on_click=go_to_semi_structured_data_page)

    # "Back to Home" used to store the page key "home", which nothing
    # renders; the main page is registered under "main".
    st.button("Back to Home", on_click=go_to_main_page)
    st.button("Back to Main Page", on_click=go_to_main_page)
# Semi-Structured Data Page
# NOTE: a later definition of semi_structured_data_page in this file rebinds
# the name before the router runs, so this earlier version is not the one
# that gets rendered.
def semi_structured_data_page():
    """Render a short introduction to semi-structured data with CSV navigation.

    Navigation uses button callbacks so the target page is shown after a
    single click.
    """
    st.title(":blue[Semi-Structured Data]")
    st.markdown("""
Semi-structured data is not organized in traditional table formats but has some organizational properties.
Examples include JSON, XML, and CSV files.
""")
    st.button(":orange[CSV File Info]", on_click=go_to_csv_page)
    st.button("Back to Data Collection", on_click=go_to_data_collection_page)
# CSV File Page
def csv_page():
    """Render the CSV explainer: definition, pandas snippets and a sample table.

    The sample table is shown both as a DataFrame and as the CSV text pandas
    produces for it. The back button navigates through a callback so the
    semi-structured page is shown after a single click.
    """
    st.title(":orange[CSV File Format]")
    st.write("### What is a CSV File?")
    st.write("CSV (Comma-Separated Values) is a plain text format used to represent tabular data, where each line corresponds to a row and each value is separated by a comma.")
    st.write("### How to Work with CSV Files in Python")
    st.markdown("""
To read a CSV file in Python:
```python
import pandas as pd
data = pd.read_csv('file_path.csv')
```
To write to a CSV file in Python:
```python
data.to_csv('file_path.csv', index=False)
```
""")
    st.write("### Example Data")
    example_data = {
        "Name": ["Alice", "Bob", "Charlie"],
        "Age": [25, 30, 35],
        "City": ["New York", "Los Angeles", "Chicago"],
    }
    df = pd.DataFrame(example_data)
    st.write("Example DataFrame:")
    st.dataframe(df)
    st.write("CSV representation:")
    # index=False keeps the row index out of the CSV text.
    st.code(df.to_csv(index=False), language="csv")
    st.button("Back to Semi-Structured Data", on_click=go_to_semi_structured_data_page)
def go_to_json_page():
    """Select the JSON page as the page to render."""
    st.session_state["page"] = "json"
# JSON File Page
def json_page():
    """Render the JSON explainer: definition, sample document, snippets, table.

    The Python snippets shown to the reader have their `with` bodies indented
    so they are valid when copied. The back button navigates through a
    callback so the semi-structured page is shown after a single click.
    """
    st.title(":orange[JSON Format]")
    st.write("### What is JSON?")
    st.write("JSON (JavaScript Object Notation) is a lightweight data-interchange format. It is easy for humans to read and write and easy for machines to parse and generate.")
    st.write("### Example JSON Data")
    st.code("""
{
"Name": "Alice",
"Age": 25,
"City": "New York"
}
""", language="json")
    st.write("### How to Work with JSON in Python")
    st.markdown("""
To read JSON data in Python:
```python
import pandas as pd
import json
# Reading JSON as a dictionary
with open('file.json', 'r') as file:
    data = json.load(file)
# Convert JSON to DataFrame
df = pd.DataFrame(data)
```
To write to a JSON file:
```python
with open('file.json', 'w') as file:
    json.dump(data, file, indent=4)
```
""")
    st.write("### JSON Example Table")
    example_json = {
        "Name": ["Alice", "Bob", "Charlie"],
        "Age": [25, 30, 35],
        "City": ["New York", "Los Angeles", "Chicago"],
    }
    df_json = pd.DataFrame(example_json)
    st.dataframe(df_json)
    st.button("Back to Semi-Structured Data", on_click=go_to_semi_structured_data_page)
def go_to_xml_page():
    """Select the XML page as the page to render."""
    st.session_state["page"] = "xml"
# XML File Page
def xml_page():
    """Render the XML explainer: definition, sample document, snippet, table.

    The Python snippet shown to the reader has its `for` body indented so it
    is valid when copied. The back button navigates through a callback so the
    semi-structured page is shown after a single click.
    """
    st.title(":orange[XML Format]")
    st.write("### What is XML?")
    st.write("XML (eXtensible Markup Language) is a markup language used for storing and transporting data. It is both human-readable and machine-readable.")
    st.write("### Example XML Data")
    st.code("""
<root>
<person>
<name>Alice</name>
<age>25</age>
<city>New York</city>
</person>
</root>
""", language="xml")
    st.write("### How to Work with XML in Python")
    st.markdown("""
To read XML data in Python:
```python
import xml.etree.ElementTree as ET
import pandas as pd
# Parse XML file
tree = ET.parse('file.xml')
root = tree.getroot()
# Extract data
data = []
for person in root.findall('person'):
    data.append({
        'name': person.find('name').text,
        'age': int(person.find('age').text),
        'city': person.find('city').text
    })
# Convert to DataFrame
df = pd.DataFrame(data)
```
To write to an XML file, libraries like `xml.etree` or `lxml` can be used to construct nodes and save to a file.
""")
    st.write("### XML Example Table")
    example_xml = {
        "Name": ["Alice", "Bob", "Charlie"],
        "Age": [25, 30, 35],
        "City": ["New York", "Los Angeles", "Chicago"],
    }
    df_xml = pd.DataFrame(example_xml)
    st.dataframe(df_xml)
    st.button("Back to Semi-Structured Data", on_click=go_to_semi_structured_data_page)
def go_to_html_page():
    """Select the HTML page as the page to render."""
    st.session_state["page"] = "html"
# HTML File Page
def html_page():
    """Render the HTML explainer: definition, sample document, snippets, table.

    The Python snippets shown to the reader have their `with` and `for`
    bodies indented so they are valid when copied. The back button navigates
    through a callback so the semi-structured page is shown after a single
    click.
    """
    st.title(":orange[HTML Format]")
    st.write("### What is HTML?")
    st.write("""
HTML (HyperText Markup Language) is the standard markup language used to create web pages.
HTML documents structure content with elements like headings, paragraphs, tables, and links.
""")
    st.write("### Example HTML Data")
    st.code("""
<html>
<body>
<table>
<tr>
<th>Name</th>
<th>Age</th>
<th>City</th>
</tr>
<tr>
<td>Alice</td>
<td>25</td>
<td>New York</td>
</tr>
<tr>
<td>Bob</td>
<td>30</td>
<td>Los Angeles</td>
</tr>
</table>
</body>
</html>
""", language="html")
    st.write("### How to Work with HTML in Python")
    st.markdown("""
Use libraries like `pandas` or `BeautifulSoup` to extract and process data from HTML files.
**Example: Reading an HTML Table with Pandas**
```python
import pandas as pd
# Read HTML table from a file or URL
df = pd.read_html('file_path_or_url.html')[0]
print(df)
```
**Example: Extracting Data with BeautifulSoup**
```python
from bs4 import BeautifulSoup
# Parse HTML file
with open('file.html', 'r') as file:
    soup = BeautifulSoup(file, 'html.parser')
# Extract table data
table = soup.find('table')
rows = table.find_all('tr')
data = []
for row in rows[1:]:
    cols = row.find_all('td')
    data.append([col.text for col in cols])
# Convert to DataFrame
import pandas as pd
df = pd.DataFrame(data, columns=["Name", "Age", "City"])
print(df)
```
""")
    st.write("### HTML Example Table")
    example_html = {
        "Name": ["Alice", "Bob", "Charlie"],
        "Age": [25, 30, 35],
        "City": ["New York", "Los Angeles", "Chicago"],
    }
    df_html = pd.DataFrame(example_html)
    st.dataframe(df_html)
    st.button("Back to Semi-Structured Data", on_click=go_to_semi_structured_data_page)
# Update Semi-Structured Data Page Navigation
def semi_structured_data_page():
    """Render the semi-structured data hub with links to each format page.

    Offers buttons for the CSV, JSON, XML and HTML pages plus a button back
    to the data-collection page. Every button navigates through a callback,
    which updates the page key before the rerun triggered by the click, so
    the target page is shown after a single click.
    """
    st.title(":blue[Semi-Structured Data]")
    st.markdown("""
Semi-structured data is not organized in traditional table formats but has some organizational properties.
Examples include JSON, XML, HTML, and CSV files.
""")
    st.button(":orange[CSV File Info]", on_click=go_to_csv_page)
    st.button(":orange[JSON Info]", on_click=go_to_json_page)
    st.button(":orange[XML Info]", on_click=go_to_xml_page)
    st.button(":orange[HTML Info]", on_click=go_to_html_page)
    st.button("Back to Data Collection", on_click=go_to_data_collection_page)
# Page Routing Update
# Maps each page key stored in session state to the function that renders it.
_PAGE_RENDERERS = {
    "main": main_page,
    "data_collection": data_collection_page,
    "semi_structured_data": semi_structured_data_page,
    "csv": csv_page,
    "json": json_page,
    "xml": xml_page,
    "html": html_page,
}

_render_page = _PAGE_RENDERERS.get(st.session_state.page)
if _render_page is not None:
    _render_page()
else:
    # A page key without a renderer used to leave the screen blank with no
    # widget to navigate away from it. Tell the user and offer a way back.
    st.warning(f"The page '{st.session_state.page}' is not available yet.")
    st.button(
        "Back to Main Page",
        key="fallback_back_to_main",
        on_click=go_to_main_page,
    )