Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
# Set page configuration: page title plus Streamlit's "centered" layout.
st.set_page_config(page_title="Your Custom ML Lifecycle", layout="centered")
# Custom CSS for background color, button alignment, and styling.
# The stylesheet is injected as raw HTML, which is why unsafe_allow_html=True
# is passed to st.markdown below.
# NOTE(review): the CSS comments describe the button colors as "Orange" and
# "Darker orange", but #588c7e is a muted green and only the hover color
# (#E64D00) is orange -- confirm which palette is intended.
# NOTE(review): the `.arrow` class is not referenced anywhere in the visible
# code -- presumably left over from an earlier layout; confirm before removing.
st.markdown("""
<style>
/* Set full-page background color */
.main {
background-color: #f0f8ff; /* Alice Blue */
}
/* Center the buttons and style as rounded rectangles */
.stButton > button {
display: block;
margin: 10px auto;
width: 80%; /* Adjust button width */
background-color: #588c7e; /* Orange */
color: white;
border: none;
padding: 15px 30px;
text-align: center;
font-size: 16px;
border-radius: 10px; /* Rounded corners */
cursor: pointer;
transition-duration: 0.4s;
}
/* Add hover effect to buttons */
.stButton > button:hover {
background-color: #E64D00; /* Darker orange */
}
/* Style headers */
h1, h2 {
color: #2c3e50; /* Dark blue-grey */
text-align: center;
}
/* Style for arrows */
.arrow {
font-size: 30px;
text-align: center;
display: block;
width: 100%;
margin-top: 10px;
margin-bottom: 10px;
}
</style>
""", unsafe_allow_html=True)
# Navigation state lives in st.session_state["page"]; seed it with the
# main page only when no value has been stored yet.
if "page" not in st.session_state:
    st.session_state["page"] = "main"
def go_to_main_page():
    """Select the main lifecycle page as the page to render."""
    st.session_state["page"] = "main"
def go_to_data_collection_page():
    """Select the data collection page as the page to render."""
    st.session_state["page"] = "data_collection"
def go_to_semi_structured_data_page():
    """Select the semi-structured data page as the page to render."""
    st.session_state["page"] = "semi_structured_data"
def go_to_csv_page():
    """Select the CSV format page as the page to render."""
    st.session_state["page"] = "csv"
# Main Lifecycle Steps Page
def main_page() -> None:
    """Render the landing page: one button per ML lifecycle step.

    Clicking "2. Data Collection" navigates to the data collection page.
    Clicking any other step shows that step's description under its button.

    The data collection step is rendered exactly once. Previously an
    unclicked data collection button fell through to the generic branch,
    which drew a second, duplicate button for the same step.

    Navigation uses an ``on_click`` callback so the page value is updated
    before the script reruns. Setting it inside ``if st.button(...)``
    happens after the router has already picked the page for the current
    run, so the switch only became visible on the next interaction.
    """
    st.title("Machine Learning Project Lifecycle")

    # Step title -> description. Dict order is the display order.
    lifecycle_steps = {
        "1. Problem Statement": "Defines the goal to achieve by the end of the project.",
        "2. Data Collection": "Collect the data based on the problem statement from websites, APIs, web scraping, or manually.",
        "3. Simple EDA": "Simple EDA evaluates data quality by identifying issues like missing values, outliers, and duplicates.",
        "4. Data Preprocessing": "Converts raw data into clean, preprocessed data:",
        "5. EDA": "Exploratory Data Analysis gives a clear understanding of the dataset.",
        "6. Feature Engineering": "Feature engineering improves model performance by creating, transforming, or selecting relevant features.",
        "7. Training the Model": "Train the model on 70% of the data to learn the relationship between input and output features.",
        "8. Testing the Model": "Evaluate the model on 30% of the data to assess its performance.",
        "9. Deployment": "Deploy the model on a web server, app, or platform to make it accessible to users.",
        "10. Monitoring": "Continuously track the model’s performance and retrain it if necessary.",
    }

    for index, (step, description) in enumerate(lifecycle_steps.items()):
        if step == "2. Data Collection":
            # Navigation step: a single button wired to the page switch.
            st.button(
                step,
                key=f"data_collection_{index}",
                on_click=go_to_data_collection_page,
            )
            continue

        if st.button(step, key=f"step_{index}"):
            st.subheader(step)
            st.write(description)
            # NOTE(review): source indentation was lost; the divider is
            # assumed to close the expanded description -- confirm.
            st.write("---")
# Data Collection Page
def data_collection_page() -> None:
    """Render the data collection overview and its navigation buttons.

    Shows what data and data collection are, the recommended order of data
    sources, an illustration, and buttons leading to the data-type pages
    and back to the main page.

    Fixes over the previous version:
    * "Back to Home" stored the page value ``"home"``, which no route
      handles, so the app rendered nothing. It now returns to the main page.
    * All navigation goes through ``on_click`` callbacks, which run before
      the script reruns, so a single click is enough to switch pages.
    """

    def _open_page(page_name: str) -> None:
        # Callback for targets that have no dedicated go_to_* helper.
        st.session_state.page = page_name

    st.header("Data Collection")
    st.write("### What is Data?")
    st.write("Data refers to information that is processed or stored by a computer. This can include text, numbers, images, audio, or video.")
    st.write("### What is Data Collection?")
    st.write("Data Collection is collection of data from various sources based on the Problem statement.")
    st.write("#### Step 1: Problem-Based Approach")
    st.write("Align data collection with the specific problem statement.")
    st.write("#### Step 2: Data Source Prioritization")
    # Markdown text is kept flush-left so the list is not indented into a
    # code block.
    st.markdown("""
1. **Website:** Check for direct availability.
2. **APIs:** Use for programmatic access.
3. **Web Scraping:** Extract data from websites.
4. **Manual Collection:** As a last resort, collect data manually.
""")

    # Relative path: the image is expected next to the app script.
    image_url = "Modern Square Typographic Fashion Brand Logo.png"
    st.image(image_url)

    # NOTE(review): no route for "structured_data" or "unstructured_data"
    # is visible in the routing block at the bottom of the file -- add the
    # pages or confirm these targets.
    st.button(":blue[🌟 Structured Data]", on_click=_open_page, args=("structured_data",))
    st.button(":blue[📷 Unstructured Data]", on_click=_open_page, args=("unstructured_data",))
    st.button(":blue[🗃️ Semi-Structured Data]", on_click=go_to_semi_structured_data_page)

    # Both buttons lead to the main page; both are kept to preserve the layout.
    st.button("Back to Home", on_click=go_to_main_page)
    st.button("Back to Main Page", on_click=go_to_main_page)
# Semi-Structured Data Page
def semi_structured_data_page() -> None:
    """Render the semi-structured data overview (CSV-only variant).

    NOTE(review): a second function with this same name is defined further
    down in the file. The later definition replaces this one when the script
    runs, so this version is effectively unused -- consider removing it.

    Navigation uses ``on_click`` callbacks so the page value changes before
    the script reruns and one click is enough to switch pages.
    """
    st.title(":blue[Semi-Structured Data]")
    st.markdown("""
Semi-structured data is not organized in traditional table formats but has some organizational properties.
Examples include JSON, XML, and CSV files.
""")
    st.button(":orange[CSV File Info]", on_click=go_to_csv_page)
    st.button("Back to Data Collection", on_click=go_to_data_collection_page)
# CSV File Page
def csv_page() -> None:
    """Render the CSV tutorial page.

    Explains the CSV format, shows pandas read/write snippets, and displays
    a small example DataFrame with its CSV text representation.

    The Back button uses an ``on_click`` callback so the page value changes
    before the script reruns. Setting it inside ``if st.button(...)`` took
    effect only on the following interaction.
    """
    st.title(":orange[CSV File Format]")
    st.write("### What is a CSV File?")
    st.write("CSV (Comma-Separated Values) is a plain text format used to represent tabular data, where each line corresponds to a row and each value is separated by a comma.")
    st.write("### How to Work with CSV Files in Python")
    # Markdown is kept flush-left so the ``` fences are parsed as fenced
    # code blocks rather than as an indented code block.
    st.markdown("""
To read a CSV file in Python:
```python
import pandas as pd
data = pd.read_csv('file_path.csv')
```
To write to a CSV file in Python:
```python
data.to_csv('file_path.csv', index=False)
```
""")
    st.write("### Example Data")
    example_data = {
        "Name": ["Alice", "Bob", "Charlie"],
        "Age": [25, 30, 35],
        "City": ["New York", "Los Angeles", "Chicago"],
    }
    df = pd.DataFrame(example_data)
    st.write("Example DataFrame:")
    st.dataframe(df)
    st.write("CSV representation:")
    # index=False keeps the row index out of the displayed CSV text.
    st.code(df.to_csv(index=False), language="csv")
    st.button("Back to Semi-Structured Data", on_click=go_to_semi_structured_data_page)
def go_to_json_page():
    """Select the JSON format page as the page to render."""
    st.session_state["page"] = "json"
# JSON File Page
def json_page() -> None:
    """Render the JSON tutorial page.

    Explains the JSON format, shows a sample document, read/write snippets,
    and an example table.

    The Back button uses an ``on_click`` callback so the page value changes
    before the script reruns and one click is enough. Indentation of the
    displayed sample document and Python snippets is restored so the
    snippets are valid Python when copied.
    """
    st.title(":orange[JSON Format]")
    st.write("### What is JSON?")
    st.write("JSON (JavaScript Object Notation) is a lightweight data-interchange format. It is easy for humans to read and write and easy for machines to parse and generate.")
    st.write("### Example JSON Data")
    st.code("""
{
    "Name": "Alice",
    "Age": 25,
    "City": "New York"
}
""", language="json")
    st.write("### How to Work with JSON in Python")
    # Markdown is kept flush-left so the ``` fences are parsed as fenced
    # code blocks rather than as an indented code block.
    st.markdown("""
To read JSON data in Python:
```python
import pandas as pd
import json
# Reading JSON as a dictionary
with open('file.json', 'r') as file:
    data = json.load(file)
# Convert JSON to DataFrame
df = pd.DataFrame(data)
```
To write to a JSON file:
```python
with open('file.json', 'w') as file:
    json.dump(data, file, indent=4)
```
""")
    st.write("### JSON Example Table")
    example_json = {
        "Name": ["Alice", "Bob", "Charlie"],
        "Age": [25, 30, 35],
        "City": ["New York", "Los Angeles", "Chicago"],
    }
    df_json = pd.DataFrame(example_json)
    st.dataframe(df_json)
    st.button("Back to Semi-Structured Data", on_click=go_to_semi_structured_data_page)
def go_to_xml_page():
    """Select the XML format page as the page to render."""
    st.session_state["page"] = "xml"
# XML File Page
def xml_page() -> None:
    """Render the XML tutorial page.

    Explains the XML format, shows a sample document, a parsing snippet
    based on ``xml.etree.ElementTree``, and an example table.

    The Back button uses an ``on_click`` callback so the page value changes
    before the script reruns and one click is enough. Indentation of the
    displayed sample document and Python snippet is restored so the snippet
    is valid Python when copied.
    """
    st.title(":orange[XML Format]")
    st.write("### What is XML?")
    st.write("XML (eXtensible Markup Language) is a markup language used for storing and transporting data. It is both human-readable and machine-readable.")
    st.write("### Example XML Data")
    st.code("""
<root>
    <person>
        <name>Alice</name>
        <age>25</age>
        <city>New York</city>
    </person>
</root>
""", language="xml")
    st.write("### How to Work with XML in Python")
    # Markdown is kept flush-left so the ``` fences are parsed as fenced
    # code blocks rather than as an indented code block.
    st.markdown("""
To read XML data in Python:
```python
import xml.etree.ElementTree as ET
import pandas as pd
# Parse XML file
tree = ET.parse('file.xml')
root = tree.getroot()
# Extract data
data = []
for person in root.findall('person'):
    data.append({
        'name': person.find('name').text,
        'age': int(person.find('age').text),
        'city': person.find('city').text
    })
# Convert to DataFrame
df = pd.DataFrame(data)
```
To write to an XML file, libraries like `xml.etree` or `lxml` can be used to construct nodes and save to a file.
""")
    st.write("### XML Example Table")
    example_xml = {
        "Name": ["Alice", "Bob", "Charlie"],
        "Age": [25, 30, 35],
        "City": ["New York", "Los Angeles", "Chicago"],
    }
    df_xml = pd.DataFrame(example_xml)
    st.dataframe(df_xml)
    st.button("Back to Semi-Structured Data", on_click=go_to_semi_structured_data_page)
def go_to_html_page():
    """Select the HTML format page as the page to render."""
    st.session_state["page"] = "html"
# HTML File Page
def html_page() -> None:
    """Render the HTML tutorial page.

    Explains HTML, shows a sample document containing a table, snippets for
    reading tables with pandas and BeautifulSoup, and an example table.

    The Back button uses an ``on_click`` callback so the page value changes
    before the script reruns and one click is enough. Indentation of the
    displayed sample document and Python snippets is restored so the
    snippets are valid Python when copied.
    """
    st.title(":orange[HTML Format]")
    st.write("### What is HTML?")
    st.write("""
HTML (HyperText Markup Language) is the standard markup language used to create web pages.
HTML documents structure content with elements like headings, paragraphs, tables, and links.
""")
    st.write("### Example HTML Data")
    st.code("""
<html>
<body>
    <table>
        <tr>
            <th>Name</th>
            <th>Age</th>
            <th>City</th>
        </tr>
        <tr>
            <td>Alice</td>
            <td>25</td>
            <td>New York</td>
        </tr>
        <tr>
            <td>Bob</td>
            <td>30</td>
            <td>Los Angeles</td>
        </tr>
    </table>
</body>
</html>
""", language="html")
    st.write("### How to Work with HTML in Python")
    # Markdown is kept flush-left so the ``` fences are parsed as fenced
    # code blocks rather than as an indented code block.
    st.markdown("""
Use libraries like `pandas` or `BeautifulSoup` to extract and process data from HTML files.
**Example: Reading an HTML Table with Pandas**
```python
import pandas as pd
# Read HTML table from a file or URL
df = pd.read_html('file_path_or_url.html')[0]
print(df)
```
**Example: Extracting Data with BeautifulSoup**
```python
from bs4 import BeautifulSoup
# Parse HTML file
with open('file.html', 'r') as file:
    soup = BeautifulSoup(file, 'html.parser')
# Extract table data
table = soup.find('table')
rows = table.find_all('tr')
data = []
for row in rows[1:]:
    cols = row.find_all('td')
    data.append([col.text for col in cols])
# Convert to DataFrame
import pandas as pd
df = pd.DataFrame(data, columns=["Name", "Age", "City"])
print(df)
```
""")
    st.write("### HTML Example Table")
    example_html = {
        "Name": ["Alice", "Bob", "Charlie"],
        "Age": [25, 30, 35],
        "City": ["New York", "Los Angeles", "Chicago"],
    }
    df_html = pd.DataFrame(example_html)
    st.dataframe(df_html)
    st.button("Back to Semi-Structured Data", on_click=go_to_semi_structured_data_page)
# Semi-Structured Data Page (CSV, JSON, XML and HTML navigation)
def semi_structured_data_page() -> None:
    """Render the semi-structured data overview with links to each format.

    Offers buttons for the CSV, JSON, XML and HTML pages and one to return
    to the data collection page.

    Every button uses an ``on_click`` callback. Callbacks run before the
    script reruns, so the router sees the new page value immediately.
    Setting the value inside ``if st.button(...)`` happens after the router
    has already chosen the page for the current run and needed a second
    interaction to take effect.
    """
    st.title(":blue[Semi-Structured Data]")
    st.markdown("""
Semi-structured data is not organized in traditional table formats but has some organizational properties.
Examples include JSON, XML, HTML, and CSV files.
""")

    # (button label, navigation callback), in display order.
    navigation = (
        (":orange[CSV File Info]", go_to_csv_page),
        (":orange[JSON Info]", go_to_json_page),
        (":orange[XML Info]", go_to_xml_page),
        (":orange[HTML Info]", go_to_html_page),
        ("Back to Data Collection", go_to_data_collection_page),
    )
    for label, callback in navigation:
        st.button(label, on_click=callback)
# Page routing: render the page named in st.session_state.page.
# A dispatch table replaces the if/elif chain. Unknown page values used to
# fall through silently and leave the app blank with no way back; they now
# show a notice and a button that returns to the main page.
_PAGE_RENDERERS = {
    "main": main_page,
    "data_collection": data_collection_page,
    "semi_structured_data": semi_structured_data_page,
    "csv": csv_page,
    "json": json_page,
    "xml": xml_page,
    "html": html_page,
}

_render_current_page = _PAGE_RENDERERS.get(st.session_state.page)
if _render_current_page is not None:
    _render_current_page()
else:
    st.warning(f"The page '{st.session_state.page}' is not available yet.")
    st.button(
        "Back to Main Page",
        key="unknown_page_back_to_main",
        on_click=go_to_main_page,
    )