"""Streamlit app that walks through the stages of a machine learning project.

Navigation is driven by ``st.session_state.page``. Every navigation button
uses an ``on_click`` callback so the target page is rendered on the same
rerun that handles the click (setting the state inside ``if st.button(...)``
would only take effect on the *next* interaction).
"""

from pathlib import Path

import pandas as pd
import streamlit as st

# Set page configuration (must be the first Streamlit command in the script).
st.set_page_config(page_title="Your Custom ML Lifecycle", layout="centered")

# Custom CSS for background color, button alignment, and styling.
# NOTE(review): the stylesheet body is empty in this file — presumably the
# <style> block was stripped at some point; restore it here if needed.
st.markdown(""" """, unsafe_allow_html=True)

# Navigation logic using session state.
if "page" not in st.session_state:
    st.session_state.page = "main"

# Image shown on the Data Collection page (path relative to the working dir).
LOGO_PATH = "Modern Square Typographic Fashion Brand Logo.png"

STEPS = [
    "1. Problem Statement",
    "2. Data Collection",
    "3. Simple EDA",
    "4. Data Preprocessing",
    "5. EDA",
    "6. Feature Engineering",
    "7. Training the Model",
    "8. Testing the Model",
    "9. Deployment",
    "10. Monitoring",
]

DESCRIPTIONS = {
    "1. Problem Statement": "Defines the goal to achieve by the end of the project.",
    "2. Data Collection": "Collect the data based on the problem statement from websites, APIs, web scraping, or manually.",
    "3. Simple EDA": "Simple EDA evaluates data quality by identifying issues like missing values, outliers, and duplicates.",
    "4. Data Preprocessing": "Converts raw data into clean, preprocessed data:",
    "5. EDA": "Exploratory Data Analysis gives a clear understanding of the dataset.",
    "6. Feature Engineering": "Feature engineering improves model performance by creating, transforming, or selecting relevant features.",
    "7. Training the Model": "Train the model on 70% of the data to learn the relationship between input and output features.",
    "8. Testing the Model": "Evaluate the model on 30% of the data to assess its performance.",
    "9. Deployment": "Deploy the model on a web server, app, or platform to make it accessible to users.",
    "10. Monitoring": "Continuously track the model’s performance and retrain it if necessary.",
}

DATA_SOURCES_MD = """
1. **Website:** Check for direct availability.
2. **APIs:** Use for programmatic access.
3. **Web Scraping:** Extract data from websites.
4. **Manual Collection:** As a last resort, collect data manually.
"""

SEMI_STRUCTURED_MD = """
Semi-structured data is not organized in traditional table formats but has some organizational properties.
Examples include JSON, XML, HTML, and CSV files.
"""

CSV_HOWTO_MD = """
To read a CSV file in Python:

```python
import pandas as pd
data = pd.read_csv('file_path.csv')
```

To write to a CSV file in Python:

```python
data.to_csv('file_path.csv', index=False)
```
"""

JSON_EXAMPLE = """
{
    "Name": "Alice",
    "Age": 25,
    "City": "New York"
}
"""

JSON_HOWTO_MD = """
To read JSON data in Python:

```python
import pandas as pd
import json

# Reading JSON as a dictionary
with open('file.json', 'r') as file:
    data = json.load(file)

# Convert JSON to DataFrame
df = pd.DataFrame(data)
```

To write to a JSON file:

```python
with open('file.json', 'w') as file:
    json.dump(data, file, indent=4)
```
"""

# Tag names match the parsing example in XML_HOWTO_MD (person/name/age/city).
XML_EXAMPLE = """
<people>
    <person>
        <name>Alice</name>
        <age>25</age>
        <city>New York</city>
    </person>
</people>
"""

XML_HOWTO_MD = """
To read XML data in Python:

```python
import xml.etree.ElementTree as ET
import pandas as pd

# Parse XML file
tree = ET.parse('file.xml')
root = tree.getroot()

# Extract data
data = []
for person in root.findall('person'):
    data.append({
        'name': person.find('name').text,
        'age': int(person.find('age').text),
        'city': person.find('city').text
    })

# Convert to DataFrame
df = pd.DataFrame(data)
```

To write to an XML file, libraries like `xml.etree` or `lxml` can be used to construct nodes and save to a file.
"""

HTML_INTRO = """
HTML (HyperText Markup Language) is the standard markup language used to create web pages.
HTML documents structure content with elements like headings, paragraphs, tables, and links.
"""

# Table layout matches the BeautifulSoup example in HTML_HOWTO_MD (tr/td rows
# after one header row).
HTML_EXAMPLE = """
<table>
    <tr>
        <th>Name</th>
        <th>Age</th>
        <th>City</th>
    </tr>
    <tr>
        <td>Alice</td>
        <td>25</td>
        <td>New York</td>
    </tr>
    <tr>
        <td>Bob</td>
        <td>30</td>
        <td>Los Angeles</td>
    </tr>
</table>
"""

HTML_HOWTO_MD = """
Use libraries like `pandas` or `BeautifulSoup` to extract and process data from HTML files.

**Example: Reading an HTML Table with Pandas**

```python
import pandas as pd

# Read HTML table from a file or URL
df = pd.read_html('file_path_or_url.html')[0]
print(df)
```

**Example: Extracting Data with BeautifulSoup**

```python
from bs4 import BeautifulSoup

# Parse HTML file
with open('file.html', 'r') as file:
    soup = BeautifulSoup(file, 'html.parser')

# Extract table data
table = soup.find('table')
rows = table.find_all('tr')
data = []
for row in rows[1:]:
    cols = row.find_all('td')
    data.append([col.text for col in cols])

# Convert to DataFrame
import pandas as pd
df = pd.DataFrame(data, columns=["Name", "Age", "City"])
print(df)
```
"""


# --------------------------------------------------------------------------
# Navigation callbacks (used as ``on_click`` handlers)
# --------------------------------------------------------------------------
def go_to_main_page():
    """Switch the app to the main lifecycle page."""
    st.session_state.page = "main"


def go_to_data_collection_page():
    """Switch the app to the Data Collection page."""
    st.session_state.page = "data_collection"


def go_to_structured_data_page():
    """Switch the app to the Structured Data page."""
    st.session_state.page = "structured_data"


def go_to_unstructured_data_page():
    """Switch the app to the Unstructured Data page."""
    st.session_state.page = "unstructured_data"


def go_to_semi_structured_data_page():
    """Switch the app to the Semi-Structured Data page."""
    st.session_state.page = "semi_structured_data"


def go_to_csv_page():
    """Switch the app to the CSV page."""
    st.session_state.page = "csv"


def go_to_json_page():
    """Switch the app to the JSON page."""
    st.session_state.page = "json"


def go_to_xml_page():
    """Switch the app to the XML page."""
    st.session_state.page = "xml"


def go_to_html_page():
    """Switch the app to the HTML page."""
    st.session_state.page = "html"


# --------------------------------------------------------------------------
# Shared helpers
# --------------------------------------------------------------------------
def _example_frame():
    """Return the small Name/Age/City DataFrame shown on every format page."""
    return pd.DataFrame(
        {
            "Name": ["Alice", "Bob", "Charlie"],
            "Age": [25, 30, 35],
            "City": ["New York", "Los Angeles", "Chicago"],
        }
    )


def _placeholder_page(title):
    """Render a stub for a page that is linked to but has no content yet."""
    st.title(title)
    st.info("This page is not available yet.")
    st.button("Back to Data Collection", on_click=go_to_data_collection_page)


# --------------------------------------------------------------------------
# Pages
# --------------------------------------------------------------------------
def main_page():
    """Render the main page: one button per lifecycle step.

    "2. Data Collection" navigates to its own page; every other step shows
    its description underneath when clicked.
    """
    st.title("Machine Learning Project Lifecycle")

    for i, step in enumerate(STEPS):
        if step == "2. Data Collection":
            # Exactly one button for this step; navigation happens in the
            # callback so the new page is drawn on this very rerun.
            st.button(
                step,
                key=f"data_collection_{i}",
                on_click=go_to_data_collection_page,
            )
        elif st.button(step, key=f"step_{i}"):
            st.subheader(step)
            st.write(DESCRIPTIONS[step])
            st.write("---")


def data_collection_page():
    """Render the Data Collection page with links to the data-type pages."""
    st.header("Data Collection")

    st.write("### What is Data?")
    st.write("Data refers to information that is processed or stored by a computer. This can include text, numbers, images, audio, or video.")

    st.write("### What is Data Collection?")
    st.write("Data Collection is collection of data from various sources based on the Problem statement.")

    st.write("#### Step 1: Problem-Based Approach")
    st.write("Align data collection with the specific problem statement.")

    st.write("#### Step 2: Data Source Prioritization")
    st.markdown(DATA_SOURCES_MD)

    # The image is a local file; skip it instead of crashing the whole page
    # when the asset is not deployed next to the script.
    if Path(LOGO_PATH).is_file():
        st.image(LOGO_PATH)
    else:
        st.warning(f"Image not found: {LOGO_PATH}")

    st.button(":blue[🌟 Structured Data]", on_click=go_to_structured_data_page)
    st.button(":blue[📷 Unstructured Data]", on_click=go_to_unstructured_data_page)
    st.button(":blue[🗃️ Semi-Structured Data]", on_click=go_to_semi_structured_data_page)

    st.button("Back to Main Page", on_click=go_to_main_page)


def structured_data_page():
    """Render the (placeholder) Structured Data page."""
    _placeholder_page(":blue[Structured Data]")


def unstructured_data_page():
    """Render the (placeholder) Unstructured Data page."""
    _placeholder_page(":blue[Unstructured Data]")


def semi_structured_data_page():
    """Render the Semi-Structured Data page with links to each format page."""
    st.title(":blue[Semi-Structured Data]")
    st.markdown(SEMI_STRUCTURED_MD)

    st.button(":orange[CSV File Info]", on_click=go_to_csv_page)
    st.button(":orange[JSON Info]", on_click=go_to_json_page)
    st.button(":orange[XML Info]", on_click=go_to_xml_page)
    st.button(":orange[HTML Info]", on_click=go_to_html_page)

    st.button("Back to Data Collection", on_click=go_to_data_collection_page)


def csv_page():
    """Render the CSV page: definition, pandas usage, and example data."""
    st.title(":orange[CSV File Format]")

    st.write("### What is a CSV File?")
    st.write("CSV (Comma-Separated Values) is a plain text format used to represent tabular data, where each line corresponds to a row and each value is separated by a comma.")

    st.write("### How to Work with CSV Files in Python")
    st.markdown(CSV_HOWTO_MD)

    st.write("### Example Data")
    df = _example_frame()
    st.write("Example DataFrame:")
    st.dataframe(df)
    st.write("CSV representation:")
    st.code(df.to_csv(index=False), language="csv")

    st.button("Back to Semi-Structured Data", on_click=go_to_semi_structured_data_page)


def json_page():
    """Render the JSON page: definition, sample document, and Python usage."""
    st.title(":orange[JSON Format]")

    st.write("### What is JSON?")
    st.write("JSON (JavaScript Object Notation) is a lightweight data-interchange format. It is easy for humans to read and write and easy for machines to parse and generate.")

    st.write("### Example JSON Data")
    st.code(JSON_EXAMPLE, language="json")

    st.write("### How to Work with JSON in Python")
    st.markdown(JSON_HOWTO_MD)

    st.write("### JSON Example Table")
    st.dataframe(_example_frame())

    st.button("Back to Semi-Structured Data", on_click=go_to_semi_structured_data_page)


def xml_page():
    """Render the XML page: definition, sample document, and Python usage."""
    st.title(":orange[XML Format]")

    st.write("### What is XML?")
    st.write("XML (eXtensible Markup Language) is a markup language used for storing and transporting data. It is both human-readable and machine-readable.")

    st.write("### Example XML Data")
    st.code(XML_EXAMPLE, language="xml")

    st.write("### How to Work with XML in Python")
    st.markdown(XML_HOWTO_MD)

    st.write("### XML Example Table")
    st.dataframe(_example_frame())

    st.button("Back to Semi-Structured Data", on_click=go_to_semi_structured_data_page)


def html_page():
    """Render the HTML page: definition, sample table, and Python usage."""
    st.title(":orange[HTML Format]")

    st.write("### What is HTML?")
    st.write(HTML_INTRO)

    st.write("### Example HTML Data")
    st.code(HTML_EXAMPLE, language="html")

    st.write("### How to Work with HTML in Python")
    st.markdown(HTML_HOWTO_MD)

    st.write("### HTML Example Table")
    st.dataframe(_example_frame())

    st.button("Back to Semi-Structured Data", on_click=go_to_semi_structured_data_page)


# --------------------------------------------------------------------------
# Page routing
# --------------------------------------------------------------------------
PAGES = {
    "main": main_page,
    "data_collection": data_collection_page,
    "structured_data": structured_data_page,
    "unstructured_data": unstructured_data_page,
    "semi_structured_data": semi_structured_data_page,
    "csv": csv_page,
    "json": json_page,
    "xml": xml_page,
    "html": html_page,
}

_render = PAGES.get(st.session_state.page)
if _render is None:
    # Unknown/stale page name (e.g. a leftover "home"): fall back to the main
    # page instead of rendering a blank screen.
    go_to_main_page()
    _render = main_page
_render()