Spaces:
Build error
Build error
| import streamlit as st | |
| import pandas as pd | |
| import numpy as np | |
# Inject app-wide CSS. A plain string (no f-prefix) is used because nothing is
# interpolated, so the CSS braces do not need to be doubled. The markup is
# assembled from explicit lines so it does not depend on source indentation.
st.markdown(
    "<style>\n"
    "/* Tint the entire app background. The size/repeat/attachment/position\n"
    "   rules apply to background images, so they have no visible effect\n"
    "   until a background-image is added. */\n"
    ".stApp {\n"
    "    background-color: rgba(96, 155, 124, 0.5);\n"
    "    background-size: 1300px;\n"
    "    background-repeat: no-repeat;\n"
    "    background-attachment: fixed;\n"
    "    background-position: center;\n"
    "}\n"
    "</style>",
    unsafe_allow_html=True,
)
| import streamlit as st | |
# Navigation
def _go_to_page(page):
    """Make *page* the active page stored in ``st.session_state['page']``.

    Intended as a button ``on_click`` callback: the page is switched before
    the script body runs again, so the page dispatch below already sees the
    new value on the rerun triggered by the click. Assigning the page inside
    ``if st.button(...)`` instead happens after the dispatch has picked the
    home branch, and the new page only shows up on the next interaction.
    """
    st.session_state['page'] = page


st.title("Life Cycle of ML")

# First run of a session: start on the home page.
if 'page' not in st.session_state:
    st.session_state['page'] = 'home'

# Main Navigation
if st.session_state['page'] == 'home':
    st.subheader("Explore the Life Cycle Stages")
    st.button(
        "Data Collection",
        on_click=_go_to_page,
        args=("data_collection",),
    )
| elif st.session_state['page'] == 'data_collection': | |
| # Data Collection Page | |
| st.title("Data Collection") | |
| st.header("1. What is Data?") | |
| st.write( | |
| "Data refers to raw facts and figures that are collected, stored, and analyzed to derive insights. " | |
| "It serves as the foundation for any machine learning model." | |
| ) | |
| st.header("2. Types of Data") | |
| data_type = st.radio( | |
| "Select a type of data to learn more:", | |
| ("Structured", "Unstructured", "Semi-Structured") | |
| ) | |
| if data_type == "Structured": | |
| st.subheader("Structured Data") | |
| st.write( | |
| "Structured data is highly organized and easily searchable within databases. " | |
| "It includes rows and columns, such as in relational databases." | |
| ) | |
| st.write("Data Formats:") | |
| format_selected = st.radio( | |
| "Select a format to explore further:", | |
| ("Excel", "CSV") | |
| ) | |
| if format_selected == "Excel": | |
| # Excel Data Format Section | |
| st.subheader("Excel Data Format") | |
| st.write("*What is it?*") | |
| st.write( | |
| "Excel files are spreadsheets used to organize and analyze data in rows and columns. " | |
| "They are widely used due to their user-friendly nature and support for various data types." | |
| ) | |
| st.write("*How to Read Excel Files?*") | |
| st.code( | |
| """ | |
| import pandas as pd | |
| # Reading an Excel file | |
| df = pd.read_excel('file.xlsx') | |
| print(df.head()) | |
| """, | |
| language="python" | |
| ) | |
| st.write("*Common Issues When Handling Excel Files*") | |
| st.write( | |
| """ | |
| - Missing or corrupted files | |
| - Version incompatibilities | |
| - Incorrect file paths | |
| - Handling large Excel files | |
| """ | |
| ) | |
| st.write("*How to Overcome These Errors/Issues?*") | |
| st.write( | |
| """ | |
| - Use proper error handling with try-except. | |
| - Convert Excel files to CSV for better compatibility. | |
| - Use libraries like openpyxl or xlrd for specific Excel versions. | |
| - Break large files into smaller chunks for processing. | |
| """ | |
| ) | |
| # Button to open Jupyter Notebook or PDF | |
| if st.button("Open Excel Documentation"): | |
| st.write("Download the [documentation notebook](path/to/excel_notebook.ipynb) or [PDF](path/to/excel_documentation.pdf).") | |
| elif format_selected == "CSV": | |
| # CSV Data Format Section | |
| st.subheader("CSV Data Format") | |
| st.write("*What is it?*") | |
| st.write( | |
| "CSV (Comma-Separated Values) files store tabular data in plain text, where each line represents a record, " | |
| "and fields are separated by commas." | |
| ) | |
| st.write("*How to Read CSV Files?*") | |
| st.code( | |
| """ | |
| import pandas as pd | |
| # Reading a CSV file | |
| df = pd.read_csv('file.csv') | |
| print(df.head()) | |
| """, | |
| language="python" | |
| ) | |
| st.write("*Common Issues When Handling CSV Files*") | |
| st.write( | |
| """ | |
| - Encoding issues (e.g., UTF-8, ISO-8859-1) | |
| - Inconsistent delimiters | |
| - Missing or corrupted files | |
| - Large file sizes causing memory errors | |
| """ | |
| ) | |
| st.write("*How to Overcome These Errors/Issues?*") | |
| st.write( | |
| """ | |
| - Specify the correct encoding when reading files using encoding='utf-8' or similar. | |
| - Use libraries like csv or pandas to handle different delimiters. | |
| - Employ error handling to catch and manage missing/corrupted files. | |
| - Use chunking to read large files in smaller parts: pd.read_csv('file.csv', chunksize=1000). | |
| """ | |
| ) | |
| # Button to open Jupyter Notebook or PDF | |
| if st.button("Open CSV Documentation"): | |
| st.write("Download the [documentation notebook](path/to/csv_notebook.ipynb) or [PDF](path/to/csv_documentation.pdf).") | |
| elif data_type == "Unstructured": | |
| st.subheader("Unstructured Data") | |
| st.write( | |
| "Unstructured data refers to information that lacks a predefined format or organization, making it challenging to analyze using traditional tools." | |
| "Examples include text, images, videos, audio, and social media posts." | |
| ) | |
| st.write("Data Formats:") | |
| format_selected = st.radio( | |
| "Select a format to explore further:", | |
| ("IMAGE","VIDEO", "AUDIO") | |
| ) | |
| #HOW TO READ TEXT | |
| if format_selected == "IMAGE": | |
| st.subheader("IMAGE Data Format") | |
| st.write("*What is it?*") | |
| st.write( | |
| "Photos, medical scans, satellite images. " | |
| ) | |
| st.write("*How to Read IMAGE Files?*") | |
| st.code( | |
| """ | |
| from PIL import Image | |
| image = Image.open('example.jpg') | |
| image.show() | |
| """, | |
| language="python" | |
| ) | |
| st.write("*Common Issues When Handling image Files*") | |
| st.write( | |
| """ | |
| - data augumentation and overfitting | |
| - image processing challenges | |
| - Data Imbalance | |
| - High Dimensionality | |
| """ | |
| ) | |
| st.write("*How to Overcome These Errors/Issues?*") | |
| st.write( | |
| """ | |
| - Data Augumentaion. | |
| - Consistent image processing | |
| - Handling Class Imbalance. | |
| - Dimensionality Reduction and Feature Extraction | |
| """ | |
| ) | |
| # Button to open Jupyter Notebook or PDF | |
| if st.button("Open IMAGE Documentation"): | |
| st.write("Download the [documentation notebook](path/to/image_notebook.ipynb) or [PDF](path/to/image_documentation.pdf).") | |
| elif format_selected == "VIDEO": | |
| st.subheader("VIDEO Data Format") | |
| st.write("*What is it?*") | |
| st.write( | |
| "PNG,GIF,BNP,RAW videos,TIFF " | |
| ) | |
| st.write("*How to Read VIDEO Files?*") | |
| st.code( | |
| """ | |
| pip install opencv-python | |
| import cv2 | |
| # Open the video file | |
| video_path = 'path_to_your_video.mp4' | |
| cap = cv2.VideoCapture(video_path) | |
| """, | |
| language="python" | |
| ) | |
| st.write("*Common Issues When Handling video Files*") | |
| st.write( | |
| """ | |
| - File not found or Corrupted. | |
| - Incompatible Codec or Format. | |
| - Performance Issues with Large Videos. | |
| - Frame Dropping or Skipping. | |
| """ | |
| ) | |
| st.write("*How to Overcome These Errors/Issues?*") | |
| st.write( | |
| """ | |
| - Ensure Correct File Path and Handle Corrupted Files. | |
| - Install Missing Codecs or Use Supported Formats. | |
| - Optimize Performance for Large Videos | |
| - Control Frame Rate and Prevent Skipping | |
| """ | |
| ) | |
| # Button to open Jupyter Notebook or PDF | |
| if st.button("Open VIDEOS Documentation"): | |
| st.write("Download the [documentation notebook](path/to/videos_notebook.ipynb) or [PDF](path/to/videos_documentation.pdf).") | |
| elif format_selected == "AUDIO": | |
| st.subheader("AUDIO Data Format") | |
| st.write("*What is it?*") | |
| st.write( | |
| "MP3,WAV,FLAC,AAC,OGG " | |
| ) | |
| st.write("*How to Read AUDIO Files?*") | |
| st.code( | |
| """ | |
| pip install librosa | |
| import librosa | |
| # Load the audio file | |
| audio_path = 'path_to_audio_file.wav' | |
| y, sr = librosa.load(audio_path, sr=None) # sr=None to preserve the original sampling rate | |
| """, | |
| language="python" | |
| ) | |
| st.write("*Common Issues When Handling audio Files*") | |
| st.write( | |
| """ | |
| - File not found or Corrupted. | |
| - Incompatible Codec or Format. | |
| - Memory Overload or Performance Issues with Large Audios. | |
| - Encoding or File Corruption Issues | |
| """ | |
| ) | |
| st.write("*How to Overcome These Errors/Issues?*") | |
| st.write( | |
| """ | |
| - File Not Found or Corrupted: Always check if the file exists before attempting to load it. Handle errors gracefully with try-except. | |
| - Incompatible Format or Codec: Use pydub or ffmpeg to handle multiple formats, or convert the file to a more compatible format. | |
| - Memory Overload or Performance Issues: Process the audio in chunks or downsample large files to reduce memory consumption. | |
| - Encoding or File Corruption Issues: Ensure proper encoding and re-encode files using tools like ffmpeg if necessary. | |
| """ | |
| ) | |
| # Button to open Jupyter Notebook or PDF | |
| if st.button("Open AUDIO Documentation"): | |
| st.write("Download the [documentation notebook](path/to/audio_notebook.ipynb) or [PDF](path/to/audio_documentation.pdf).") | |
| elif data_type == "Semi-Structured": | |
| st.subheader("Semi-structured Data") | |
| st.write( | |
| "Semi-structured data is data that doesn’t fit into a rigid structure like relational databases but has some organizational properties, such as tags or key-value pairs, making it easier to analyze.") | |
| st.write("Data Formats:") | |
| format_selected = st.radio( | |
| "Select a format to explore further:", | |
| ("JSON","XML") | |
| ) | |
| #HOW TO READ TEXT | |
| if format_selected == "JSON": | |
| st.subheader("JSON Data Format") | |
| st.write("*What is it?*") | |
| st.write( | |
| "JSON is a lightweight data-interchange format that uses key-value pairs. It is commonly used in web services and APIs for exchanging data. " | |
| ) | |
| st.write("*How to Read JSON Files?*") | |
| st.code( | |
| """ | |
| import json | |
| # Open and read the JSON file | |
| with open('data.json', 'r') as file: | |
| data = json.load(file) | |
| """, | |
| language="python" | |
| ) | |
| st.write("*Common Issues When Handling json Files*") | |
| st.write( | |
| """ | |
| - File Encoding Issues | |
| - Invalid JSON Syntax | |
| - Large JSON Files Causing Memory Issues | |
| - Inconsistent Data Structure | |
| """ | |
| ) | |
| st.write("*How to Overcome These Errors/Issues?*") | |
| st.write( | |
| """ | |
| - Validate JSON Syntax: Use tools like JSONLint or json.decoder.JSONDecodeError in Python to ensure valid JSON format. | |
| - Handle Encoding: Specify the encoding when opening the file in Python (e.g., open('file.json', 'r', encoding='utf-8')). | |
| - Use Chunking or Streaming for Large Files: For large JSON files, load the file in chunks or use libraries that support JSON streaming like ijson or jsonlines. | |
| - Consistent Structure: Ensure consistent data structure when creating JSON files, or write code to handle missing or extra fields gracefully. | |
| """ | |
| ) | |
| # Button to open Jupyter Notebook or PDF | |
| if st.button("Open JSON Documentation"): | |
| st.write("Download the [documentation notebook](path/to/JSON_notebook.ipynb) or [PDF](path/to/JSON_documentation.pdf).") | |
| elif format_selected == "XML": | |
| st.subheader("XML Data Format") | |
| st.write("*What is it?*") | |
| st.write( | |
| "XML is a flexible, structured data format used to store and transport data, utilizing tags to define elements, attributes, and hierarchical relationships between different pieces of information. " | |
| ) | |
| st.write("*How to Read XML Files?*") | |
| st.code( | |
| """ | |
| import pandas as pd | |
| pd.read_xml("Data_path") | |
| """ , | |
| language="python" | |
| ) | |
| st.write("*Common Issues When Handling XML Files*") | |
| st.write( | |
| """ | |
| - Invalid XML Syntax. | |
| - Encoding Issues. | |
| - Large XML Files. | |
| - Inconsistent Structure. | |
| """ | |
| ) | |
| st.write("*How to Overcome These Errors/Issues?*") | |
| st.write( | |
| """ | |
| - Validate XML Syntax: Use XML validators and try-except blocks to catch and fix syntax errors during parsing. | |
| - Handle Encoding Issues: Specify the encoding when reading files and use libraries like chardet to detect encoding automatically. | |
| - Process Large Files Efficiently: Use streaming parsers (e.g., iterparse()) and iterative parsing to handle large files without consuming too much memory. | |
| - Ensure Consistent Structure: Check for missing elements before accessing them and handle inconsistencies with default values or conditional logic. | |
| """ | |
| ) | |
| # Button to open Jupyter Notebook or PDF | |
| if st.button("Open XML Documentation"): | |
| st.write("Download the [documentation notebook](path/to/XML_notebook.ipynb) or [PDF](path/to/XML_documentation.pdf).") | |