Zero_To_Hero_In_MachineLearning / pages /3_Life Cycle Of ML Project.py
sree4411's picture
Update pages/3_Life Cycle Of ML Project.py
4cd01c5 verified
raw
history blame
35.4 kB
import streamlit as st
import json
import xml.etree.ElementTree as ET
import pandas as pd
# Router state: when no page has been recorded yet, start on the landing page.
if "page" not in st.session_state:
    st.session_state["page"] = "home"
# ----------------- Home Page -----------------
# Landing page: one button per lifecycle stage. Most stages reveal a short
# blurb directly under their button; "Data Collection" opens its own page.
if st.session_state.page == "home":
    st.title(":red[Lifecycle of a Machine Learning Project]")
    st.markdown("Click on a stage to learn more about it.")

    # (button label, inline blurb, page to open) - exactly one of blurb/page is set.
    lifecycle_stages = (
        (
            ":blue[🌟 Problem Statement]",
            "### Problem Statement\nIdentify the problem you want to solve and set clear objectives and success criteria.",
            None,
        ),
        (":blue[📊 Data Collection]", None, "data_collection"),
        (
            ":blue[🛠️ Simple EDA]",
            "### Simple EDA\nPerform exploratory data analysis to understand data distributions and relationships.",
            None,
        ),
        (
            ":blue[Data Pre-Processing]",
            "### Data Pre-Processing\nConvert raw data into cleaned data.",
            None,
        ),
        (
            ":blue[📈 Exploratory Data Analysis (EDA)]",
            "### Exploratory Data Analysis (EDA)\nVisualize and analyze the data to understand its distributions and relationships.",
            None,
        ),
        (
            ":blue[🏋️ Feature Engineering]",
            "### Feature Engineering\nCreate new features from existing data.",
            None,
        ),
        (
            ":blue[🤖 Model Training]",
            "### Model Training\nTrain the model using the training data and optimize its parameters.",
            None,
        ),
        (
            ":blue[🔧 Model Testing]",
            "### Model Testing\nAssess the model's performance using various metrics and cross-validation techniques.",
            None,
        ),
        (
            ":blue[🚀 Model Deployment]",
            "### Model Deployment\nIntegrate the trained model into a production environment and monitor its performance.",
            None,
        ),
        (
            ":blue[📝 Monitoring]",
            "### Monitoring\nPeriodically retrain the model with new data and update features as needed.",
            None,
        ),
    )
    for stage_label, stage_blurb, stage_page in lifecycle_stages:
        if not st.button(stage_label):
            continue
        if stage_page is None:
            # The blurb is rendered right below the button that was pressed.
            st.markdown(stage_blurb)
        else:
            st.session_state.page = stage_page
            # This run has already been routed to the home branch, so rerun
            # immediately; otherwise the new page would only show up after a
            # second interaction.
            st.rerun()
# ----------------- Data Collection Page -----------------
elif st.session_state.page == "data_collection":
st.title(":red[Data Collection]")
st.markdown("""
### :blue[What is Data?]\nData is a collection of raw information from various sources. On its own, it may not carry any immediate meaning, but when processed and analyzed, it can be used to derive insights, make decisions, and support the development of systems across various fields, such as business, science, engineering, and more.
In the context of Machine Learning (ML), data plays an even more pivotal role. It serves as the input for algorithms, enabling them to learn, predict, and classify based on patterns found in the data. Without data, there is nothing for the ML model to learn from, and consequently, the model cannot make any predictions or inferences.
""")
st.markdown(":red[Types of Data]: \n*Structured, **Unstructured, **Semi-Structured*")
if st.button(":blue[🌟 Structured Data]"):
st.session_state.page = "structured_data"
if st.button(":blue[πŸ“· Unstructured Data]"):
st.session_state.page = "unstructured_data"
if st.button(":blue[πŸ—ƒοΈ Semi-Structured Data]"):
st.session_state.page = "semi_structured_data"
if st.button(":red[Back to Home]"):
st.session_state.page = "home"
# ----------------- Structured Data Page -----------------
elif st.session_state.page == "structured_data":
st.title(":red[Structured Data]")
st.markdown("""
Structured data is highly organized and typically stored in tables like spreadsheets or databases. It is easy to search and analyze.
""")
st.markdown("### Examples: Excel files")
if st.button(":blue[πŸ“Š Excel]"):
st.session_state.page = "excel"
if st.button(":red[Back to Data Collection]"):
st.session_state.page = "data_collection"
# ----------------- Excel Data Page -----------------
elif st.session_state.page == "excel":
st.title(":red[Excel Data Format]")
st.write("### :blue[What is Excel?]")
st.write("Excel is a spreadsheet tool for storing data in tabular format with rows and columns. Common file extensions: .xls, .xlsx.")
st.write("### :blue[How to Read Excel ]")
st.code("""
import pandas as pd
# Read an Excel file
df = pd.read_excel('data.xlsx', sheet_name='Sheet1')
print(df)
""", language='python')
st.write("### Issues Encountered")
st.write("""
- *File not found*: Incorrect file path.
- *Sheet name error*: Specified sheet doesn't exist.
- *Missing libraries*: openpyxl or xlrd might be missing.
""")
st.write("### Solutions to These Issues")
st.code("""
# Install required libraries
# pip install openpyxl xlrd
# Handle missing file
try:
df = pd.read_excel('data.xlsx', sheet_name='Sheet1')
except FileNotFoundError:
print("File not found. Check the file path.")
# List available sheet names
excel_file = pd.ExcelFile('data.xlsx')
print(excel_file.sheet_names)
""", language='python')
st.link_button(":blue[Jupyter Notebook(colab)]","https://colab.research.google.com/drive/1sT35x4JH9s_hb31aMoUwtry-w8FE7fQg?usp=sharing")
if st.button(":red[Back to Structured Data]"):
st.session_state.page = "structured_data"
# ----------------- Unstructured Data Page -----------------
elif st.session_state.page == "unstructured_data":
st.title(":red[Unstructured Data]")
st.markdown("""
*Unstructured data* does not have a predefined format. It consists of various data types like text, images, videos, and audio files.
Examples include:
- Text documents (e.g., .txt, .docx)
- Images (e.g., .jpg, .png)
- Videos (e.g., .mp4, .avi)
- Audio files (e.g., .mp3, .wav)
""")
#st.header("πŸ“„ Handling Text Data")
# st.markdown("""
# Text data can be analyzed using Natural Language Processing (NLP) techniques.
# st.code("""
# Reading text data
#with open('sample.txt', 'r') as file:
# text = file.read()
# print(text)
# Basic text processing using NLTK
#import nltk
#from nltk.tokenize import word_tokenize
##print(tokens)
# """, language='python')
# st.header("πŸŽ₯ Handling Video Data")
# st.markdown("""
# Videos can be processed frame by frame using OpenCV.
# """)
# st.code("""
#import cv2
# Capture video
#video = cv2.VideoCapture('sample_video.mp4')
#while video.isOpened():
#ret, frame = video.read()
#if not ret:
# break
#cv2.imshow('Frame', frame)
#if cv2.waitKey(1) & 0xFF == ord('q'):
# break
#video.release()
#cv2.destroyAllWindows()
#""", language='python')
# st.header("πŸ–ΌοΈ Handling Image Data")
# st.markdown("""
# Image data can be processed using libraries like OpenCV and PIL (Pillow).
#""")
# st.code("""
#from PIL import Image
# Open an image file
#image = Image.open('sample_image.jpg')
#image.show()
# Convert image to grayscale
#gray_image = image.convert('L')
#gray_image.show()
# """, language='python')
# Button to Navigate to Introduction to Image
if st.button(":blue[Introduction to Image]"):
st.session_state.page = "Introduction_to_image"
if st.button("Back to Data Collection"):
st.session_state.page = "data_collection"
# ----------------- Introduction to Image -----------------
# Ensure proper indentation for this section of the Streamlit app
if st.session_state.page == "Introduction_to_image":
st.header(":red[πŸ–ΌοΈ What is an Image?]")
st.markdown("""
An image is a two-dimensional visual representation of objects, people, scenes, or concepts. It can be captured using devices like cameras or scanners, or created digitally. Images are composed of individual units called pixels, which contain information about brightness and color.
#### Types of Images:
- **Raster Images (Bitmap)**: Composed of a grid of pixels. Common formats include:
- JPEG
- PNG
- GIF
- **Vector Images**: Defined by mathematical equations and geometric shapes (like lines and curves). Common format:
- SVG (Scalable Vector Graphics)
- **3D Images**: Represent objects or scenes in three dimensions, often used for rendering and modeling.
#### Image Representation:
- **Grayscale Image**: Each pixel has a single intensity value, typically ranging from 0 (black) to 255 (white), representing different shades of gray.
- **Color Image**: Usually represented in the RGB color space, where each pixel consists of three values indicating the intensity of Red, Green, and Blue.
#### Applications of Images:
- **Photography & Visual Media**: Capturing moments and storytelling.
- **Medical Imaging**: Diagnosing conditions using X-rays, MRIs, etc.
- **Machine Learning & AI**: Tasks like image classification, object detection, and facial recognition.
- **Remote Sensing**: Analyzing geographic and environmental data using satellite imagery.
- **Graphic Design & Art**: Creating visual content for marketing and design.
""")
st.code("""
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
# Open an image file
image = Image.open('sample_image.jpg')
image.show()
# Convert image to grayscale
gray_image = image.convert('L')
gray_image.show()
# Resize the image
resized_image = image.resize((200, 200))
resized_image.show()
# Rotate the image by 90 degrees
rotated_image = image.rotate(90)
rotated_image.show()
# Convert the image to a NumPy array and display its shape
image_array = np.array(image)
print(image_array.shape)
# Display the image array as a plot
plt.imshow(image)
plt.title("Original Image")
plt.axis('off')
plt.show()
""", language='python')
st.header(":red[Color Spaces in Machine Learning]")
st.markdown("""
A color space is a mathematical model for representing colors. In machine learning, different color spaces can be used for preprocessing and analyzing image data, depending on the task.
#### Common Color Spaces:
- **RGB (Red, Green, Blue)**: The most common color space for digital images. Each pixel is represented by a combination of three values corresponding to the red, green, and blue channels.
- **Use Cases**: Image classification, general-purpose image analysis.
- **HSV (Hue, Saturation, Value)**: Separates color information (hue) from intensity (value), making it useful for tasks where distinguishing between color variations and intensity is important.
- **Use Cases**: Color-based object detection, image segmentation, color tracking.
- **CMYK (Cyan, Magenta, Yellow, Black)**: Primarily used for printing, not commonly used in machine learning, but useful for preparing images for printers.
- **Use Cases**: Printing applications.
- **LAB (Lightness, A, B)**: Designed to be perceptually uniform, meaning that the perceptual difference between colors is consistent across the space.
- **Use Cases**: Color correction, image processing tasks requiring color consistency.
""")
st.image("https://www.numerical-tours.com/matlab/multidim_1_color/index_01.png", caption="RGB color spaces", use_container_width=True )
if st.button(":blue[Operations Using OpenCV]"):
st.session_state.page = "operations_using_opencv"
# Button for navigating back
if st.button("Back to Data Collection"):
st.session_state.page = "data_collection"
# ---------- OPERATIONS USING OPENCV --------------------------------
elif st.session_state.page == "operations_using_opencv":
# Header and description for cv2.imread
st.header(":red[πŸ—‚οΈ Reading an Image with cv2.imread()]")
st.markdown("""
**`cv2.imread()` - Read an Image**
**Purpose:** Load an image from a file and convert it to a NumPy array.
**Syntax:**
```python
image = cv2.imread(filename, flags)
```
**Common Flags:**
- `cv2.IMREAD_COLOR` (default, loads a color image).
- `cv2.IMREAD_GRAYSCALE` (loads the image in grayscale).
- `cv2.IMREAD_UNCHANGED` (loads the image as is, with alpha transparency if available).
**Return:**
- A NumPy array representing the image.
- Returns `None` if the image cannot be loaded.
**Example:**
```python
import cv2
image = cv2.imread('image.jpg', cv2.IMREAD_COLOR)
```
""")
# Header and description for cv2.imshow
st.header(":red[πŸ–ΌοΈ Displaying an Image with cv2.imshow()]")
st.markdown("""
**`cv2.imshow()` - Display an Image**
**Purpose:** Show an image in a window.
**Syntax:**
```python
cv2.imshow(window_name, image)
```
**Requirements:**
- Call `cv2.waitKey()` to keep the window open until a key is pressed.
- Call `cv2.destroyAllWindows()` to close the window(s).
**Behavior:**
- Displays the image in a resizable window.
- The image must be a NumPy array.
**Example:**
```python
import cv2
cv2.imshow('Image Window', image)
cv2.waitKey(0) # Wait for a key press
cv2.destroyAllWindows() # Close the window
```
""")
# Header and description for cv2.imwrite
st.header(":red[πŸ’Ύ Saving an Image with cv2.imwrite()]")
st.markdown("""
**`cv2.imwrite()` - Write/Save an Image**
**Purpose:** Save an image to a file.
**Syntax:**
```python
cv2.imwrite(filename, image)
```
**File Format:**
Determined by the file extension (`.jpg`, `.png`, etc.).
**Return:**
- `True` if the image is saved successfully, `False` otherwise.
**Optional Parameters:**
- **JPEG Quality:** `cv2.IMWRITE_JPEG_QUALITY` (0 to 100, default is 95).
- **PNG Compression:** `cv2.IMWRITE_PNG_COMPRESSION` (0 to 9, default is 3).
**Example:**
```python
import cv2
cv2.imwrite('output.jpg', image)
```
""")
##Navigation Button
if st.button(":blue[Conversion of Images]"):
st.session_state.page = "Conversion_of_Images"
if st.button(":blue[Back to operations using opencv]"):
st.session_state.page = "operations_using_opencv"
if st.button(":blue[Back to Unstructured Collection]"):
st.session_state.page = "unstructured_data"
# Navigation Button
if st.button("Back to Data Collection"):
st.session_state.page = "data_collection"
##------------CONVERSION OF IMAGE-----------------
elif st.session_state.page == "Conversion_of_Images":
# Header for Image Conversion
st.header(":red[πŸ”„ Converting Images Between Different Color Spaces]")
st.markdown("""
**OpenCV supports many color spaces for image processing.**
**Common Conversions:**
- **BGR to Grayscale:** Converts a color image to grayscale.
- **BGR to RGB:** Converts from OpenCV's default BGR format to the standard RGB format.
- **BGR to HSV:** Converts the image to the HSV (Hue, Saturation, Value) color space.\n
**Examples of Conversions:**
```python
import cv2
# Load the image
image = cv2.imread('image.jpg')
# Convert BGR to Grayscale
gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# Convert BGR to RGB
rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# Convert BGR to HSV
hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
```
**Why Convert Color Spaces?**
- **Grayscale:** Useful for reducing image complexity in tasks like edge detection.
- **RGB:** Standard format for visualization in libraries like `matplotlib`.
- **HSV:** Useful for color-based segmentation, as it separates color information from brightness.
""")
st.image("https://miro.medium.com/v2/resize:fit:1400/1*_uRqXYQQE9nvM7lOR0Psnw.png", caption="RGB to Gray", use_container_width=True)
# Header for Splitting Channels
st.header(":red[πŸ”Ή Splitting Color Channels in an Image]")
st.markdown("""
**Splitting an image into its individual color channels (B, G, R) allows you to analyze or modify each channel independently.**
**Syntax:**
```python
b, g, r = cv2.split(image)
```
**Example:**
```python
import cv2
# Load the image
image = cv2.imread('image.jpg')
# Split the image into Blue, Green, and Red channels
blue_channel, green_channel, red_channel = cv2.split(image)
# Display the channels separately (Optional)
cv2.imshow('Blue Channel', blue_channel)
cv2.imshow('Green Channel', green_channel)
cv2.imshow('Red Channel', red_channel)
cv2.waitKey(0)
cv2.destroyAllWindows()
```
**Explanation:**
- The `cv2.split()` function returns the Blue, Green, and Red channels as separate images (grayscale format).
""")
# Header for Merging Channels
st.header(":red[πŸ”Ή Merging Color Channels in an Image]")
st.markdown("""
**You can merge the individual channels back into a color image using `cv2.merge()`.**
**Syntax:**
```python
merged_image = cv2.merge((b, g, r))
```
**Example:**
```python
import cv2
# Load the image
image = cv2.imread('image.jpg')
# Split the image into channels
b, g, r = cv2.split(image)
# Merge the channels back into a color image
merged_image = cv2.merge((b, g, r))
# Display the merged image
cv2.imshow('Merged Image', merged_image)
cv2.waitKey(0)
cv2.destroyAllWindows()
```
**Explanation:**
- The `cv2.merge()` function takes a tuple of channels `(b, g, r)` and combines them back into a single color image.
- You can manipulate the individual channels before merging to achieve different effects.
""")
# Header for Combining with Modifications
st.header(":red[🎨 Modifying Channels Before Merging]")
st.markdown("""
**You can modify each channel (e.g., increase brightness in the red channel) before merging them back together.**
**Example:**
```python
import cv2
# Load the image
image = cv2.imread('image.jpg')
# Split channels
b, g, r = cv2.split(image)
# Increase the intensity of the red channel
r = cv2.add(r, 50)
# Merge the modified channels
modified_image = cv2.merge((b, g, r))
# Display the modified image
cv2.imshow('Modified Image', modified_image)
cv2.waitKey(0)
cv2.destroyAllWindows()
```
**Explanation:**
- In this example, `cv2.add(r, 50)` increases the intensity of the red channel by 50.
- After modification, the channels are merged back to create the final image.
""")
# Navigation Button
if st.button(":blue[Video capture and explanation]"):
st.session_state.page = "Video_capture_and_explanation"
if st.button(":blue[Back to Unstructured Collection]"):
st.session_state.page = "unstructured_data"
# Navigation Button
if st.button(":red[Back to Data Collection]"):
st.session_state.page = "data_collection"
#---------VIDEO CAPTURE AND EXPLANATION OF CV2.WAITKEY-----------
elif st.session_state.page == "Video_capture_and_explanation":
st.header(":red[πŸŽ₯ Video Capture with `cv2.VideoCapture()`]")
st.markdown("""
**Purpose**: Captures live video from a webcam or reads a video file using OpenCV.
### Syntax
```python
cap = cv2.VideoCapture(source)
source:
0: Refers to the default webcam (if you have one connected).
'video.mp4': The path to a video file (can be any supported video format like .mp4, .avi).
```
Key Methods:
- cap.read(): Captures a frame-by-frame video from the source.
Returns:
- ret: A Boolean indicating whether the frame was read correctly (True if successful).
- frame: The captured frame, represented as a NumPy array (this can be processed or displayed).
- cap.release(): Releases the video source when you are done capturing. It frees up system resources and allows you to safely close the video capture device or file.
Example:
Here’s an example that captures video from the default webcam and displays it:
```python
import cv2
# Open the default webcam (0)
cap = cv2.VideoCapture(0)
while cap.isOpened():
ret, frame = cap.read() # Capture frame-by-frame
if not ret:
break # Exit if frame not read correctly
cv2.imshow('Live Video', frame) # Display the frame
# Wait for 1 ms and exit if 'q' is pressed
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release() # Release the webcam
cv2.destroyAllWindows() # Close all OpenCV windows
```
How it Works:
- cv2.VideoCapture(0): Opens the default webcam (if available).
- cap.read(): Reads each frame from the video source.
- cv2.imshow('Live Video', frame): Displays each captured frame in a window.
- cap.release(): Releases the video capture object when done capturing frames.
- cv2.destroyAllWindows(): Closes all OpenCV windows to free up resources.
""")
##----------##
st.header(":red[⏱️ cv2.waitKey() for Key Event Handling]")
st.markdown("""
Purpose:
cv2.waitKey() is a key function used to handle keyboard events in OpenCV. It is commonly used to display images or video frames and wait for a user input.
Syntax:
```python
cv2.waitKey(delay)
```
delay:
- 0: Waits indefinitely until a key is pressed. This is useful when displaying images or video and you want to hold the display open until a key is pressed.
- 1: Waits for 1 millisecond. This is commonly used in real-time video streaming where the program keeps checking for user input every 1 millisecond.
How it Works:
- cv2.waitKey(1): This line waits for a key press for 1 millisecond before checking if the user has pressed any key. If no key is pressed within that time, it proceeds to the next frame.
- Key Event: The function returns an integer value representing the ASCII code of the key pressed. For example, pressing the 'q' key returns 113 (the ASCII value for 'q').
Example:
Here’s an example using cv2.waitKey() to exit the video capture loop when the 'q' key is pressed:
```python
if cv2.waitKey(1) & 0xFF == ord('q'):
break
```
Explanation:
- ord('q'): Converts the 'q' character to its ASCII value (113).
- & 0xFF: Masks the higher bits of the returned value to only check for the lower 8 bits, ensuring correct handling of the key press.
Why is cv2.waitKey() Important?
- It helps manage user input while displaying images or videos.
- Without cv2.waitKey(), the OpenCV window would immediately close after displaying the image/video, and you would not be able to interact with it.
- It enables frame-by-frame processing in real-time video processing (such as live video capture or webcam feeds).
Example in Context:
```python
import cv2
# Open the default webcam (0)
cap = cv2.VideoCapture(0)
while cap.isOpened():
ret, frame = cap.read() # Capture frame-by-frame
if not ret:
break # Exit if frame not read correctly
cv2.imshow('Webcam Feed', frame) # Display the frame
# Wait for 1 ms and exit if 'q' is pressed
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release() # Release the webcam
cv2.destroyAllWindows() # Close all OpenCV windows
```
Explanation:
- cv2.VideoCapture(0): Initializes the webcam.
- cap.read(): Captures each frame from the webcam.
- cv2.imshow('Webcam Feed', frame): Displays the captured frame.
- cv2.waitKey(1): Checks for key press every 1 millisecond. If the 'q' key is pressed, the loop breaks, and the webcam feed stops.
- cap.release(): Releases the webcam when done.
- cv2.destroyAllWindows(): Closes the OpenCV windows and cleans up resources.
""")
###------KEY POINTS -----###
st.markdown("""
1. **Video Capture (`cv2.VideoCapture`)**: Opens and reads video either from the webcam or from a video file.
- **Method `cap.read()`**: Captures individual frames from the video source.
- **Releasing the capture (`cap.release()`)**: Ensures that the resources are freed once done.
2. **Key Handling (`cv2.waitKey`)**: Waits for user key input and processes it:
- **`cv2.waitKey(1)`**: Checks for key presses every 1 millisecond.
- **Exiting the loop**: Pressing the `'q'` key exits the video capture loop.
This explanation provides both the purpose and practical use cases of `cv2.VideoCapture()` and `cv2.waitKey()` in video capture scenarios, including how the two work together to display video and handle key events effectively.
""")
# Navigation Button
if st.button(":blue[Affine Transformation Matrix]"):
st.session_state.page = "Affine_Transformation_Matrix"
st.link_button(":blue[HitHub project Link]","https://github.com/Jayasree417/Animation-Using-Open-CV")
if st.button(":blue[Back to Unstructured Collection]"):
st.session_state.page = "unstructured_data"
# Navigation Button
if st.button(":red[Back to Data Collection]"):
st.session_state.page = "data_collection"
# ----- AFFINE TRANSFORMATION MATRIX -----
elif st.session_state.page == "Affine_Transformation_Matrix":
# Header for Affine Transformation Matrix
st.header(":red[Affine Transformation Matrix]")
# Description of Affine Transformation
st.markdown("""
An **Affine Transformation** is a linear mapping method that preserves points, straight lines, and planes. In other words, it maintains the structure of the original object while allowing for operations like translation, scaling, rotation, reflection, and shearing. Affine transformations are widely used in computer graphics, computer vision, image processing, and geometry.
Affine transformations can be represented by a **transformation matrix** of the following form:
\\[
T(x, y) = \\begin{bmatrix} a & b & tx \\\\ c & d & ty \\\\ 0 & 0 & 1 \\end{bmatrix} \\begin{bmatrix} x \\\\ y \\\\ 1 \\end{bmatrix}
\\]
- The **matrix elements (a, b, c, d)** control the linear transformation (scaling, rotation, and shearing).
- The elements **tx and ty** represent translation (shifting the coordinates).
### How the Transformation Works
Given a point \\((x, y)\\), applying an affine transformation produces a new point \\((x', y')\\) calculated as:
\\[
\\begin{bmatrix} x' \\\\ y' \\\\ 1 \\end{bmatrix} = \\begin{bmatrix} a & b & tx \\\\ c & d & ty \\\\ 0 & 0 & 1 \\end{bmatrix} \\begin{bmatrix} x \\\\ y \\\\ 1 \\end{bmatrix}
\\]
This means:
- \\(x' = a \\cdot x + b \\cdot y + tx\\)
- \\(y' = c \\cdot x + d \\cdot y + ty\\)
Affine transformations can be visualized as applying a series of transformations to geometric shapes.
""")
# Key Points Section
st.header(":red[Key Points of Affine Transformations]")
st.markdown("""
### 1. **Preserves Collinearity**
- Points that lie on a straight line before transformation remain on a straight line after transformation.
### 2. **Preserves Ratios of Distances**
- The ratio of distances between points on a line remains unchanged after transformation.
### 3. **Common Operations**
Affine transformations can perform the following operations:
- **Translation**: Moves the object along the x and y axes.
- **Scaling**: Changes the size of the object (uniform or non-uniform).
- **Rotation**: Rotates the object around a specific point (usually the origin).
- **Shearing**: Skews the object along one or both axes.
- **Reflection**: Mirrors the object about a specific axis (e.g., x-axis or y-axis).
### 4. **2D Affine Transformation Matrix**
The general 2D affine transformation matrix can be expressed as:
\\[
\\begin{bmatrix} a & b & tx \\\\ c & d & ty \\\\ 0 & 0 & 1 \\end{bmatrix}
\\]
Where:
- \\(a, b, c, d\\) represent the linear transformations (scaling, rotation, shearing).
- \\(tx, ty\\) represent translation.
### 5. **Combining Transformations**
- Multiple affine transformations can be combined by multiplying their matrices.
- **Order Matters**: The order in which transformations are applied affects the final result (matrix multiplication is non-commutative).
### 6. **Applications of Affine Transformations**
- **Computer Graphics**: Transforming and rendering shapes and images.
- **Image Processing**: Geometric operations like rotation, scaling, and shearing of images.
- **Computer Vision**: Object detection, pattern recognition, and image alignment.
- **Robotics**: Coordinate transformations for motion planning and navigation.
- **Geographical Information Systems (GIS)**: Map projection and alignment.
### 7. **Homogeneous Coordinates**
Using homogeneous coordinates \\((x, y, 1)\\) allows us to unify translation with linear transformations in a single matrix operation. This simplifies the combination and chaining of multiple transformations.
""")
if st.button(":blue[Back to Unstructured Collection]"):
st.session_state.page = "unstructured_data"
if st.button(":red[Back to Data Collection]"):
st.session_state.page = "data_collection"
# ----------------- Semi-Structured Data Page -----------------
elif st.session_state.page == "semi_structured_data":
st.title(":blue[Semi-Structured Data]")
st.markdown("""
Semi-structured data does not have a rigid structure but contains tags and markers to separate different data elements, like Csv, XML or JSON.
""")
if st.button(":blue[XML Data]"):
st.session_state.page = "xml"
if st.button(":blue[JSON Data]"):
st.session_state.page = "json"
if st.button(":blue[HTML Data]"):
st.session_state.page = "html"
if st.button(":blue[πŸ“‘ CSV]"):
st.session_state.page = "csv"
if st.button(":red[Back to Data Collection]"):
st.session_state.page = "data_collection"
# ----------------- CSV Data Page -----------------
elif st.session_state.page == "csv":
st.title(":red[CSV Data Format]")
st.markdown("""
CSV (Comma-Separated Values) is a simple format used to store tabular data. Each line in the file represents a row, and commas separate the values within the row.
""")
st.markdown("### How to Read a CSV file")
st.code("""
import pandas as pd
# Read a CSV file
df = pd.read_csv('data.csv')
print(df)
""", language='python')
st.markdown("### Issues Encountered")
st.write("""
- *File not found*: Incorrect file path.
- *Wrong delimiter*: The CSV uses a different delimiter (e.g., semicolon).
- *Missing Libraries*: pandas might be missing.
""")
st.write("### Solutions")
st.code("""
# Install required libraries
# pip install pandas
# Handle file not found
try:
df = pd.read_csv('data.csv')
except FileNotFoundError:
print("File not found. Check the file path.")
# Handle incorrect delimiter
df = pd.read_csv('data.csv', delimiter=';')
""", language='python')
st.link_button(":blue[Jupyter Notebook(colab)]","https://colab.research.google.com/drive/10MHcHTn40RcRA80TMvyXLiIwmU94Nt4N?usp=sharing")
if st.button(":red[Back to Structured Data]"):
st.session_state.page = "structured_data"
# ----------------- XML Data Page -----------------
elif st.session_state.page == "xml":
st.title(":blue[XML Data Format]")
st.write("""
XML (Extensible Markup Language) is a markup language that defines rules for encoding documents in a format that is both human-readable and machine-readable.
""")
st.markdown("### Example: Reading XML data")
st.code("""
import xml.etree.ElementTree as ET
tree = ET.parse('data.xml')
root = tree.getroot()
for elem in root:
print(elem.tag, elem.text)
""", language='python')
st.write("### Issues Encountered")
st.write("""
- *File not found*: Incorrect file path.
""")
st.write("### Solutions to These Issues")
st.code("""
try:
tree = ET.parse('data.xml')
root = tree.getroot()
except FileNotFoundError:
print("File not found. Check the file path.")
""", language='python')
st.link_button(":blue[Jupyter Notebook(colab)]","https://colab.research.google.com/drive/1jXSPACETyJ5OMKx7Nhx2Mn75V9n9WIyY?usp=sharing")
if st.button(":red[Back to Semi-Structured Data]"):
st.session_state.page = "semi_structured_data"
# ----------------- JSON Data Page -----------------
elif st.session_state.page == "json":
st.title(":blue[JSON Data Format]")
st.write("""
JSON (JavaScript Object Notation) is a lightweight data-interchange format that is easy for humans to read and write, and easy for machines to parse and generate.
""")
st.markdown("### Example: Reading JSON data")
st.code("""
import json
# Reading a JSON file
with open('data.json', 'r') as file:
data = json.load(file)
print(data)
""", language='python')
st.write("### Issues Encountered")
st.write("""
- *File not found*: Incorrect file path.
""")
st.write("### Solutions to These Issues")
st.code("""
try:
with open('data.json', 'r') as file:
data = json.load(file)
except FileNotFoundError:
print("File not found. Check the file path.")
""", language='python')
st.link_button(":blue[Jupyter Notebook(colab)]","https://colab.research.google.com/drive/12VF_YSzYvILWHOHKQQu2SeytOLSooN7K?usp=sharing")
if st.button(":red[Back to Semi-Structured Data]"):
st.session_state.page = "semi_structured_data"
# ----------------- HTML Data Page -----------------
elif st.session_state.page == "html":
st.title(":blue[HTML Data Format]")
st.write("""
HTML (HyperText Markup Language) is the standard language for creating webpages. It uses a markup structure to format text, images, and other content on the web.
""")
st.markdown("### Example: Reading HTML data")
st.code("""
import pandas as pd
# Reading HTML data
dfs = pd.read_html('sample.html')
print(dfs[0]) # Display the first table from the HTML file
""", language='python')
st.write("### Issues Encountered")
st.write("""
- *File not found*: Incorrect file path.
- *Missing Libraries*: pandas might be missing.
""")
st.write("### Solutions to These Issues")
st.code("""
# Install required libraries
# pip install pandas
# Handle file not found
try:
dfs = pd.read_html('sample.html')
except FileNotFoundError:
print("File not found. Check the file path.")
""", language='python')
st.link_button(":blue[Jupyter Notebook(colab)]","https://colab.research.google.com/drive/1yBgo4h_RNlc2KJApxXEWZXEZCwWHos_n?usp=sharing")
if st.button(":red[Back to Semi-Structured Data]"):
st.session_state.page = "semi_structured_data"