Spaces:
Sleeping
Sleeping
Update pages/3_Life_Cycle_Of_Ml.py
Browse files - pages/3_Life_Cycle_Of_Ml.py +70 -54
pages/3_Life_Cycle_Of_Ml.py
CHANGED
|
@@ -1,6 +1,4 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
-
import pandas as pd
|
| 3 |
-
import numpy as np
|
| 4 |
|
| 5 |
# Apply custom CSS styling
|
| 6 |
st.markdown("""
|
|
@@ -49,59 +47,77 @@ st.markdown("""
|
|
| 49 |
</style>
|
| 50 |
""", unsafe_allow_html=True)
|
| 51 |
|
| 52 |
-
#
|
| 53 |
-
st.
|
| 54 |
|
| 55 |
-
#
|
| 56 |
-
|
| 57 |
-
"Problem Statement",
|
| 58 |
-
"Collect the Data",
|
| 59 |
-
"Simple EDA (Exploratory Data Analysis)",
|
| 60 |
-
"Data Processing",
|
| 61 |
-
"Original EDA",
|
| 62 |
-
"Feature Engineering",
|
| 63 |
-
"Training the Model",
|
| 64 |
-
"Testing the Model",
|
| 65 |
-
"Deployment",
|
| 66 |
-
"Monitoring",
|
| 67 |
-
]
|
| 68 |
-
|
| 69 |
-
# Sidebar navigation
|
| 70 |
-
st.sidebar.title("Navigation")
|
| 71 |
-
selected_step = st.sidebar.radio("Steps in ML Lifecycle", steps)
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
if selected_step == "Problem Statement":
|
| 75 |
-
st.subheader("Define the Problem")
|
| 76 |
-
|
| 77 |
-
elif selected_step == "Collect the Data":
|
| 78 |
-
st.subheader("Gather Relevant Data")
|
| 79 |
-
|
| 80 |
-
elif selected_step == "Simple EDA (Exploratory Data Analysis)":
|
| 81 |
-
st.subheader("Initial Data Exploration")
|
| 82 |
-
|
| 83 |
-
elif selected_step == "Data Processing":
|
| 84 |
-
st.subheader("Clean and Prepare Data")
|
| 85 |
-
|
| 86 |
-
elif selected_step == "Original EDA":
|
| 87 |
-
st.subheader("Detailed Data Exploration")
|
| 88 |
-
|
| 89 |
-
elif selected_step == "Feature Engineering":
|
| 90 |
-
st.subheader("Feature Engineering")
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
elif selected_step == "Training the Model":
|
| 94 |
-
st.subheader("Train the Model")
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
elif selected_step == "Testing the Model":
|
| 98 |
-
st.subheader("Evaluate Model Performance")
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
elif selected_step == "Deployment":
|
| 102 |
-
st.subheader("Deploy the Model")
|
| 103 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
|
| 105 |
-
|
| 106 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
|
|
|
|
| 1 |
import streamlit as st
|
|
|
|
|
|
|
| 2 |
|
| 3 |
# Apply custom CSS styling
|
| 4 |
st.markdown("""
|
|
|
|
| 47 |
</style>
|
| 48 |
""", unsafe_allow_html=True)
|
| 49 |
|
| 50 |
+
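# Page Configuration — NOTE(review): st.set_page_config runs after the st.markdown CSS call above; older Streamlit releases require it to be the first Streamlit command on the page — confirm against the deployed version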
|
| 51 |
+
st.set_page_config(page_title="Interactive NLP Guide", layout="wide")
|
| 52 |
|
| 53 |
+
# Page Title
|
| 54 |
+
st.markdown("<h1>Interactive NLP Guide</h1>", unsafe_allow_html=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
|
| 56 |
+
# Introduction Section
|
| 57 |
+
st.markdown("<h2>Introduction to Natural Language Processing (NLP)</h2>", unsafe_allow_html=True)
|
| 58 |
+
st.markdown("""
|
| 59 |
+
<p>
|
| 60 |
+
Natural Language Processing (NLP) is a field at the intersection of linguistics and computer science, focusing on the interaction between humans and machines via natural language. NLP powers applications such as:
|
| 61 |
+
</p>
|
| 62 |
+
<ul class="icon-bullet">
|
| 63 |
+
<li>Chatbots and Virtual Assistants</li>
|
| 64 |
+
<li>Machine Translation (e.g., Google Translate)</li>
|
| 65 |
+
<li>Text Summarization</li>
|
| 66 |
+
<li>Sentiment Analysis</li>
|
| 67 |
+
<li>Speech Recognition Systems</li>
|
| 68 |
+
</ul>
|
| 69 |
+
""", unsafe_allow_html=True)
|
| 70 |
+
|
| 71 |
+
# Tokenization Section
|
| 72 |
+
st.markdown("<h2>Tokenization</h2>", unsafe_allow_html=True)
|
| 73 |
+
st.markdown("<h3>What is Tokenization?</h3>", unsafe_allow_html=True)
|
| 74 |
+
st.markdown("""
|
| 75 |
+
<p>
|
| 76 |
+
Tokenization is the process of breaking down a text into smaller units, such as sentences or words, called tokens. It's the foundational step in any NLP pipeline.
|
| 77 |
+
</p>
|
| 78 |
+
""", unsafe_allow_html=True)
|
| 79 |
|
| 80 |
+
st.markdown("""
|
| 81 |
+
<h3>Types of Tokenization:</h3>
|
| 82 |
+
<ul class="icon-bullet">
|
| 83 |
+
<li><strong>Word Tokenization:</strong> Splitting text into words (e.g., "I love NLP." → ["I", "love", "NLP"])</li>
|
| 84 |
+
<li><strong>Sentence Tokenization:</strong> Splitting text into sentences (e.g., "NLP is fascinating. It's the future." → ["NLP is fascinating.", "It's the future."])</li>
|
| 85 |
+
</ul>
|
| 86 |
+
""", unsafe_allow_html=True)
|
| 87 |
+
|
| 88 |
+
# Example Code
|
| 89 |
+
st.markdown("<h3>Code Example:</h3>", unsafe_allow_html=True)
|
| 90 |
+
st.code("""
|
| 91 |
+
from nltk.tokenize import word_tokenize, sent_tokenize
|
| 92 |
+
|
| 93 |
+
text = "Natural Language Processing is exciting. Let's explore it!"
|
| 94 |
+
word_tokens = word_tokenize(text)
|
| 95 |
+
sentence_tokens = sent_tokenize(text)
|
| 96 |
+
|
| 97 |
+
print("Word Tokens:", word_tokens)
|
| 98 |
+
print("Sentence Tokens:", sentence_tokens)
|
| 99 |
+
""", language="python")
|
| 100 |
+
|
| 101 |
+
# Other NLP Techniques Section
|
| 102 |
+
st.markdown("<h2>Other NLP Techniques</h2>", unsafe_allow_html=True)
|
| 103 |
+
st.markdown("""
|
| 104 |
+
<p>
|
| 105 |
+
As you explore NLP, here are other important techniques and their brief explanations:
|
| 106 |
+
</p>
|
| 107 |
+
<ul class="icon-bullet">
|
| 108 |
+
<li><strong>One-Hot Vectorization:</strong> A simple representation of text where each unique word is represented as a binary vector.</li>
|
| 109 |
+
<li><strong>Bag of Words:</strong> Represents text as the frequency of each word, disregarding word order.</li>
|
| 110 |
+
<li><strong>TF-IDF:</strong> Highlights important words by considering both frequency in a document and rarity across documents.</li>
|
| 111 |
+
<li><strong>Word Embeddings:</strong> Dense vector representations of words that capture their semantic meanings.</li>
|
| 112 |
+
</ul>
|
| 113 |
+
""", unsafe_allow_html=True)
|
| 114 |
+
|
| 115 |
+
st.markdown("<h3>Key Takeaways:</h3>", unsafe_allow_html=True)
|
| 116 |
+
st.markdown("""
|
| 117 |
+
<ul class="icon-bullet">
|
| 118 |
+
<li>Tokenization is the foundation of most NLP tasks.</li>
|
| 119 |
+
<li>NLP techniques can transform unstructured text into structured formats for analysis.</li>
|
| 120 |
+
<li>Tools like NLTK, SpaCy, and Hugging Face make NLP accessible to developers and researchers.</li>
|
| 121 |
+
</ul>
|
| 122 |
+
""", unsafe_allow_html=True)
|
| 123 |
|