DOMMETI committed on
Commit
7bde025
·
verified ·
1 Parent(s): 2f145ad

Update pages/3_Life_Cycle_Of_Ml.py

Browse files
Files changed (1) hide show
  1. pages/3_Life_Cycle_Of_Ml.py +70 -54
pages/3_Life_Cycle_Of_Ml.py CHANGED
@@ -1,6 +1,4 @@
1
  import streamlit as st
2
- import pandas as pd
3
- import numpy as np
4
 
5
  # Apply custom CSS styling
6
  st.markdown("""
@@ -49,59 +47,77 @@ st.markdown("""
49
  </style>
50
  """, unsafe_allow_html=True)
51
 
52
- # Main title
53
- st.title("Lifecycle of a Machine Learning Project")
54
 
55
- # Steps of the ML lifecycle
56
- steps = [
57
- "Problem Statement",
58
- "Collect the Data",
59
- "Simple EDA (Exploratory Data Analysis)",
60
- "Data Processing",
61
- "Original EDA",
62
- "Feature Engineering",
63
- "Training the Model",
64
- "Testing the Model",
65
- "Deployment",
66
- "Monitoring",
67
- ]
68
-
69
- # Sidebar navigation
70
- st.sidebar.title("Navigation")
71
- selected_step = st.sidebar.radio("Steps in ML Lifecycle", steps)
72
-
73
-
74
- if selected_step == "Problem Statement":
75
- st.subheader("Define the Problem")
76
-
77
- elif selected_step == "Collect the Data":
78
- st.subheader("Gather Relevant Data")
79
-
80
- elif selected_step == "Simple EDA (Exploratory Data Analysis)":
81
- st.subheader("Initial Data Exploration")
82
-
83
- elif selected_step == "Data Processing":
84
- st.subheader("Clean and Prepare Data")
85
-
86
- elif selected_step == "Original EDA":
87
- st.subheader("Detailed Data Exploration")
88
-
89
- elif selected_step == "Feature Engineering":
90
- st.subheader("Feature Engineering")
91
-
92
-
93
- elif selected_step == "Training the Model":
94
- st.subheader("Train the Model")
95
-
96
-
97
- elif selected_step == "Testing the Model":
98
- st.subheader("Evaluate Model Performance")
99
-
100
-
101
- elif selected_step == "Deployment":
102
- st.subheader("Deploy the Model")
103
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
 
105
- elif selected_step == "Monitoring":
106
- st.subheader("Monitor the Model")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
 
 
1
  import streamlit as st
 
 
2
 
3
  # Apply custom CSS styling
4
  st.markdown("""
 
47
  </style>
48
  """, unsafe_allow_html=True)
49
 
50
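+ # Page Configuration (NOTE(review): on Streamlit versions that require it, st.set_page_config must be the first Streamlit command — confirm, and move it above the CSS st.markdown call if so)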
51
+ st.set_page_config(page_title="Interactive NLP Guide", layout="wide")
52
 
53
+ # Page Title
54
+ st.markdown("<h1>Interactive NLP Guide</h1>", unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
+ # Introduction Section
57
+ st.markdown("<h2>Introduction to Natural Language Processing (NLP)</h2>", unsafe_allow_html=True)
58
+ st.markdown("""
59
+ <p>
60
+ Natural Language Processing (NLP) is a field at the intersection of linguistics and computer science, focusing on the interaction between humans and machines via natural language. NLP powers applications such as:
61
+ </p>
62
+ <ul class="icon-bullet">
63
+ <li>Chatbots and Virtual Assistants</li>
64
+ <li>Machine Translation (e.g., Google Translate)</li>
65
+ <li>Text Summarization</li>
66
+ <li>Sentiment Analysis</li>
67
+ <li>Speech Recognition Systems</li>
68
+ </ul>
69
+ """, unsafe_allow_html=True)
70
+
71
+ # Tokenization Section
72
+ st.markdown("<h2>Tokenization</h2>", unsafe_allow_html=True)
73
+ st.markdown("<h3>What is Tokenization?</h3>", unsafe_allow_html=True)
74
+ st.markdown("""
75
+ <p>
76
+ Tokenization is the process of breaking down a text into smaller units, such as sentences or words, called tokens. It's the foundational step in any NLP pipeline.
77
+ </p>
78
+ """, unsafe_allow_html=True)
79
 
80
+ st.markdown("""
81
+ <h3>Types of Tokenization:</h3>
82
+ <ul class="icon-bullet">
83
+ <li><strong>Word Tokenization:</strong> Splitting text into words (e.g., "I love NLP." → ["I", "love", "NLP", "."])</li>
84
+ <li><strong>Sentence Tokenization:</strong> Splitting text into sentences (e.g., "NLP is fascinating. It's the future." → ["NLP is fascinating.", "It's the future."])</li>
85
+ </ul>
86
+ """, unsafe_allow_html=True)
87
+
88
+ # Example Code
89
+ st.markdown("<h3>Code Example:</h3>", unsafe_allow_html=True)
90
+ st.code("""
91
+ from nltk.tokenize import word_tokenize, sent_tokenize
92
+
93
+ text = "Natural Language Processing is exciting. Let's explore it!"
94
+ word_tokens = word_tokenize(text)
95
+ sentence_tokens = sent_tokenize(text)
96
+
97
+ print("Word Tokens:", word_tokens)
98
+ print("Sentence Tokens:", sentence_tokens)
99
+ """, language="python")
100
+
101
+ # Adding more sections
102
+ st.markdown("<h2>Other NLP Techniques</h2>", unsafe_allow_html=True)
103
+ st.markdown("""
104
+ <p>
105
+ As you explore NLP, here are other important techniques and their brief explanations:
106
+ </p>
107
+ <ul class="icon-bullet">
108
+ <li><strong>One-Hot Vectorization:</strong> A simple representation of text where each unique word is represented as a binary vector.</li>
109
+ <li><strong>Bag of Words:</strong> Represents text as the frequency of each word, disregarding word order.</li>
110
+ <li><strong>TF-IDF:</strong> Highlights important words by considering both frequency in a document and rarity across documents.</li>
111
+ <li><strong>Word Embeddings:</strong> Dense vector representations of words that capture their semantic meanings.</li>
112
+ </ul>
113
+ """, unsafe_allow_html=True)
114
+
115
+ st.markdown("<h3>Key Takeaways:</h3>", unsafe_allow_html=True)
116
+ st.markdown("""
117
+ <ul class="icon-bullet">
118
+ <li>Tokenization is the foundation of most NLP tasks.</li>
119
+ <li>NLP techniques can transform unstructured text into structured formats for analysis.</li>
120
+ <li>Tools like NLTK, SpaCy, and Hugging Face make NLP accessible to developers and researchers.</li>
121
+ </ul>
122
+ """, unsafe_allow_html=True)
123