satya11 committed on
Commit
26456db
·
verified ·
1 Parent(s): 3bb1ecc

Update pages/2. Life Cycle of NLP.py

Browse files
Files changed (1) hide show
  1. pages/2. Life Cycle of NLP.py +330 -82
pages/2. Life Cycle of NLP.py CHANGED
@@ -1,109 +1,357 @@
1
  import streamlit as st
2
 
3
- import streamlit as st
4
-
5
- st.markdown(
6
- """
7
  <style>
8
- body {
9
- background-color: #f9f9f9; /* Light gray background */
10
- font-family: 'Arial', sans-serif;
11
- }
12
- @keyframes fadeIn {
13
- 0% { opacity: 0; transform: translateY(-20px); }
14
- 100% { opacity: 1; transform: translateY(0); }
15
- }
16
- .title {
17
- text-align: center;
18
- color: black
19
- font-size: 3rem;
20
- font-weight: bold;
21
- animation: fadeIn 1s ease-in-out;
22
- }
23
- .caption {
24
- text-align: center;
25
- font-style: italic;
26
- font-size: 1.2rem;
27
- color: black
28
- animation: fadeIn 1.5s ease-in-out;
29
- }
30
- .section {
31
- font-size: 1.1rem;
32
- text-align: justify;
33
- line-height: 1.8;
34
- color: #34495e; /* Muted gray */
35
- background: #ffffff; /* White card-style background */
36
- padding: 20px;
37
- border-radius: 10px;
38
- box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
39
- animation: fadeIn 2s ease-in-out;
40
- margin: 10px 0;
41
- }
42
- .image-container {
43
- text-align: center;
44
- margin: 20px 0;
45
- animation: fadeIn 2.5s ease-in-out;
46
- }
47
- .image-container img {
48
- border-radius: 15px;
49
- box-shadow: 0 4px 8px rgba(0, 0, 0, 0.2);
50
- transition: transform 0.3s ease-in-out;
51
- }
52
- .image-container img:hover {
53
- transform: scale(1.05); /* Subtle zoom effect */
54
- }
55
- .sidebar {
56
- width: 200px;
57
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  </style>
59
- """,
60
- unsafe_allow_html=True,
61
- )
62
-
63
- st.sidebar.title("NLP Life Cycle Navigation")
64
- step = st.sidebar.radio("Choose a step in NLP Life Cycle",
65
- ("Problem Statement", "Data Collection", "Simple EDA", "Data Pre-processing", "EDA",
66
- "Feature Engineering", "Training", "Testing", "Deployment/Monitoring"))
67
-
68
 
69
- st.title("**Life Cycle of NLP**")
70
- st.caption("Navigating the journey of NLP from start to deployment!...")
 
 
 
 
 
 
 
71
 
 
 
 
 
 
 
 
72
 
73
- st.markdown(
74
- """
75
  <div class='image-container'>
76
- <img src="https://cdn-uploads.huggingface.co/production/uploads/66bde9bf3c885d04498227a0/5NnNw23wcvLOTXpNGCqbF.png" alt="NLP Image">
 
77
  </div>
78
- """,
79
- unsafe_allow_html=True,
80
- )
81
-
82
 
 
83
  if step == "Problem Statement":
84
- st.markdown("<div class='section'><b>Problem Statement</b><br>Every NLP project begins by identifying the problem that needs solving. It could range from sentiment analysis to machine translation, based on the requirements.</div>", unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
 
86
  elif step == "Data Collection":
87
- st.markdown("<div class='section'><b>Data Collection</b><br>The next step is to gather relevant text data from various sources such as servers, web-scrapping(text).</div>", unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
 
89
  elif step == "Simple EDA":
90
- st.markdown("<div class='section'><b>Simple EDA</b><br>Before diving deep into modeling, it's crucial to understand the data. Simple EDA gives the quality of the collected text data.</div>", unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
 
92
  elif step == "Data Pre-processing":
93
- st.markdown("<div class='section'><b>Data Pre-processing</b><br>Pre-processing includes cleaning the data and pre-processing using different techniques based on the problem statement.</div>", unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
 
95
  elif step == "EDA":
96
- st.markdown("<div class='section'><b>EDA (Exploratory Data Analysis)</b><br>In this deeper phase of EDA, visualizations like word clouds, bar plots, and heatmaps are created to gain insights into the data. Identifying correlations, trends, and outliers is crucial here.</div>", unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
 
98
  elif step == "Feature Engineering":
99
- st.markdown("<div class='section'><b>Feature Engineering</b><br>Feature engineering involves creating new features or transforming existing ones to better represent the data for machine learning models.Convert text into numerical format(**Vectorization**)</div>", unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
 
101
  elif step == "Training":
102
- st.markdown("<div class='section'><b>Training</b><br>The model is trained using the pre-processed data.</div>", unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
 
104
  elif step == "Testing":
105
- st.markdown("<div class='section'><b>Testing</b><br>After training, the model is evaluated on a separate test dataset.</div>", unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
 
107
  elif step == "Deployment/Monitoring":
108
- st.markdown("<div class='section'><b>Deployment and Monitoring</b><br>Once the model is trained and tested, it is deployed into a real-world environment. Continuous monitoring is needed to ensure the model performs well over time, especially as new data comes in.</div>", unsafe_allow_html=True)
109
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
 
3
+ # Custom CSS with modern design
4
+ st.markdown("""
 
 
5
  <style>
6
+ :root {
7
+ --primary: #3498db;
8
+ --secondary: #2ecc71;
9
+ --accent: #e74c3c;
10
+ --dark: #2c3e50;
11
+ --light: #ecf0f1;
12
+ }
13
+
14
+ @keyframes fadeIn {
15
+ 0% { opacity: 0; transform: translateY(-20px); }
16
+ 100% { opacity: 1; transform: translateY(0); }
17
+ }
18
+
19
+ .title-container {
20
+ background: linear-gradient(135deg, var(--primary), var(--secondary));
21
+ padding: 2rem;
22
+ border-radius: 15px;
23
+ box-shadow: 0 4px 20px rgba(0,0,0,0.1);
24
+ margin-bottom: 2rem;
25
+ animation: fadeIn 0.8s ease-out;
26
+ }
27
+
28
+ .title-text {
29
+ color: white !important;
30
+ font-family: 'Segoe UI', Tahoma, Geneva, Verdana;
31
+ text-align: center;
32
+ font-size: 2.8rem !important;
33
+ text-shadow: 1px 1px 3px rgba(0,0,0,0.2);
34
+ }
35
+
36
+ .caption-text {
37
+ text-align: center;
38
+ color: white !important;
39
+ font-size: 1.3rem;
40
+ opacity: 0.9;
41
+ }
42
+
43
+ .step-card {
44
+ background: white;
45
+ border-radius: 12px;
46
+ padding: 1.8rem;
47
+ margin: 1.2rem 0;
48
+ box-shadow: 0 6px 12px rgba(0,0,0,0.08);
49
+ border-left: 5px solid var(--primary);
50
+ transition: all 0.3s ease;
51
+ animation: fadeIn 1s ease-out;
52
+ }
53
+
54
+ .step-card:hover {
55
+ transform: translateY(-5px);
56
+ box-shadow: 0 10px 20px rgba(0,0,0,0.15);
57
+ border-left-color: var(--accent);
58
+ }
59
+
60
+ .step-title {
61
+ color: var(--dark) !important;
62
+ font-size: 1.5rem !important;
63
+ margin-bottom: 1rem !important;
64
+ display: flex;
65
+ align-items: center;
66
+ gap: 10px;
67
+ }
68
+
69
+ .step-content {
70
+ color: #34495e;
71
+ line-height: 1.8;
72
+ font-size: 1.1rem;
73
+ }
74
+
75
+ .image-container {
76
+ text-align: center;
77
+ margin: 2rem 0;
78
+ border-radius: 15px;
79
+ overflow: hidden;
80
+ box-shadow: 0 8px 25px rgba(0,0,0,0.12);
81
+ transition: transform 0.4s;
82
+ }
83
+
84
+ .image-container:hover {
85
+ transform: scale(1.02);
86
+ }
87
+
88
+ .sidebar .sidebar-content {
89
+ background: white !important;
90
+ padding: 1.5rem !important;
91
+ }
92
+
93
+ .sidebar-title {
94
+ color: var(--dark) !important;
95
+ font-size: 1.4rem !important;
96
+ border-bottom: 2px solid var(--primary);
97
+ padding-bottom: 0.5rem;
98
+ }
99
+
100
+ .stRadio > div {
101
+ flex-direction: column;
102
+ gap: 0.8rem;
103
+ }
104
+
105
+ .stRadio label {
106
+ padding: 0.8rem 1rem;
107
+ border-radius: 8px;
108
+ transition: all 0.2s;
109
+ }
110
+
111
+ .stRadio label:hover {
112
+ background: #f8f9fa !important;
113
+ }
114
+
115
+ .stRadio [data-baseweb="radio"]:checked + div {
116
+ background: var(--primary) !important;
117
+ }
118
+
119
+ .tech-badge {
120
+ display: inline-block;
121
+ background: #e0f7fa;
122
+ color: #00796b;
123
+ padding: 0.3rem 0.8rem;
124
+ border-radius: 20px;
125
+ font-size: 0.9rem;
126
+ margin: 0.3rem;
127
+ font-weight: 500;
128
+ }
129
  </style>
130
+ """, unsafe_allow_html=True)
 
 
 
 
 
 
 
 
131
 
132
+ # Sidebar Navigation
133
+ with st.sidebar:
134
+ st.markdown("<div class='sidebar-title'>🔍 NLP Life Cycle Navigator</div>", unsafe_allow_html=True)
135
+ step = st.radio(
136
+ "Choose a phase:",
137
+ ("Problem Statement", "Data Collection", "Simple EDA", "Data Pre-processing",
138
+ "EDA", "Feature Engineering", "Training", "Testing", "Deployment/Monitoring"),
139
+ label_visibility="collapsed"
140
+ )
141
 
142
+ # Main Content
143
+ st.markdown("""
144
+ <div class='title-container'>
145
+ <div class='title-text'>Life Cycle of NLP</div>
146
+ <div class='caption-text'>From problem definition to deployment - A complete journey</div>
147
+ </div>
148
+ """, unsafe_allow_html=True)
149
 
150
+ # Interactive Image
151
+ st.markdown("""
152
  <div class='image-container'>
153
+ <img src="https://cdn-uploads.huggingface.co/production/uploads/66bde9bf3c885d04498227a0/5NnNw23wcvLOTXpNGCqbF.png"
154
+ alt="NLP Lifecycle" width="100%">
155
  </div>
156
+ """, unsafe_allow_html=True)
 
 
 
157
 
158
+ # Content Cards with Enhanced Features
159
  if step == "Problem Statement":
160
+ with st.container():
161
+ st.markdown("""
162
+ <div class='step-card'>
163
+ <div class='step-title'>📌 Problem Statement</div>
164
+ <div class='step-content'>
165
+ Every NLP project begins by identifying the problem that needs solving. Common NLP problems include:
166
+ <ul style='margin-top: 0.5rem;'>
167
+ <li>Sentiment Analysis</li>
168
+ <li>Machine Translation</li>
169
+ <li>Named Entity Recognition</li>
170
+ <li>Text Classification</li>
171
+ </ul>
172
+ <div style='margin-top: 1rem;'>
173
+ <span class='tech-badge'>Business Understanding</span>
174
+ <span class='tech-badge'>Requirement Analysis</span>
175
+ </div>
176
+ </div>
177
+ </div>
178
+ """, unsafe_allow_html=True)
179
 
180
  elif step == "Data Collection":
181
+ with st.container():
182
+ st.markdown("""
183
+ <div class='step-card'>
184
+ <div class='step-title'>📊 Data Collection</div>
185
+ <div class='step-content'>
186
+ Gathering relevant text data from various sources:
187
+ <ul style='margin-top: 0.5rem;'>
188
+ <li>Web scraping (BeautifulSoup, Scrapy)</li>
189
+ <li>Public datasets (Kaggle, HuggingFace)</li>
190
+ <li>APIs (Twitter, Reddit)</li>
191
+ <li>Internal databases</li>
192
+ </ul>
193
+ <div style='margin-top: 1rem;'>
194
+ <span class='tech-badge'>Web Scraping</span>
195
+ <span class='tech-badge'>APIs</span>
196
+ <span class='tech-badge'>Data Lakes</span>
197
+ </div>
198
+ </div>
199
+ </div>
200
+ """, unsafe_allow_html=True)
201
 
202
  elif step == "Simple EDA":
203
+ with st.container():
204
+ st.markdown("""
205
+ <div class='step-card'>
206
+ <div class='step-title'>🔍 Simple EDA</div>
207
+ <div class='step-content'>
208
+ Preliminary analysis to understand data quality:
209
+ <ul style='margin-top: 0.5rem;'>
210
+ <li>Basic statistics (word counts, document lengths)</li>
211
+ <li>Missing value analysis</li>
212
+ <li>Language detection</li>
213
+ <li>Duplicate detection</li>
214
+ </ul>
215
+ <div style='margin-top: 1rem;'>
216
+ <span class='tech-badge'>Pandas Profiling</span>
217
+ <span class='tech-badge'>TextStat</span>
218
+ </div>
219
+ </div>
220
+ </div>
221
+ """, unsafe_allow_html=True)
222
 
223
  elif step == "Data Pre-processing":
224
+ with st.container():
225
+ st.markdown("""
226
+ <div class='step-card'>
227
+ <div class='step-title'>🧹 Data Pre-processing</div>
228
+ <div class='step-content'>
229
+ Cleaning and preparing text data for analysis:
230
+ <ul style='margin-top: 0.5rem;'>
231
+ <li>Lowercasing</li>
232
+ <li>Tokenization</li>
233
+ <li>Stopword removal</li>
234
+ <li>Stemming/Lemmatization</li>
235
+ <li>Handling special characters</li>
236
+ </ul>
237
+ <div style='margin-top: 1rem;'>
238
+ <span class='tech-badge'>NLTK</span>
239
+ <span class='tech-badge'>spaCy</span>
240
+ <span class='tech-badge'>Regex</span>
241
+ </div>
242
+ </div>
243
+ </div>
244
+ """, unsafe_allow_html=True)
245
 
246
  elif step == "EDA":
247
+ with st.container():
248
+ st.markdown("""
249
+ <div class='step-card'>
250
+ <div class='step-title'>📈 EDA (Exploratory Data Analysis)</div>
251
+ <div class='step-content'>
252
+ Deep analysis with visualizations:
253
+ <ul style='margin-top: 0.5rem;'>
254
+ <li>Word clouds</li>
255
+ <li>Frequency distributions</li>
256
+ <li>N-gram analysis</li>
257
+ <li>Sentiment distribution</li>
258
+ <li>Topic modeling visualization</li>
259
+ </ul>
260
+ <div style='margin-top: 1rem;'>
261
+ <span class='tech-badge'>Matplotlib</span>
262
+ <span class='tech-badge'>Seaborn</span>
263
+ <span class='tech-badge'>Plotly</span>
264
+ </div>
265
+ </div>
266
+ </div>
267
+ """, unsafe_allow_html=True)
268
 
269
  elif step == "Feature Engineering":
270
+ with st.container():
271
+ st.markdown("""
272
+ <div class='step-card'>
273
+ <div class='step-title'>⚙️ Feature Engineering</div>
274
+ <div class='step-content'>
275
+ Converting text into numerical representations:
276
+ <ul style='margin-top: 0.5rem;'>
277
+ <li>Bag-of-Words (CountVectorizer)</li>
278
+ <li>TF-IDF</li>
279
+ <li>Word Embeddings (Word2Vec, GloVe)</li>
280
+ <li>Contextual Embeddings (BERT)</li>
281
+ </ul>
282
+ <div style='margin-top: 1rem;'>
283
+ <span class='tech-badge'>Scikit-learn</span>
284
+ <span class='tech-badge'>Gensim</span>
285
+ <span class='tech-badge'>Transformers</span>
286
+ </div>
287
+ </div>
288
+ </div>
289
+ """, unsafe_allow_html=True)
290
 
291
  elif step == "Training":
292
+ with st.container():
293
+ st.markdown("""
294
+ <div class='step-card'>
295
+ <div class='step-title'>🤖 Training</div>
296
+ <div class='step-content'>
297
+ Model development phase:
298
+ <ul style='margin-top: 0.5rem;'>
299
+ <li>Algorithm selection (Naive Bayes, LSTM, BERT)</li>
300
+ <li>Hyperparameter tuning</li>
301
+ <li>Cross-validation</li>
302
+ <li>GPU acceleration</li>
303
+ </ul>
304
+ <div style='margin-top: 1rem;'>
305
+ <span class='tech-badge'>TensorFlow</span>
306
+ <span class='tech-badge'>PyTorch</span>
307
+ <span class='tech-badge'>HuggingFace</span>
308
+ </div>
309
+ </div>
310
+ </div>
311
+ """, unsafe_allow_html=True)
312
 
313
  elif step == "Testing":
314
+ with st.container():
315
+ st.markdown("""
316
+ <div class='step-card'>
317
+ <div class='step-title'>🧪 Testing</div>
318
+ <div class='step-content'>
319
+ Model evaluation and validation:
320
+ <ul style='margin-top: 0.5rem;'>
321
+ <li>Accuracy, Precision, Recall metrics</li>
322
+ <li>Confusion matrices</li>
323
+ <li>A/B testing</li>
324
+ <li>Error analysis</li>
325
+ </ul>
326
+ <div style='margin-top: 1rem;'>
327
+ <span class='tech-badge'>Scikit-learn</span>
328
+ <span class='tech-badge'>MLflow</span>
329
+ <span class='tech-badge'>Weights & Biases</span>
330
+ </div>
331
+ </div>
332
+ </div>
333
+ """, unsafe_allow_html=True)
334
 
335
  elif step == "Deployment/Monitoring":
336
+ with st.container():
337
+ st.markdown("""
338
+ <div class='step-card'>
339
+ <div class='step-title'>🚀 Deployment & Monitoring</div>
340
+ <div class='step-content'>
341
+ Productionizing the model:
342
+ <ul style='margin-top: 0.5rem;'>
343
+ <li>API development (FastAPI, Flask)</li>
344
+ <li>Containerization (Docker)</li>
345
+ <li>Cloud deployment (AWS, GCP)</li>
346
+ <li>Performance monitoring</li>
347
+ <li>Model retraining pipelines</li>
348
+ </ul>
349
+ <div style='margin-top: 1rem;'>
350
+ <span class='tech-badge'>FastAPI</span>
351
+ <span class='tech-badge'>Docker</span>
352
+ <span class='tech-badge'>Kubernetes</span>
353
+ <span class='tech-badge'>Prometheus</span>
354
+ </div>
355
+ </div>
356
+ </div>
357
+ """, unsafe_allow_html=True)