Spaces:

satya11
/

Natural_Language_Processing

Sleeping

App Files Files Community

satya11 committed on Apr 6, 2025

Commit

26456db

verified ·

1 Parent(s): 3bb1ecc

Update pages/2. Life Cycle of NLP.py

Browse files

Files changed (1) hide show

pages/2. Life Cycle of NLP.py +330 -82

pages/2. Life Cycle of NLP.py CHANGED Viewed

@@ -1,109 +1,357 @@
 import streamlit as st
-import streamlit as st
-st.markdown(
-    """
     <style>
-        body {
-            background-color: #f9f9f9; /* Light gray background */
-            font-family: 'Arial', sans-serif;
-        }
-        @keyframes fadeIn {
-            0% { opacity: 0; transform: translateY(-20px); }
-            100% { opacity: 1; transform: translateY(0); }
-        }
-        .title {
-            text-align: center;
-            color: black
-            font-size: 3rem;
-            font-weight: bold;
-            animation: fadeIn 1s ease-in-out;
-        }
-        .caption {
-            text-align: center;
-            font-style: italic;
-            font-size: 1.2rem;
-            color: black
-            animation: fadeIn 1.5s ease-in-out;
-        }
-        .section {
-            font-size: 1.1rem;
-            text-align: justify;
-            line-height: 1.8;
-            color: #34495e; /* Muted gray */
-            background: #ffffff; /* White card-style background */
-            padding: 20px;
-            border-radius: 10px;
-            box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
-            animation: fadeIn 2s ease-in-out;
-            margin: 10px 0;
-        }
-        .image-container {
-            text-align: center;
-            margin: 20px 0;
-            animation: fadeIn 2.5s ease-in-out;
-        }
-        .image-container img {
-            border-radius: 15px;
-            box-shadow: 0 4px 8px rgba(0, 0, 0, 0.2);
-            transition: transform 0.3s ease-in-out;
-        }
-        .image-container img:hover {
-            transform: scale(1.05); /* Subtle zoom effect */
-        }
-        .sidebar {
-            width: 200px;
-        }
     </style>
-    """,
-    unsafe_allow_html=True,
-)
-st.sidebar.title("NLP Life Cycle Navigation")
-step = st.sidebar.radio("Choose a step in NLP Life Cycle",
-                       ("Problem Statement", "Data Collection", "Simple EDA", "Data Pre-processing", "EDA",
-                        "Feature Engineering", "Training", "Testing", "Deployment/Monitoring"))
-st.title("**Life Cycle of NLP**")
-st.caption("Navigating the journey of NLP from start to deployment!...")
-st.markdown(
-    """
     <div class='image-container'>
-        <img src="https://cdn-uploads.huggingface.co/production/uploads/66bde9bf3c885d04498227a0/5NnNw23wcvLOTXpNGCqbF.png" alt="NLP Image">
     </div>
-    """,
-    unsafe_allow_html=True,
-)
 if step == "Problem Statement":
-    st.markdown("<div class='section'><b>Problem Statement</b><br>Every NLP project begins by identifying the problem that needs solving. It could range from sentiment analysis to machine translation, based on the requirements.</div>", unsafe_allow_html=True)
 elif step == "Data Collection":
-    st.markdown("<div class='section'><b>Data Collection</b><br>The next step is to gather relevant text data from various sources such as servers, web-scrapping(text).</div>", unsafe_allow_html=True)
 elif step == "Simple EDA":
-    st.markdown("<div class='section'><b>Simple EDA</b><br>Before diving deep into modeling, it's crucial to understand the data. Simple EDA gives the quality of the collected text data.</div>", unsafe_allow_html=True)
 elif step == "Data Pre-processing":
-    st.markdown("<div class='section'><b>Data Pre-processing</b><br>Pre-processing includes cleaning the data and pre-processing using different techniques based on the problem statement.</div>", unsafe_allow_html=True)
 elif step == "EDA":
-    st.markdown("<div class='section'><b>EDA (Exploratory Data Analysis)</b><br>In this deeper phase of EDA, visualizations like word clouds, bar plots, and heatmaps are created to gain insights into the data. Identifying correlations, trends, and outliers is crucial here.</div>", unsafe_allow_html=True)
 elif step == "Feature Engineering":
-    st.markdown("<div class='section'><b>Feature Engineering</b><br>Feature engineering involves creating new features or transforming existing ones to better represent the data for machine learning models.Convert text into numerical format(**Vectorization**)</div>", unsafe_allow_html=True)
 elif step == "Training":
-    st.markdown("<div class='section'><b>Training</b><br>The model is trained using the pre-processed data.</div>", unsafe_allow_html=True)
 elif step == "Testing":
-    st.markdown("<div class='section'><b>Testing</b><br>After training, the model is evaluated on a separate test dataset.</div>", unsafe_allow_html=True)
 elif step == "Deployment/Monitoring":
-    st.markdown("<div class='section'><b>Deployment and Monitoring</b><br>Once the model is trained and tested, it is deployed into a real-world environment. Continuous monitoring is needed to ensure the model performs well over time, especially as new data comes in.</div>", unsafe_allow_html=True)

 import streamlit as st
+# Custom CSS with modern design
+st.markdown("""
     <style>
+    :root {
+        --primary: #3498db;
+        --secondary: #2ecc71;
+        --accent: #e74c3c;
+        --dark: #2c3e50;
+        --light: #ecf0f1;
+    }
+    @keyframes fadeIn {
+        0% { opacity: 0; transform: translateY(-20px); }
+        100% { opacity: 1; transform: translateY(0); }
+    }
+    .title-container {
+        background: linear-gradient(135deg, var(--primary), var(--secondary));
+        padding: 2rem;
+        border-radius: 15px;
+        box-shadow: 0 4px 20px rgba(0,0,0,0.1);
+        margin-bottom: 2rem;
+        animation: fadeIn 0.8s ease-out;
+    }
+    .title-text {
+        color: white !important;
+        font-family: 'Segoe UI', Tahoma, Geneva, Verdana;
+        text-align: center;
+        font-size: 2.8rem !important;
+        text-shadow: 1px 1px 3px rgba(0,0,0,0.2);
+    }
+    .caption-text {
+        text-align: center;
+        color: white !important;
+        font-size: 1.3rem;
+        opacity: 0.9;
+    }
+    .step-card {
+        background: white;
+        border-radius: 12px;
+        padding: 1.8rem;
+        margin: 1.2rem 0;
+        box-shadow: 0 6px 12px rgba(0,0,0,0.08);
+        border-left: 5px solid var(--primary);
+        transition: all 0.3s ease;
+        animation: fadeIn 1s ease-out;
+    }
+    .step-card:hover {
+        transform: translateY(-5px);
+        box-shadow: 0 10px 20px rgba(0,0,0,0.15);
+        border-left-color: var(--accent);
+    }
+    .step-title {
+        color: var(--dark) !important;
+        font-size: 1.5rem !important;
+        margin-bottom: 1rem !important;
+        display: flex;
+        align-items: center;
+        gap: 10px;
+    }
+    .step-content {
+        color: #34495e;
+        line-height: 1.8;
+        font-size: 1.1rem;
+    }
+    .image-container {
+        text-align: center;
+        margin: 2rem 0;
+        border-radius: 15px;
+        overflow: hidden;
+        box-shadow: 0 8px 25px rgba(0,0,0,0.12);
+        transition: transform 0.4s;
+    }
+    .image-container:hover {
+        transform: scale(1.02);
+    }
+    .sidebar .sidebar-content {
+        background: white !important;
+        padding: 1.5rem !important;
+    }
+    .sidebar-title {
+        color: var(--dark) !important;
+        font-size: 1.4rem !important;
+        border-bottom: 2px solid var(--primary);
+        padding-bottom: 0.5rem;
+    }
+    .stRadio > div {
+        flex-direction: column;
+        gap: 0.8rem;
+    }
+    .stRadio label {
+        padding: 0.8rem 1rem;
+        border-radius: 8px;
+        transition: all 0.2s;
+    }
+    .stRadio label:hover {
+        background: #f8f9fa !important;
+    }
+    .stRadio [data-baseweb="radio"]:checked + div {
+        background: var(--primary) !important;
+    }
+    .tech-badge {
+        display: inline-block;
+        background: #e0f7fa;
+        color: #00796b;
+        padding: 0.3rem 0.8rem;
+        border-radius: 20px;
+        font-size: 0.9rem;
+        margin: 0.3rem;
+        font-weight: 500;
+    }
     </style>
+""", unsafe_allow_html=True)
+# Sidebar Navigation
+with st.sidebar:
+    st.markdown("<div class='sidebar-title'>🔍 NLP Life Cycle Navigator</div>", unsafe_allow_html=True)
+    step = st.radio(
+        "Choose a phase:",
+        ("Problem Statement", "Data Collection", "Simple EDA", "Data Pre-processing",
+         "EDA", "Feature Engineering", "Training", "Testing", "Deployment/Monitoring"),
+        label_visibility="collapsed"
+    )
+# Main Content
+st.markdown("""
+    <div class='title-container'>
+        <div class='title-text'>Life Cycle of NLP</div>
+        <div class='caption-text'>From problem definition to deployment - A complete journey</div>
+    </div>
+""", unsafe_allow_html=True)
+# Interactive Image
+st.markdown("""
     <div class='image-container'>
+        <img src="https://cdn-uploads.huggingface.co/production/uploads/66bde9bf3c885d04498227a0/5NnNw23wcvLOTXpNGCqbF.png"
+             alt="NLP Lifecycle" width="100%">
     </div>
+""", unsafe_allow_html=True)
+# Content Cards with Enhanced Features
 if step == "Problem Statement":
+    with st.container():
+        st.markdown("""
+            <div class='step-card'>
+                <div class='step-title'>📌 Problem Statement</div>
+                <div class='step-content'>
+                    Every NLP project begins by identifying the problem that needs solving. Common NLP problems include:
+                    <ul style='margin-top: 0.5rem;'>
+                        <li>Sentiment Analysis</li>
+                        <li>Machine Translation</li>
+                        <li>Named Entity Recognition</li>
+                        <li>Text Classification</li>
+                    </ul>
+                    <div style='margin-top: 1rem;'>
+                        <span class='tech-badge'>Business Understanding</span>
+                        <span class='tech-badge'>Requirement Analysis</span>
+                    </div>
+                </div>
+            </div>
+        """, unsafe_allow_html=True)
 elif step == "Data Collection":
+    with st.container():
+        st.markdown("""
+            <div class='step-card'>
+                <div class='step-title'>📊 Data Collection</div>
+                <div class='step-content'>
+                    Gathering relevant text data from various sources:
+                    <ul style='margin-top: 0.5rem;'>
+                        <li>Web scraping (BeautifulSoup, Scrapy)</li>
+                        <li>Public datasets (Kaggle, HuggingFace)</li>
+                        <li>APIs (Twitter, Reddit)</li>
+                        <li>Internal databases</li>
+                    </ul>
+                    <div style='margin-top: 1rem;'>
+                        <span class='tech-badge'>Web Scraping</span>
+                        <span class='tech-badge'>APIs</span>
+                        <span class='tech-badge'>Data Lakes</span>
+                    </div>
+                </div>
+            </div>
+        """, unsafe_allow_html=True)
 elif step == "Simple EDA":
+    with st.container():
+        st.markdown("""
+            <div class='step-card'>
+                <div class='step-title'>🔍 Simple EDA</div>
+                <div class='step-content'>
+                    Preliminary analysis to understand data quality:
+                    <ul style='margin-top: 0.5rem;'>
+                        <li>Basic statistics (word counts, document lengths)</li>
+                        <li>Missing value analysis</li>
+                        <li>Language detection</li>
+                        <li>Duplicate detection</li>
+                    </ul>
+                    <div style='margin-top: 1rem;'>
+                        <span class='tech-badge'>Pandas Profiling</span>
+                        <span class='tech-badge'>TextStat</span>
+                    </div>
+                </div>
+            </div>
+        """, unsafe_allow_html=True)
 elif step == "Data Pre-processing":
+    with st.container():
+        st.markdown("""
+            <div class='step-card'>
+                <div class='step-title'>🧹 Data Pre-processing</div>
+                <div class='step-content'>
+                    Cleaning and preparing text data for analysis:
+                    <ul style='margin-top: 0.5rem;'>
+                        <li>Lowercasing</li>
+                        <li>Tokenization</li>
+                        <li>Stopword removal</li>
+                        <li>Stemming/Lemmatization</li>
+                        <li>Handling special characters</li>
+                    </ul>
+                    <div style='margin-top: 1rem;'>
+                        <span class='tech-badge'>NLTK</span>
+                        <span class='tech-badge'>spaCy</span>
+                        <span class='tech-badge'>Regex</span>
+                    </div>
+                </div>
+            </div>
+        """, unsafe_allow_html=True)
 elif step == "EDA":
+    with st.container():
+        st.markdown("""
+            <div class='step-card'>
+                <div class='step-title'>📈 EDA (Exploratory Data Analysis)</div>
+                <div class='step-content'>
+                    Deep analysis with visualizations:
+                    <ul style='margin-top: 0.5rem;'>
+                        <li>Word clouds</li>
+                        <li>Frequency distributions</li>
+                        <li>N-gram analysis</li>
+                        <li>Sentiment distribution</li>
+                        <li>Topic modeling visualization</li>
+                    </ul>
+                    <div style='margin-top: 1rem;'>
+                        <span class='tech-badge'>Matplotlib</span>
+                        <span class='tech-badge'>Seaborn</span>
+                        <span class='tech-badge'>Plotly</span>
+                    </div>
+                </div>
+            </div>
+        """, unsafe_allow_html=True)
 elif step == "Feature Engineering":
+    with st.container():
+        st.markdown("""
+            <div class='step-card'>
+                <div class='step-title'>⚙️ Feature Engineering</div>
+                <div class='step-content'>
+                    Converting text into numerical representations:
+                    <ul style='margin-top: 0.5rem;'>
+                        <li>Bag-of-Words (CountVectorizer)</li>
+                        <li>TF-IDF</li>
+                        <li>Word Embeddings (Word2Vec, GloVe)</li>
+                        <li>Contextual Embeddings (BERT)</li>
+                    </ul>
+                    <div style='margin-top: 1rem;'>
+                        <span class='tech-badge'>Scikit-learn</span>
+                        <span class='tech-badge'>Gensim</span>
+                        <span class='tech-badge'>Transformers</span>
+                    </div>
+                </div>
+            </div>
+        """, unsafe_allow_html=True)
 elif step == "Training":
+    with st.container():
+        st.markdown("""
+            <div class='step-card'>
+                <div class='step-title'>🤖 Training</div>
+                <div class='step-content'>
+                    Model development phase:
+                    <ul style='margin-top: 0.5rem;'>
+                        <li>Algorithm selection (Naive Bayes, LSTM, BERT)</li>
+                        <li>Hyperparameter tuning</li>
+                        <li>Cross-validation</li>
+                        <li>GPU acceleration</li>
+                    </ul>
+                    <div style='margin-top: 1rem;'>
+                        <span class='tech-badge'>TensorFlow</span>
+                        <span class='tech-badge'>PyTorch</span>
+                        <span class='tech-badge'>HuggingFace</span>
+                    </div>
+                </div>
+            </div>
+        """, unsafe_allow_html=True)
 elif step == "Testing":
+    with st.container():
+        st.markdown("""
+            <div class='step-card'>
+                <div class='step-title'>🧪 Testing</div>
+                <div class='step-content'>
+                    Model evaluation and validation:
+                    <ul style='margin-top: 0.5rem;'>
+                        <li>Accuracy, Precision, Recall metrics</li>
+                        <li>Confusion matrices</li>
+                        <li>A/B testing</li>
+                        <li>Error analysis</li>
+                    </ul>
+                    <div style='margin-top: 1rem;'>
+                        <span class='tech-badge'>Scikit-learn</span>
+                        <span class='tech-badge'>MLflow</span>
+                        <span class='tech-badge'>Weights & Biases</span>
+                    </div>
+                </div>
+            </div>
+        """, unsafe_allow_html=True)
 elif step == "Deployment/Monitoring":
+    with st.container():
+        st.markdown("""
+            <div class='step-card'>
+                <div class='step-title'>🚀 Deployment & Monitoring</div>
+                <div class='step-content'>
+                    Productionizing the model:
+                    <ul style='margin-top: 0.5rem;'>
+                        <li>API development (FastAPI, Flask)</li>
+                        <li>Containerization (Docker)</li>
+                        <li>Cloud deployment (AWS, GCP)</li>
+                        <li>Performance monitoring</li>
+                        <li>Model retraining pipelines</li>
+                    </ul>
+                    <div style='margin-top: 1rem;'>
+                        <span class='tech-badge'>FastAPI</span>
+                        <span class='tech-badge'>Docker</span>
+                        <span class='tech-badge'>Kubernetes</span>
+                        <span class='tech-badge'>Prometheus</span>
+                    </div>
+                </div>
+            </div>
+        """, unsafe_allow_html=True)