File size: 6,182 Bytes
3bb1ecc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bbd452e
3bb1ecc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59d4e46
3bb1ecc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a6e7558
3bb1ecc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0674f87
3bb1ecc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0674f87
3bb1ecc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
import streamlit as st

# Global stylesheet for the page, injected as a raw <style> element through
# st.markdown(..., unsafe_allow_html=True).
#
# .term-card declares an explicit white text colour: the card background is
# black, and only some of the card markup in this script sets a colour inline,
# so without it the text colour would depend on the active theme.
#
# NOTE(review): `.sidebar .sidebar-content` looks like a selector for an older
# Streamlit DOM; confirm it still matches the sidebar in the deployed version.
st.markdown("""
    <style>
    :root {
        --primary: #2E86C1;
        --secondary: #AED6F1;
        --accent: #FF6B6B;
    }

    body {
        background: linear-gradient(45deg, #f8f9fa, #e9ecef);
        font-family: 'Segoe UI', system-ui, sans-serif;
    }

    .title-box {
        background: linear-gradient(45deg, var(--primary), var(--secondary));
        padding: 2rem;
        border-radius: 15px;
        box-shadow: 0 4px 6px rgba(0,0,0,0.1);
        margin-bottom: 2rem;
    }

    h1 {
        color: white !important;
        font-family: 'Arial Rounded MT Bold', Arial, sans-serif;
        text-align: center;
        font-size: 2.5rem !important;
        text-shadow: 2px 2px 4px rgba(0,0,0,0.2);
    }

    .term-card {
        background: black;
        color: white;
        border-radius: 10px;
        padding: 1.5rem;
        margin: 1rem 0;
        box-shadow: 0 2px 4px rgba(0,0,0,0.05);
        transition: transform 0.2s;
        border-left: 4px solid var(--primary);
    }

    .term-card p,
    .term-card h4,
    .term-card small,
    .term-card strong {
        color: inherit;
    }

    .term-card:hover {
        transform: translateY(-3px);
        box-shadow: 0 4px 6px rgba(0,0,0,0.1);
    }

    .custom-icon {
        font-size: 1.5rem;
        margin-right: 0.5rem;
    }

    .sidebar .sidebar-content {
        background: black !important;
        border-right: 2px solid var(--secondary);
    }

    .stExpander {
        border: none !important;
        box-shadow: none !important;
    }

    mark {
        background-color: var(--secondary);
        padding: 0.2em 0.4em;
        border-radius: 4px;
    }
    </style>
""", unsafe_allow_html=True)

# Sidebar navigation. The label chosen here is stored in `page_section`, which
# the section-rendering code further down compares against these same strings.
# NOTE(review): the header emoji was restored from mis-decoded UTF-8 whose last
# byte was lost; the magnifying glass is the best-guess original.
with st.sidebar:
    st.header("🔍 Navigation")
    page_section = st.radio(
        "Jump to:",
        [
            "Basic Terms",
            "Tokenization",
            "Vectorization",
            "Advanced Concepts",
        ],
    )

# Page title banner. Rendered as raw HTML so it can use the `title-box` class;
# the emoji is written as real UTF-8 rather than the mis-decoded byte sequence.
st.markdown("""
    <div class='title-box'>
        <h1>📚 NLP Terminology Explorer</h1>
    </div>
""", unsafe_allow_html=True)

# Section rendering.
#
# Each navigation entry has its own renderer; the mapping at the bottom picks
# the one matching `page_section` (set by the sidebar radio).
#
# NOTE(review): emoji below were restored from mis-decoded UTF-8. Most were
# recovered exactly from the visible bytes; the magnifying glass and memo
# emoji had lost their final byte and are best-guess reconstructions.


def _render_basic_terms():
    """Render the "Basic Terms" section: one expander per term with its definition."""
    st.markdown("### 🔍 Foundational Concepts")

    terms = [
        ("📚 Corpus", "A collection of documents"),
        ("📄 Document", "Collection of sentences, paragraphs, or text elements"),
        ("📝 Paragraph", "Multiple sentences forming a coherent block"),
        ("💬 Sentence", "Complete grammatical unit of words"),
        ("🔀 Word", "Basic unit of language with meaning"),
        ("🔠 Character", "Individual letters, numbers, or symbols"),
    ]

    for term, definition in terms:
        with st.expander(term):
            st.markdown(f"""
                <div class='term-card'>
                    <p style='font-size: 1.1rem; color: white;'>{definition}</p>
                </div>
            """, unsafe_allow_html=True)


def _render_tokenization():
    """Render the "Tokenization" section: definition card, type list and example tabs."""
    st.markdown("### ✂️ Text Segmentation Techniques")

    definition_col, types_col = st.columns([2, 3])

    with definition_col:
        # The card background is black, so the text colour is set explicitly
        # instead of being left to the active theme.
        st.markdown("""
            <div class='term-card'>
                <h4 style='color: white;'>What is Tokenization?</h4>
                <p style='color: white;'>Process of breaking text into smaller meaningful units called tokens</p>
            </div>
        """, unsafe_allow_html=True)

    with types_col:
        with st.expander("📝 Types of Tokenization"):
            st.markdown("""
                - **Sentence Tokenization** `(NLTK, spaCy)`
                - **Word Tokenization** `(Treebank, Regex)`
                - **Subword Tokenization** `(BPE, WordPiece)`
                - **Character-level Tokenization**
            """)

    st.markdown("#### 🛠️ Tokenization Examples")
    sentence_tab, word_tab, char_tab = st.tabs(["Sentence", "Word", "Character"])

    with sentence_tab:
        st.code("Text: 'Hello world! NLP is awesome.'\nSentences: ['Hello world!', 'NLP is awesome.']")

    with word_tab:
        st.code("Sentence: 'I love NLP!'\nWords: ['I', 'love', 'NLP', '!']")

    with char_tab:
        st.code("Word: 'Hello'\nCharacters: ['H', 'e', 'l', 'l', 'o']")


def _render_vectorization():
    """Render the "Vectorization" section: one expander per technique.

    Each entry carries its own example text; previously every card showed the
    literal placeholder "..." as its example.
    """
    st.markdown("### 🔒 Text Representation Methods")

    techniques = [
        (
            "Bag of Words",
            "Count-based representation ignoring word order",
            "'I love NLP, I love AI' → {I: 2, love: 2, NLP: 1, AI: 1}",
        ),
        (
            "TF-IDF",
            "Statistical measure of word importance",
            "'the' appears in every document, so it scores near zero",
        ),
        (
            "Word2Vec",
            "Neural network-based word embeddings",
            "vec('king') - vec('man') + vec('woman') ≈ vec('queen')",
        ),
        (
            "BERT",
            "Contextual embeddings using transformers",
            "'bank' gets different vectors in 'river bank' and 'bank account'",
        ),
    ]

    for tech, desc, example in techniques:
        with st.expander(f"📊 {tech}"):
            # Black panel: text colour is set explicitly so it stays readable.
            st.markdown(f"""
                <div style='padding: 1rem; background: black; color: white; border-radius: 8px;'>
                    <p style='color: white;'>{desc}</p>
                    <small style='color: white;'>Example: {example}</small>
                </div>
            """, unsafe_allow_html=True)


def _render_advanced_concepts():
    """Render the "Advanced Concepts" section: description plus examples per concept."""
    st.markdown("### 🧠 Advanced NLP Concepts")

    concepts = [
        ("🚫 Stop Words", "Common words filtered during processing",
         "the, is, at, which, on"),
        ("🏷️ POS Tagging", "Identifying grammatical components",
         "Noun, Verb, Adjective"),
        ("📝 Dependency Parsing", "Analyzing grammatical structure",
         "Subject-verb relationships"),
    ]

    for title, desc, examples in concepts:
        with st.expander(title):
            st.markdown(f"""
                <div class='term-card'>
                    <p style='color: white;'><strong>{desc}</strong></p>
                    <div style='margin-top: 1rem; padding: 0.5rem; background: black; color: white; border-radius: 6px;'>
                        <small style='color: white;'>Examples: {examples}</small>
                    </div>
                </div>
            """, unsafe_allow_html=True)


# Keys must match the option labels offered by the sidebar radio.
_SECTION_RENDERERS = {
    "Basic Terms": _render_basic_terms,
    "Tokenization": _render_tokenization,
    "Vectorization": _render_vectorization,
    "Advanced Concepts": _render_advanced_concepts,
}

# An unrecognised selection renders nothing, as the if/elif chain did.
_render_selected = _SECTION_RENDERERS.get(page_section)
if _render_selected is not None:
    _render_selected()

# Footer: a horizontal rule followed by a centred call-to-action.
# The button colour uses the --primary custom property with a literal fallback,
# so it still renders if the variable is not defined.
# NOTE(review): the button is static HTML with no link or handler, so clicking
# it does nothing; wire it to a destination or replace it with a link.
st.markdown("---")
st.markdown("""
    <div style='text-align: center; color: #666; margin-top: 3rem;'>
        <p>🎓 Learn more about NLP with our interactive courses!</p>
        <button style='background: var(--primary, #2E86C1); color: white; border: none; padding: 0.5rem 2rem; border-radius: 25px;'>
            Explore Courses
        </button>
    </div>
""", unsafe_allow_html=True)