satya11 committed on
Commit
3bb1ecc
·
verified ·
1 Parent(s): 774b4cc

Create 3. Terminology.py

Browse files
Files changed (1) hide show
  1. pages/3. Terminology.py +197 -0
pages/3. Terminology.py ADDED
@@ -0,0 +1,197 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""NLP Terminology Explorer: a Streamlit page presenting core NLP vocabulary.

A sidebar radio selects one of four sections (basic terms, tokenization,
vectorization, advanced concepts). Only the selected section is rendered,
between a title banner and a footer.
"""

import streamlit as st

# NOTE(review): several emoji inside the string literals below look
# mis-decoded (for example "πŸ“š"). They are kept exactly as found; confirm
# against the committed file and restore the intended characters.

# Custom CSS with modern design and animations. Injected once at the top of
# the page through st.markdown(..., unsafe_allow_html=True).
PAGE_CSS = """
<style>
:root {
    --primary: #2E86C1;
    --secondary: #AED6F1;
    --accent: #FF6B6B;
}

body {
    background: linear-gradient(45deg, #f8f9fa, #e9ecef);
    font-family: 'Segoe UI', system-ui;
}

.title-box {
    background: linear-gradient(45deg, var(--primary), var(--secondary));
    padding: 2rem;
    border-radius: 15px;
    box-shadow: 0 4px 6px rgba(0,0,0,0.1);
    margin-bottom: 2rem;
}

h1 {
    color: white !important;
    font-family: 'Arial Rounded MT Bold';
    text-align: center;
    font-size: 2.5rem !important;
    text-shadow: 2px 2px 4px rgba(0,0,0,0.2);
}

.term-card {
    background: white;
    border-radius: 10px;
    padding: 1.5rem;
    margin: 1rem 0;
    box-shadow: 0 2px 4px rgba(0,0,0,0.05);
    transition: transform 0.2s;
    border-left: 4px solid var(--primary);
}

.term-card:hover {
    transform: translateY(-3px);
    box-shadow: 0 4px 6px rgba(0,0,0,0.1);
}

.custom-icon {
    font-size: 1.5rem;
    margin-right: 0.5rem;
}

.sidebar .sidebar-content {
    background: white !important;
    border-right: 2px solid var(--secondary);
}

.stExpander {
    border: none !important;
    box-shadow: none !important;
}

mark {
    background-color: var(--secondary);
    padding: 0.2em 0.4em;
    border-radius: 4px;
}
</style>
"""

# Title banner shown above every section.
TITLE_HTML = """
<div class='title-box'>
<h1>πŸ“š NLP Terminology Explorer</h1>
</div>
"""

# (expander label, definition) pairs for the "Basic Terms" section.
BASIC_TERMS = [
    ("πŸ“š Corpus", "A collection of documents"),
    ("πŸ“„ Document", "Collection of sentences, paragraphs, or text elements"),
    ("πŸ“ Paragraph", "Multiple sentences forming a coherent block"),
    ("πŸ’¬ Sentence", "Complete grammatical unit of words"),
    ("πŸ”€ Word", "Basic unit of language with meaning"),
    ("πŸ”  Character", "Individual letters, numbers, or symbols"),
]

# Introductory card for the "Tokenization" section (left column).
TOKENIZATION_INTRO_HTML = """
<div class='term-card'>
<h4>What is Tokenization?</h4>
<p>Process of breaking text into smaller meaningful units called tokens</p>
</div>
"""

# Markdown bullet list shown inside the "Types of Tokenization" expander.
TOKENIZATION_TYPES_MD = """
- **Sentence Tokenization** `(NLTK, spaCy)`
- **Word Tokenization** `(Treebank, Regex)`
- **Subword Tokenization** `(BPE, WordPiece)`
- **Character-level Tokenization**
"""

# (tab label, code sample) pairs for the tokenization examples.
TOKENIZATION_EXAMPLES = [
    (
        "Sentence",
        "Text: 'Hello world! NLP is awesome.'\n"
        "Sentences: ['Hello world!', 'NLP is awesome.']",
    ),
    ("Word", "Sentence: 'I love NLP!'\nWords: ['I', 'love', 'NLP', '!']"),
    ("Character", "Word: 'Hello'\nCharacters: ['H', 'e', 'l', 'l', 'o']"),
]

# Technique name -> one-line description for the "Vectorization" section.
VECTORIZATION_TECHNIQUES = {
    "Bag of Words": "Count-based representation ignoring word order",
    "TF-IDF": "Statistical measure of word importance",
    "Word2Vec": "Neural network-based word embeddings",
    "BERT": "Contextual embeddings using transformers",
}

# (expander label, description, examples) for the "Advanced Concepts" section.
ADVANCED_CONCEPTS = [
    ("🚫 Stop Words", "Common words filtered during processing",
     "the, is, at, which, on"),
    ("🏷️ POS Tagging", "Identifying grammatical components",
     "Noun, Verb, Adjective"),
    ("πŸ“ Dependency Parsing", "Analyzing grammatical structure",
     "Subject-verb relationships"),
]

# Footer markup. The button is emitted as static HTML; no click handling is
# wired up anywhere in this file.
FOOTER_HTML = """
<div style='text-align: center; color: #666; margin-top: 3rem;'>
<p>πŸŽ“ Learn more about NLP with our interactive courses!</p>
<button style='background: var(--primary); color: white; border: none; padding: 0.5rem 2rem; border-radius: 25px;'>
Explore Courses
</button>
</div>
"""


def _term_card_html(definition: str) -> str:
    """Return the card markup wrapping a single term definition."""
    return (
        "<div class='term-card'>\n"
        f"<p style='font-size: 1.1rem; color: #333;'>{definition}</p>\n"
        "</div>"
    )


def _technique_html(description: str) -> str:
    """Return the panel markup for one vectorization technique."""
    # The example text is a fixed "..." placeholder for every technique.
    return (
        "<div style='padding: 1rem; background: #f8f9fa; border-radius: 8px;'>\n"
        f"<p>{description}</p>\n"
        "<small>Example: ...</small>\n"
        "</div>"
    )


def _concept_html(description: str, examples: str) -> str:
    """Return the card markup for one advanced concept and its examples."""
    return (
        "<div class='term-card'>\n"
        f"<p><strong>{description}</strong></p>\n"
        "<div style='margin-top: 1rem; padding: 0.5rem; background: #f0f8ff; border-radius: 6px;'>\n"
        f"<small>Examples: {examples}</small>\n"
        "</div>\n"
        "</div>"
    )


def _render_basic_terms() -> None:
    """Render one expander per foundational term, each holding its card."""
    st.markdown("### πŸ” Foundational Concepts")

    for term, definition in BASIC_TERMS:
        with st.expander(term):
            st.markdown(_term_card_html(definition), unsafe_allow_html=True)


def _render_tokenization() -> None:
    """Render the tokenization intro, the list of types, and tabbed examples."""
    st.markdown("### βœ‚οΈ Text Segmentation Techniques")

    intro_col, types_col = st.columns([2, 3])

    with intro_col:
        st.markdown(TOKENIZATION_INTRO_HTML, unsafe_allow_html=True)

    with types_col:
        with st.expander("πŸ“ Types of Tokenization"):
            st.markdown(TOKENIZATION_TYPES_MD)

    # NOTE(review): indentation was lost in the captured diff; the examples are
    # assumed to sit below both columns rather than inside the right-hand one.
    st.markdown("#### πŸ› οΈ Tokenization Examples")
    tabs = st.tabs([label for label, _ in TOKENIZATION_EXAMPLES])
    for tab, (_, sample) in zip(tabs, TOKENIZATION_EXAMPLES):
        with tab:
            st.code(sample)


def _render_vectorization() -> None:
    """Render one expander per text-representation technique."""
    st.markdown("### πŸ”’ Text Representation Methods")

    for tech, desc in VECTORIZATION_TECHNIQUES.items():
        with st.expander(f"πŸ“Š {tech}"):
            st.markdown(_technique_html(desc), unsafe_allow_html=True)


def _render_advanced_concepts() -> None:
    """Render one expander per advanced concept with its examples."""
    st.markdown("### 🧠 Advanced NLP Concepts")

    for title, desc, examples in ADVANCED_CONCEPTS:
        with st.expander(title):
            st.markdown(_concept_html(desc, examples), unsafe_allow_html=True)


# Single source of truth for the sidebar options and what each one renders, so
# a label cannot drift out of sync with the branch that handles it.
SECTION_RENDERERS = {
    "Basic Terms": _render_basic_terms,
    "Tokenization": _render_tokenization,
    "Vectorization": _render_vectorization,
    "Advanced Concepts": _render_advanced_concepts,
}

# --- Page flow (runs top to bottom on every rerun of the script) ---

st.markdown(PAGE_CSS, unsafe_allow_html=True)

# Sidebar with navigation
with st.sidebar:
    st.header("πŸ” Navigation")
    page_section = st.radio("Jump to:", list(SECTION_RENDERERS))

# Main content
st.markdown(TITLE_HTML, unsafe_allow_html=True)

SECTION_RENDERERS[page_section]()

# Footer
# NOTE(review): assumed to be rendered for every section (top level), not only
# under "Advanced Concepts"; the captured diff does not preserve indentation.
st.markdown("---")
st.markdown(FOOTER_HTML, unsafe_allow_html=True)