Update pages/3_Terminology.py
pages/3_Terminology.py (+45, -84)
@@ -100,61 +100,37 @@ st.markdown(
     "<p class='caption'>Explore essential terms in Natural Language Processing and their meanings!...</p>",
     unsafe_allow_html=True,
 )
-
-st.markdown(
-    """
-    <p class="section"><span class="term">…</span><br>
-    …
-    </p>
-    """,
-    unsafe_allow_html=True,
-)
-
-st.markdown(
-    """
-    <p class="section"><span class="term">…</span><br>
-    …
-    </p>
-    """,
-    unsafe_allow_html=True,
-)
-
-st.markdown(
-    """
-    <p class="section"><span class="term">…</span><br>
-    …
-    </p>
-    """,
-    unsafe_allow_html=True,
-)
-
-st.markdown(
-    """
-    <p class="section"><span class="term">…</span><br>
-    …
-    </p>
-    """,
-    unsafe_allow_html=True,
-)
-
-st.markdown(
-    """
-    <p class="section"><span class="term">…</span><br>
-    …
-    </p>
-    """,
-    unsafe_allow_html=True,
-)
-
-st.markdown(
-    """
-    <p class="section"><span class="term">Tokenization</span><br>
-    It is a technique by using which we can convert a huge chunk into small entity where those small entities are known as tokens.
-    </p>
-    """,
-    unsafe_allow_html=True,
-)
-st.header("Types of Tokenization")
+st.header("Document")
+st.markdown('''
+- A document is a collection of paragraphs, sentences, words, or even a single character.
+''')
+
+st.header("Paragraph")
+st.markdown('''
+- A paragraph is a collection of sentences.
+''')
+
+st.header("Sentence")
+st.markdown('''
+- A sentence is a collection of words.
+''')
+
+st.header("Word")
+st.markdown('''
+- A word is a collection of characters.
+''')
+
+st.header("Character")
+st.markdown('''
+- A character can be a digit, a letter, or a special symbol.
+''')
+
+st.header("Tokenization")
+st.markdown('''
+- Tokenization is a technique that breaks a large chunk of text into smaller units known as tokens.
+''')
+
+st.subheader("Types of Tokenization")
 st.markdown("""
 <ul class="icon-bullet">
 <li>Sentence tokenization</li>
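As a quick illustration of the three tokenization types the new section lists (sentence, word, and character), here is a minimal pure-Python sketch. The regexes are simplifications of my own; a library such as NLTK would handle edge cases like abbreviations:

```python
import re

text = "NLP is fun. Tokenization splits text into tokens!"

# Sentence tokenization: split after end-of-sentence punctuation.
sentences = re.split(r"(?<=[.!?])\s+", text)

# Word tokenization: pull out alphanumeric runs, dropping punctuation.
words = re.findall(r"\w+", text)

# Character tokenization: every single character becomes a token.
chars = list(text)

print(sentences)  # ['NLP is fun.', 'Tokenization splits text into tokens!']
print(words[:4])  # ['NLP', 'is', 'fun', 'Tokenization']
print(chars[:5])  # ['N', 'L', 'P', ' ', 'i']
```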
@@ -178,40 +154,25 @@ st.markdown('''
 - It is a technique that breaks a chunk of text into smaller units (tokens), where each token is a single character.
 ''')
 
-
-st.markdown(
-    """
-    <p class="section"><span class="term">…</span><br>
-    …
-    </p>
-    """,
-    unsafe_allow_html=True,
-)
-
-st.markdown(
-    """
-    <p class="section"><span class="term">TF-IDF (Term Frequency - Inverse Document Frequency)</span><br>
-    TF-IDF is a statistic used to evaluate the importance of a word in a document relative to all other documents. It balances the frequency of a word in a document with its rarity across the entire dataset.
-    </p>
-    """,
-    unsafe_allow_html=True,
-)
-
-st.markdown(
-    """
-    <p class="section"><span class="term">Sentiment Analysis</span><br>
-    Sentiment Analysis is the task of determining the sentiment or opinion expressed in text. It is often used to analyze social media posts, customer feedback, and reviews to gauge public opinion.
-    </p>
-    """,
-    unsafe_allow_html=True,
-)
-
-st.markdown(
-    """
-    <p class="section"><span class="term">Language Model</span><br>
-    A language model predicts the probability of a sequence of words occurring in a sentence. Popular models include GPT, BERT, and LSTM, which help in text generation, translation, and summarization tasks.
-    </p>
-    """,
-    unsafe_allow_html=True,
-)
-
+st.header("Stop Words")
+st.markdown('''
+- Stop words are a set of words that have little impact on the meaning of a sentence or paragraph.
+- They mainly serve to keep the grammar of a sentence clear.
+''')
+
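A small sketch of stop-word removal to go with the new section. The word set below is a tiny illustrative sample, not a real lexicon (NLTK and spaCy ship complete stop-word lists):

```python
STOP_WORDS = {"a", "an", "the", "is", "are", "of", "to", "in"}

def remove_stop_words(sentence):
    # Keep only the words that carry the sentence's meaning.
    return [w for w in sentence.lower().split() if w not in STOP_WORDS]

print(remove_stop_words("The grammar of a sentence is clear"))
# ['grammar', 'sentence', 'clear']
```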
+st.header("Vectorization")
+st.markdown('''
+- Vectorization is a technique that converts text into a vector (numeric) format.
+''')
+
+st.subheader("Different types of techniques")
+st.markdown("""
+<ul class="icon-bullet">
+<li>One-Hot Vectorization</li>
+<li>Bag of Words</li>
+<li>TF-IDF (Term Frequency and Inverse Document Frequency)</li>
+<li>Word2Vec</li>
+<li>GloVe</li>
+<li>fastText</li>
+</ul>
+""", unsafe_allow_html=True)
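To make the techniques list concrete, here is a short sketch of the first three entries using scikit-learn, an assumed dependency that this page itself does not import. Word2Vec, GloVe, and fastText require trained embedding models, so they are left out:

```python
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer

docs = [
    "NLP converts text into numbers",
    "vectorization converts text into vectors",
]

# One-hot style presence/absence vectors: binary=True caps each count at 1
# (a document-level stand-in for per-word one-hot encoding).
one_hot = CountVectorizer(binary=True)
print(one_hot.fit_transform(docs).toarray())

# Bag of Words: raw token counts per document.
bow = CountVectorizer()
print(bow.fit_transform(docs).toarray())
print(bow.get_feature_names_out())

# TF-IDF: counts reweighted so terms shared by every document score lower.
tfidf = TfidfVectorizer()
print(tfidf.fit_transform(docs).toarray().round(2))
```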