Update pages/3_Terminology.py
Browse files- pages/3_Terminology.py +152 -0
pages/3_Terminology.py
CHANGED
|
@@ -0,0 +1,152 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
# Inject the page-wide stylesheet as raw HTML (hence unsafe_allow_html=True).
# It defines the fadeIn keyframes plus the .title, .caption, .section, .term
# and .definition classes used by the HTML snippets rendered below.
#
# Every declaration must end with ';'. Previously the `color: black` lines in
# .title, .caption and .term had no terminator, so the CSS parser merged each
# one with the following line and discarded both declarations (losing the
# colour together with the font-size / animation that came after it).
#
# The string content is deliberately kept flush-left: indenting the <style>
# tag inside a Markdown string risks it being treated as a code block.
st.markdown(
    """
<style>
body {
background-color: #f9f9f9; /* Light background */
font-family: 'Arial', sans-serif;
}
@keyframes fadeIn {
0% { opacity: 0; transform: translateY(-20px); }
100% { opacity: 1; transform: translateY(0); }
}
.title {
text-align: center;
color: black;
font-size: 3rem;
font-weight: bold;
animation: fadeIn 1.5s ease-in-out;
}
.caption {
text-align: center;
font-style: italic;
font-size: 1.2rem;
color: black;
animation: fadeIn 2s ease-in-out;
}
.section {
font-size: 1.1rem;
text-align: justify;
line-height: 1.8;
color: #34495e; /* Muted gray */
background: #ffffff; /* White background */
padding: 20px;
border-radius: 10px;
box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
animation: fadeIn 2.5s ease-in-out;
margin: 15px 0;
}
.term {
font-weight: bold;
color: black;
animation: fadeIn 3s ease-in-out;
}
.definition {
font-style: italic;
color: #34495e;
animation: fadeIn 3.5s ease-in-out;
}
</style>
""",
    unsafe_allow_html=True,
)
|
| 55 |
+
|
| 56 |
+
# Page heading and tagline. Both are emitted as raw HTML so that they can
# carry the 'title' and 'caption' CSS classes.
_TITLE_HTML = "<h1 class='title'>NLP Terminology</h1>"
_CAPTION_HTML = "<p class='caption'>Explore essential terms in Natural Language Processing and their meanings!...</p>"

st.markdown(_TITLE_HTML, unsafe_allow_html=True)

st.markdown(_CAPTION_HTML, unsafe_allow_html=True)
|
| 62 |
+
|
| 63 |
+
# Glossary content, listed in display order as (term heading, definition).
# NOTE(review): the stylesheet also declares a '.definition' class, but the
# card markup below never applies it to the definition text -- confirm
# whether that is intentional.
_GLOSSARY = (
    (
        "Documents",
        "It is a collection of sentence / paragraph / single word / single character",
    ),
    (
        "Stemming",
        'Stemming is the process of reducing words to their base or root form. For example, "running" becomes "run." It helps in reducing the complexity of text data by grouping similar words together.',
    ),
    (
        "Lemmatization",
        'Lemmatization is a more advanced form of stemming that reduces words to their base form by considering the context and meaning. For example, "better" becomes "good" based on its usage in a sentence.',
    ),
    (
        "Named Entity Recognition (NER)",
        "NER is the task of identifying and classifying named entities in text, such as person names, locations, organizations, and dates. This technique is useful in tasks like information retrieval and summarization.",
    ),
    (
        "Part-of-Speech (POS) Tagging",
        "POS tagging involves labeling each word in a sentence with its grammatical category, such as noun, verb, or adjective. It helps in understanding the syntactic structure of the text.",
    ),
    (
        "Word Embeddings",
        "Word embeddings are numerical representations of words in a continuous vector space, where similar words are closer together. Common techniques include Word2Vec, GloVe, and FastText.",
    ),
    (
        "Bag-of-Words (BoW)",
        "Bag-of-Words is a simple representation of text data where each word is treated as a feature. The order of words is ignored, and the text is represented by a frequency count of words in the document.",
    ),
    (
        "TF-IDF (Term Frequency - Inverse Document Frequency)",
        "TF-IDF is a statistic used to evaluate the importance of a word in a document relative to all other documents. It balances the frequency of a word in a document with its rarity across the entire dataset.",
    ),
    (
        "Sentiment Analysis",
        "Sentiment Analysis is the task of determining the sentiment or opinion expressed in text. It is often used to analyze social media posts, customer feedback, and reviews to gauge public opinion.",
    ),
    (
        "Language Model",
        "A language model predicts the probability of a sequence of words occurring in a sentence. Popular models include GPT, BERT, and LSTM, which help in text generation, translation, and summarization tasks.",
    ),
)

# HTML for one glossary card: the term heading, a line break, then the
# definition, all inside a single 'section' paragraph. Kept flush-left so the
# generated string matches the hand-written markup exactly.
_CARD_TEMPLATE = """
<p class="section"><span class="term">{term}</span><br>
{definition}
</p>
"""

# Render one card per glossary entry, in the order listed above.
for _term, _definition in _GLOSSARY:
    st.markdown(
        _CARD_TEMPLATE.format(term=_term, definition=_definition),
        unsafe_allow_html=True,
    )
|
| 152 |
+
|