File size: 4,073 Bytes
7b57fd0 05e322c 7b57fd0 05e322c 27635a1 05e322c 5f0db14 05e322c 27635a1 05e322c 27635a1 05e322c 7b57fd0 05e322c 7b57fd0 f8cdeaa 628ffb3 f8cdeaa 5f0db14 f8cdeaa bcbd8c9 f8cdeaa |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 |
import streamlit as st
# Page-wide stylesheet, injected as a raw <style> element (st.markdown only
# passes HTML through when unsafe_allow_html=True).
# The list-bullet glyph is written as the CSS escape \2714 (heavy check mark)
# so that it matches the "checkmark bullets" intent and cannot be damaged by a
# wrong file encoding; the backslash is doubled because this is a regular,
# non-raw Python string.
# NOTE(review): `body`, `.sidebar .sidebar-content` and `.streamlit-button`
# may not match the markup of the Streamlit version in use — confirm in the
# browser inspector.
PAGE_CSS = """
<style>
/* Set a soft background color */
body {
background-color: #eef2f7;
}
/* Style for main title */
h1 {
color: black;
font-family: 'Roboto', sans-serif;
font-weight: 700;
text-align: center;
margin-bottom: 25px;
}
/* Style for headers */
h2 {
color: red;
font-family: 'Roboto', sans-serif;
font-weight: 600;
margin-top: 30px;
}
/* Style for subheaders */
h3 {
color: violet;
font-family: 'Roboto', sans-serif;
font-weight: 500;
margin-top: 20px;
}
.custom-subheader {
color: violet;
font-family: 'Roboto', sans-serif;
font-weight: 600;
margin-bottom: 15px;
}
/* Paragraph styling */
p {
font-family: 'Georgia', serif;
line-height: 1.8;
color: black;
margin-bottom: 20px;
}
/* List styling with checkmark bullets */
.icon-bullet {
list-style-type: none;
padding-left: 20px;
}
.icon-bullet li {
font-family: 'Georgia', serif;
font-size: 1.1em;
margin-bottom: 10px;
color: black;
}
.icon-bullet li::before {
content: "\\2714";
padding-right: 10px;
color: black;
}
/* Sidebar styling */
.sidebar .sidebar-content {
background-color: #ffffff;
border-radius: 10px;
padding: 15px;
}
.sidebar h2 {
color: #495057;
}
/* Custom button style */
.streamlit-button {
background-color: #00FFFF;
color: #000000;
font-weight: bold;
}
</style>
"""
st.markdown(PAGE_CSS, unsafe_allow_html=True)
# Page banner: the main title followed by a one-line caption. Both are sent as
# raw HTML so the elements can carry their own CSS classes.
# NOTE(review): the leading glyphs in these strings look like mis-decoded
# emoji — confirm the file's encoding.
banner_fragments = (
    "<h1 class='title'>π NLP Terminology</h1>",
    "<p class='caption'>β¨ Explore essential terms in Natural Language Processing and their meanings!...</p>",
)
for banner_fragment in banner_fragments:
    st.markdown(banner_fragment, unsafe_allow_html=True)
# Basic text units, listed from corpus down to character. Each entry is
# rendered as a header followed by a one-bullet markdown definition.
# NOTE(review): the leading glyphs in the titles look like mis-decoded emoji —
# confirm the file's encoding.
text_units = (
    ("π Corpus", "- **A corpus** is a collection of documents."),
    ("π Document", "- **A document** is a collection of sentences, paragraphs, single words, or even single characters."),
    ("π Paragraph", "- **A paragraph** consists of multiple sentences."),
    ("π’ Sentence", "- **A sentence** is a collection of words."),
    ("π€ Word", "- **Words** are made up of characters."),
    ("π Character", "- **A character** can be a number, alphabet, or special symbol."),
)
for unit_title, unit_definition in text_units:
    st.header(unit_title)
    st.markdown(unit_definition)
# Tokenization section: a definition, an overview list of the three kinds,
# then one subheader + summary per kind.
# NOTE(review): the leading glyphs and the separator between each kind and its
# description look like mis-decoded characters — confirm the file's encoding.
st.header("βοΈ Tokenization")
st.markdown("- **Tokenization** is a technique by using which we can convert a huge chunk into small entity where those small entities are known as tokens.")

st.subheader("π οΈ Types of Tokenization")
# Adjacent literals are concatenated into one markdown list; the leading and
# trailing newlines are part of the rendered string.
tokenization_overview = (
    "\n"
    "- πΉ **Sentence Tokenization** β Splits text into sentences.\n"
    "- πΉ **Word Tokenization** β Splits sentences into words.\n"
    "- πΉ **Character Tokenization** β Splits words into individual characters.\n"
)
st.markdown(tokenization_overview)

tokenization_kinds = (
    ("π Sentence Tokenization", "- **Breaks a large text into meaningful sentence units.**"),
    ("π Word Tokenization", "- **Splits a sentence into individual words.**"),
    ("π‘ Character Tokenization", "- **Breaks words into separate characters.**"),
)
for kind_title, kind_summary in tokenization_kinds:
    st.subheader(kind_title)
    st.markdown(kind_summary)
# Stop-words and vectorization sections: each gets a header and a one-bullet
# definition, then the vectorization techniques are listed under a subheader.
# NOTE(review): the leading glyphs in these strings look like mis-decoded
# emoji — confirm the file's encoding.
concept_sections = (
    ("π« Stop Words", "- **Common words** (e.g., 'the', 'is', 'and') that do not add meaning to the text but maintain grammatical structure."),
    ("π Vectorization", "- **Transforms text into numerical representation** for machine learning models."),
)
for concept_title, concept_definition in concept_sections:
    st.header(concept_title)
    st.markdown(concept_definition)

st.subheader("π’ Different Types of Vectorization Techniques")
vectorization_bullets = (
    "- π― **One-Hot Encoding**",
    "- π·οΈ **Bag of Words (BoW)**",
    "- π **TF-IDF (Term Frequency-Inverse Document Frequency)**",
    "- π§ **Word2Vec**",
    "- π **GloVe**",
    "- β‘ **FastText**",
)
# The empty first and last items give the joined text its leading and trailing
# newline.
st.markdown("\n".join(("", *vectorization_bullets, "")))
# Closing call-out, shown as a Streamlit success message.
# NOTE(review): the leading glyph looks like a mis-decoded emoji — confirm the
# file's encoding.
st.success("π Mastering these **NLP terminologies** will help you build powerful text-processing applications!")