import streamlit as st

# Streamlit page: a glossary of basic Natural Language Processing terms.
#
# The script runs top to bottom on every rerun: it injects a custom
# stylesheet, renders the title and caption, then one header (or subheader)
# plus a short markdown definition per term.
#
# NOTE(review): the emoji and the dash separator in the strings below were
# restored from mis-decoded UTF-8. Some code points were only partially
# recoverable, so a few glyphs are best guesses -- confirm against the
# intended design.

# Custom stylesheet injected through st.markdown. The content is CSS, so the
# explanatory comments live inside the string as CSS comments.
# NOTE(review): `body`, `.sidebar .sidebar-content` and `.streamlit-button`
# may not match the DOM produced by current Streamlit releases -- verify in
# the browser inspector before relying on these rules.
PAGE_CSS = """
<style>
/* Set a soft background color */
body {
background-color: #eef2f7;
}
/* Style for main title */
h1 {
color: black;
font-family: 'Roboto', sans-serif;
font-weight: 700;
text-align: center;
margin-bottom: 25px;
}
/* Style for headers */
h2 {
color: red;
font-family: 'Roboto', sans-serif;
font-weight: 600;
margin-top: 30px;
}

/* Style for subheaders */
h3 {
color: violet;
font-family: 'Roboto', sans-serif;
font-weight: 500;
margin-top: 20px;
}
.custom-subheader {
color: violet;
font-family: 'Roboto', sans-serif;
font-weight: 600;
margin-bottom: 15px;
}
/* Paragraph styling */
p {
font-family: 'Georgia', serif;
line-height: 1.8;
color: black;
margin-bottom: 20px;
}
/* List styling with checkmark bullets */
.icon-bullet {
list-style-type: none;
padding-left: 20px;
}
.icon-bullet li {
font-family: 'Georgia', serif;
font-size: 1.1em;
margin-bottom: 10px;
color: black;
}
.icon-bullet li::before {
content: "✔";
padding-right: 10px;
color: black;
}
/* Sidebar styling */
.sidebar .sidebar-content {
background-color: #ffffff;
border-radius: 10px;
padding: 15px;
}
.sidebar h2 {
color: #495057;
}
/* Custom button style */
.streamlit-button {
background-color: #00FFFF;
color: #000000;
font-weight: bold;
}
</style>
"""

# unsafe_allow_html is required; otherwise Streamlit escapes the <style> tag
# and shows the CSS as plain text instead of applying it.
st.markdown(PAGE_CSS, unsafe_allow_html=True)

# --- Page title and caption (raw HTML so the h1/p rules above apply) -------
st.markdown("<h1 class='title'>📖 NLP Terminology</h1>", unsafe_allow_html=True)
st.markdown("<p class='caption'>✨ Explore essential terms in Natural Language Processing and their meanings!...</p>", unsafe_allow_html=True)

# --- Text hierarchy: corpus -> document -> paragraph -> sentence -> word ---
st.header("📚 Corpus")
st.markdown("- **A corpus** is a collection of documents.")

st.header("📄 Document")
st.markdown("- **A document** is a collection of sentences, paragraphs, single words, or even single characters.")

st.header("📝 Paragraph")
st.markdown("- **A paragraph** consists of multiple sentences.")

st.header("📢 Sentence")
st.markdown("- **A sentence** is a collection of words.")

st.header("🔤 Word")
st.markdown("- **Words** are made up of characters.")

st.header("🔠 Character")
st.markdown("- **A character** can be a number, alphabet, or special symbol.")

# --- Tokenization -----------------------------------------------------------
st.header("✂️ Tokenization")
st.markdown("- **Tokenization** is a technique by using which we can convert a huge chunk into small entity where those small entities are known as tokens.")

st.subheader("🛠️ Types of Tokenization")
st.markdown("""
- 🔹 **Sentence Tokenization** — Splits text into sentences.
- 🔹 **Word Tokenization** — Splits sentences into words.
- 🔹 **Character Tokenization** — Splits words into individual characters.
""")

st.subheader("📜 Sentence Tokenization")
st.markdown("- **Breaks a large text into meaningful sentence units.**")

st.subheader("📝 Word Tokenization")
st.markdown("- **Splits a sentence into individual words.**")

st.subheader("🔡 Character Tokenization")
st.markdown("- **Breaks words into separate characters.**")

# --- Stop words -------------------------------------------------------------
st.header("🚫 Stop Words")
st.markdown("- **Common words** (e.g., 'the', 'is', 'and') that do not add meaning to the text but maintain grammatical structure.")

# --- Vectorization ----------------------------------------------------------
st.header("📊 Vectorization")
st.markdown("- **Transforms text into numerical representation** for machine learning models.")

st.subheader("🔢 Different Types of Vectorization Techniques")
st.markdown("""
- 🎯 **One-Hot Encoding**
- 🏷️ **Bag of Words (BoW)**
- 📈 **TF-IDF (Term Frequency-Inverse Document Frequency)**
- 🧠 **Word2Vec**
- 🌍 **GloVe**
- ⚡ **FastText**
""")

# --- Closing banner ---------------------------------------------------------
st.success("🚀 Mastering these **NLP terminologies** will help you build powerful text-processing applications!")