"""Streamlit page that presents a short glossary of basic NLP terminology.

The script injects a custom stylesheet, renders a title and caption, and then
walks through the text hierarchy (corpus down to character), tokenization,
stop words and vectorization techniques.
"""

import streamlit as st

# NOTE(review): the emoji and dash glyphs in the strings below were stored as
# mis-decoded UTF-8 (for example "β¨" where "✨" was meant) and rendered as
# stray Greek letters. Glyphs whose trailing bytes survived were restored from
# them; the others had lost their distinguishing bytes, so the replacements are
# best-effort picks that fit each heading — confirm against the intended design.

# NOTE(review): the `body`, `.sidebar .sidebar-content`, `.sidebar h2` and
# `.streamlit-button` selectors are kept exactly as written, but they may not
# match any element in the DOM that Streamlit currently renders — verify in the
# browser inspector. `.custom-subheader` and `.icon-bullet` are not referenced
# by the markup in this part of the script, and the `title` / `caption` classes
# used on the heading and caption below have no rule in this stylesheet.
PAGE_CSS = """
<style>
/* Set a soft background color */
body {
    background-color: #eef2f7;
}
/* Style for main title */
h1 {
    color: black;
    font-family: 'Roboto', sans-serif;
    font-weight: 700;
    text-align: center;
    margin-bottom: 25px;
}
/* Style for headers */
h2 {
    color: red;
    font-family: 'Roboto', sans-serif;
    font-weight: 600;
    margin-top: 30px;
}
/* Style for subheaders */
h3 {
    color: violet;
    font-family: 'Roboto', sans-serif;
    font-weight: 500;
    margin-top: 20px;
}
.custom-subheader {
    color: violet;
    font-family: 'Roboto', sans-serif;
    font-weight: 600;
    margin-bottom: 15px;
}
/* Paragraph styling */
p {
    font-family: 'Georgia', serif;
    line-height: 1.8;
    color: black;
    margin-bottom: 20px;
}
/* List styling with checkmark bullets */
.icon-bullet {
    list-style-type: none;
    padding-left: 20px;
}
.icon-bullet li {
    font-family: 'Georgia', serif;
    font-size: 1.1em;
    margin-bottom: 10px;
    color: black;
}
.icon-bullet li::before {
    content: "✔";
    padding-right: 10px;
    color: black;
}
/* Sidebar styling */
.sidebar .sidebar-content {
    background-color: #ffffff;
    border-radius: 10px;
    padding: 15px;
}
.sidebar h2 {
    color: #495057;
}
/* Custom button style */
.streamlit-button {
    background-color: #00FFFF;
    color: #000000;
    font-weight: bold;
}
</style>
"""

# (header, definition) pairs for the text hierarchy, ordered from the largest
# unit (corpus) down to the smallest (character).
TEXT_UNITS = (
    ("📚 Corpus", "- **A corpus** is a collection of documents."),
    ("📄 Document", "- **A document** is a collection of sentences, paragraphs, single words, or even single characters."),
    ("📝 Paragraph", "- **A paragraph** consists of multiple sentences."),
    ("🔢 Sentence", "- **A sentence** is a collection of words."),
    ("🔤 Word", "- **Words** are made up of characters."),
    ("🔠 Character", "- **A character** can be a number, alphabet, or special symbol."),
)

# --- Page chrome -----------------------------------------------------------
# The stylesheet, title and caption are raw HTML, so they need
# unsafe_allow_html=True to be rendered instead of escaped.
st.markdown(PAGE_CSS, unsafe_allow_html=True)
st.markdown("<h1 class='title'>📖 NLP Terminology</h1>", unsafe_allow_html=True)
st.markdown("<p class='caption'>✨ Explore essential terms in Natural Language Processing and their meanings!...</p>", unsafe_allow_html=True)

# --- Text hierarchy ----------------------------------------------------------
for unit_header, unit_definition in TEXT_UNITS:
    st.header(unit_header)
    st.markdown(unit_definition)

# --- Tokenization ------------------------------------------------------------
st.header("✂️ Tokenization")
st.markdown("- **Tokenization** is a technique by using which we can convert a huge chunk into small entity where those small entities are known as tokens.")

st.subheader("🛠️ Types of Tokenization")
st.markdown("""
- 🔹 **Sentence Tokenization** – Splits text into sentences.
- 🔹 **Word Tokenization** – Splits sentences into words.
- 🔹 **Character Tokenization** – Splits words into individual characters.
""")

st.subheader("📜 Sentence Tokenization")
st.markdown("- **Breaks a large text into meaningful sentence units.**")

st.subheader("🔍 Word Tokenization")
st.markdown("- **Splits a sentence into individual words.**")

st.subheader("🔡 Character Tokenization")
st.markdown("- **Breaks words into separate characters.**")

# --- Stop words --------------------------------------------------------------
st.header("🚫 Stop Words")
st.markdown("- **Common words** (e.g., 'the', 'is', 'and') that do not add meaning to the text but maintain grammatical structure.")

# --- Vectorization -----------------------------------------------------------
st.header("📊 Vectorization")
st.markdown("- **Transforms text into numerical representation** for machine learning models.")

st.subheader("🔢 Different Types of Vectorization Techniques")
st.markdown("""
- 🎯 **One-Hot Encoding**
- 🏷️ **Bag of Words (BoW)**
- 📈 **TF-IDF (Term Frequency-Inverse Document Frequency)**
- 🧠 **Word2Vec**
- 🌍 **GloVe**
- ⚡ **FastText**
""")

# --- Closing note ------------------------------------------------------------
st.success("🎉 Mastering these **NLP terminologies** will help you build powerful text-processing applications!")