| import streamlit as st |
|
|
# Inject global CSS overrides that restyle the whole app: page background,
# heading fonts/colors (h1-h3), paragraph and bullet-list typography, sidebar
# surface, and button appearance. Rendered once at startup via raw HTML.
# NOTE(review): the `.sidebar .sidebar-content` and `.streamlit-button`
# selectors look like legacy Streamlit class names, and plain `body` styling
# may not apply in newer releases (which use `.stApp` and generated class
# names) — confirm against the installed Streamlit version.
st.markdown("""
<style>
/* Set a soft background color */
body {
background-color: #eef2f7;
}
/* Style for main title */
h1 {
color: black;
font-family: 'Roboto', sans-serif;
font-weight: 700;
text-align: center;
margin-bottom: 25px;
}
/* Style for headers */
h2 {
color: red;
font-family: 'Roboto', sans-serif;
font-weight: 600;
margin-top: 30px;
}

/* Style for subheaders */
h3 {
color: violet;
font-family: 'Roboto', sans-serif;
font-weight: 500;
margin-top: 20px;
}
.custom-subheader {
color: violet;
font-family: 'Roboto', sans-serif;
font-weight: 600;
margin-bottom: 15px;
}
/* Paragraph styling */
p {
font-family: 'Georgia', serif;
line-height: 1.8;
color: black;
margin-bottom: 20px;
}
/* List styling with checkmark bullets */
.icon-bullet {
list-style-type: none;
padding-left: 20px;
}
.icon-bullet li {
font-family: 'Georgia', serif;
font-size: 1.1em;
margin-bottom: 10px;
color: black;
}
.icon-bullet li::before {
content: "◆";
padding-right: 10px;
color: black;
}
/* Sidebar styling */
.sidebar .sidebar-content {
background-color: #ffffff;
border-radius: 10px;
padding: 15px;
}
.sidebar h2 {
color: #495057;
}
/* Custom button style */
.streamlit-button {
background-color: #00FFFF;
color: #000000;
font-weight: bold;
}
</style>
""", unsafe_allow_html=True)
|
|
# Page title and introductory caption, rendered as raw HTML so the
# class-based CSS injected above can style them.
_TITLE_HTML = "<h1 class='title'>NLP Terminology</h1>"
_CAPTION_HTML = "<p class='caption'>Explore essential terms in Natural Language Processing and their meanings!...</p>"

st.markdown(_TITLE_HTML, unsafe_allow_html=True)
st.markdown(_CAPTION_HTML, unsafe_allow_html=True)
# --- Basic text-unit definitions ---
# Fix: corrected grammar/pluralization in the user-facing definitions.
st.header("Document")
st.markdown('''
- A document is defined as a collection of sentences, paragraphs, a single word, or even a single character.
''')


st.header("Paragraph")
st.markdown('''
- A paragraph is defined as a collection of sentences.
''')
|
|
# Fix: corrected grammar/pluralization in the user-facing definitions.
st.header("Sentence")
st.markdown('''
- A sentence is defined as a collection of words.
''')


st.header("Word")
st.markdown('''
- A word is defined as a collection of characters.
''')
|
|
# Fix: corrected grammar ("can either be in number , alphabets") and the
# stray space before the comma in the user-facing text.
st.header("Character")
st.markdown('''
- A character can be a number, a letter of the alphabet, or a special symbol.
''')


st.header("Tokenization")
st.markdown('''
- Tokenization is a technique for breaking a large chunk of text into smaller entities known as tokens.
''')
|
|
# Fix: "tokennization" typo and stray trailing space inside the third <li>.
st.subheader("Types of Tokenization")
st.markdown("""
<ul class="icon-bullet">
<li>Sentence tokenization</li>
<li>Word tokenization</li>
<li>Character tokenization</li>
</ul>
""", unsafe_allow_html=True)
|
|
# Fix: corrected grammar in the three per-granularity definitions
# ("tokens which are in sentence" -> "each token is a sentence", etc.).
st.subheader("Sentence tokenization")
st.markdown('''
- Sentence tokenization breaks a large chunk of text into smaller entities (tokens), where each token is a sentence.
''')


st.subheader("Word tokenization")
st.markdown('''
- Word tokenization breaks a large chunk of text into smaller entities (tokens), where each token is a word.
''')


st.subheader("Character tokenization")
st.markdown('''
- Character tokenization breaks a large chunk of text into smaller entities (tokens), where each token is a character.
''')
|
|
# Fix: corrected tense and articles ("They are set of words which didn't
# have impact" -> present tense with proper articles).
st.header("Stop Words")
st.markdown('''
- Stop words are a set of words that do not have an impact on the meaning of a sentence or paragraph.
- Stop words are used to make the grammar clear.
''')
|
|
# Fix: corrected articles/grammar in the user-facing definition.
st.header("Vectorization")
st.markdown('''
- Vectorization is a technique that helps us convert text into a vector (numeric) format.
''')
|
|
# Fix: use the canonical technique names — Word2Vec (not "Word2Vector"),
# GloVe (not "Glove"), fastText (not "Fast text") — and remove stray
# trailing spaces inside the list items.
st.subheader("Different types of techniques")
st.markdown("""
<ul class="icon-bullet">
<li>One-Hot Vectorization</li>
<li>Bag of Words</li>
<li>TF-IDF (Term Frequency-Inverse Document Frequency)</li>
<li>Word2Vec</li>
<li>GloVe</li>
<li>fastText</li>
</ul>
""", unsafe_allow_html=True)