# Natural_Language_Processing / pages /3. Terminology.py
# satya11's picture
# Update pages/3. Terminology.py
# a6e7558 verified
import streamlit as st
# Page-wide stylesheet: colour variables, the gradient title box, term cards
# that lift slightly on hover, sidebar/expander overrides and <mark> styling.
_PAGE_STYLESHEET = """
<style>
:root {
--primary: #2E86C1;
--secondary: #AED6F1;
--accent: #FF6B6B;
}
body {
background: linear-gradient(45deg, #f8f9fa, #e9ecef);
font-family: 'Segoe UI', system-ui;
}
.title-box {
background: linear-gradient(45deg, var(--primary), var(--secondary));
padding: 2rem;
border-radius: 15px;
box-shadow: 0 4px 6px rgba(0,0,0,0.1);
margin-bottom: 2rem;
}
h1 {
color: white !important;
font-family: 'Arial Rounded MT Bold';
text-align: center;
font-size: 2.5rem !important;
text-shadow: 2px 2px 4px rgba(0,0,0,0.2);
}
.term-card {
background: black;
border-radius: 10px;
padding: 1.5rem;
margin: 1rem 0;
box-shadow: 0 2px 4px rgba(0,0,0,0.05);
transition: transform 0.2s;
border-left: 4px solid var(--primary);
}
.term-card:hover {
transform: translateY(-3px);
box-shadow: 0 4px 6px rgba(0,0,0,0.1);
}
.custom-icon {
font-size: 1.5rem;
margin-right: 0.5rem;
}
.sidebar .sidebar-content {
background: black !important;
border-right: 2px solid var(--secondary);
}
.stExpander {
border: none !important;
box-shadow: none !important;
}
mark {
background-color: var(--secondary);
padding: 0.2em 0.4em;
border-radius: 4px;
}
</style>
"""

# The payload is a raw <style> element, so HTML rendering must be enabled.
st.markdown(_PAGE_STYLESHEET, unsafe_allow_html=True)
# Sidebar navigation: one radio group picks the section rendered in the main
# area. The selected label is stored in `page_section`.
with st.sidebar:
    # NOTE(review): the header emoji was stored as a mis-decoded UTF-8
    # sequence with its last byte lost; the magnifying glass is assumed.
    st.header("🔍 Navigation")
    page_section = st.radio(
        "Jump to:",
        [
            "Basic Terms",
            "Tokenization",
            "Vectorization",
            "Advanced Concepts",
        ],
    )
# Main content: page title rendered inside the `title-box` container.
# The leading emoji had been stored as mis-decoded UTF-8 (bytes F0 9F 93 9A);
# it is restored to the books emoji those bytes encode.
st.markdown("""
<div class='title-box'>
<h1>📚 NLP Terminology Explorer</h1>
</div>
""", unsafe_allow_html=True)
# HTML/Markdown templates for the section bodies. They are kept at module
# level, flush-left, so the markup handed to st.markdown is the same no matter
# how deeply the rendering code is nested.
#
# Every text element carries an explicit white colour because the cards have
# a black background. The basic-term card already did this; the other cards
# relied on the active theme's text colour, which is presumably dark on a
# light theme and therefore hard to read on black.
_BASIC_TERM_CARD = """
<div class='term-card'>
<p style='font-size: 1.1rem; color: white;'>{definition}</p>
</div>
"""

_TOKENIZATION_INTRO_CARD = """
<div class='term-card'>
<h4 style='color: white;'>What is Tokenization?</h4>
<p style='color: white;'>Process of breaking text into smaller meaningful units called tokens</p>
</div>
"""

_TOKENIZATION_TYPES_MD = """
- **Sentence Tokenization** `(NLTK, spaCy)`
- **Word Tokenization** `(Treebank, Regex)`
- **Subword Tokenization** `(BPE, WordPiece)`
- **Character-level Tokenization**
"""

_VECTORIZATION_CARD = """
<div style='padding: 1rem; background: black; border-radius: 8px;'>
<p style='color: white;'>{desc}</p>
<small style='color: white;'>Example: {example}</small>
</div>
"""

_ADVANCED_CONCEPT_CARD = """
<div class='term-card'>
<p style='color: white;'><strong>{desc}</strong></p>
<div style='margin-top: 1rem; padding: 0.5rem; background: black; border-radius: 6px;'>
<small style='color: white;'>Examples: {examples}</small>
</div>
</div>
"""

# Exactly one branch runs, selected by the sidebar radio value in
# `page_section`.
#
# NOTE(review): several emoji below were stored as mis-decoded UTF-8. Where
# the garbled form was ambiguous or had lost a byte, the glyph matching the
# label's meaning was chosen - confirm against the intended design.

# Basic Terms Section
if page_section == "Basic Terms":
    st.markdown("### 🔍 Foundational Concepts")
    # (label, definition) pairs, ordered from largest text unit to smallest.
    terms = [
        ("📚 Corpus", "A collection of documents"),
        ("📄 Document", "Collection of sentences, paragraphs, or text elements"),
        ("📝 Paragraph", "Multiple sentences forming a coherent block"),
        ("💬 Sentence", "Complete grammatical unit of words"),
        ("🔤 Word", "Basic unit of language with meaning"),
        ("🔠 Character", "Individual letters, numbers, or symbols"),
    ]
    for term, definition in terms:
        with st.expander(term):
            st.markdown(
                _BASIC_TERM_CARD.format(definition=definition),
                unsafe_allow_html=True,
            )

# Tokenization Section
elif page_section == "Tokenization":
    st.markdown("### ✂️ Text Segmentation Techniques")
    # Narrow column for the definition, wider column for the list of types.
    col1, col2 = st.columns([2, 3])
    with col1:
        st.markdown(_TOKENIZATION_INTRO_CARD, unsafe_allow_html=True)
    with col2:
        with st.expander("📝 Types of Tokenization"):
            st.markdown(_TOKENIZATION_TYPES_MD)
    # NOTE(review): the original indentation was lost; the examples are placed
    # at section level (full width) rather than inside col2 - confirm.
    st.markdown("#### 🛠️ Tokenization Examples")
    tab1, tab2, tab3 = st.tabs(["Sentence", "Word", "Character"])
    with tab1:
        st.code("Text: 'Hello world! NLP is awesome.'\nSentences: ['Hello world!', 'NLP is awesome.']")
    with tab2:
        st.code("Sentence: 'I love NLP!'\nWords: ['I', 'love', 'NLP', '!']")
    with tab3:
        st.code("Word: 'Hello'\nCharacters: ['H', 'e', 'l', 'l', 'o']")

# Vectorization Section
elif page_section == "Vectorization":
    st.markdown("### 🔢 Text Representation Methods")
    # technique name -> (description, example). The example slot used to
    # render a literal "..." placeholder for every technique.
    techniques = {
        "Bag of Words": (
            "Count-based representation ignoring word order",
            "'I love NLP, I love Python' → {I: 2, love: 2, NLP: 1, Python: 1}",
        ),
        "TF-IDF": (
            "Statistical measure of word importance",
            "'the' occurs in every document → low weight; "
            "a term found in few documents → high weight",
        ),
        "Word2Vec": (
            "Neural network-based word embeddings",
            "vec('king') - vec('man') + vec('woman') ≈ vec('queen')",
        ),
        "BERT": (
            "Contextual embeddings using transformers",
            "'bank' gets different vectors in 'river bank' and 'bank account'",
        ),
    }
    for tech, (desc, example) in techniques.items():
        with st.expander(f"📊 {tech}"):
            st.markdown(
                _VECTORIZATION_CARD.format(desc=desc, example=example),
                unsafe_allow_html=True,
            )

# Advanced Concepts Section
elif page_section == "Advanced Concepts":
    st.markdown("### 🧠 Advanced NLP Concepts")
    # (expander title, description, example text) triples.
    concepts = [
        ("🚫 Stop Words", "Common words filtered during processing",
         "the, is, at, which, on"),
        ("🏷️ POS Tagging", "Identifying grammatical components",
         "Noun, Verb, Adjective"),
        # NOTE(review): this emoji lost a byte in the stored text; the
        # triangular ruler is a guess.
        ("📐 Dependency Parsing", "Analyzing grammatical structure",
         "Subject-verb relationships"),
    ]
    for title, desc, examples in concepts:
        with st.expander(title):
            st.markdown(
                _ADVANCED_CONCEPT_CARD.format(desc=desc, examples=examples),
                unsafe_allow_html=True,
            )
# Footer: a horizontal rule followed by a centred call-to-action.
st.markdown("---")
# The leading emoji had been stored as mis-decoded UTF-8 (bytes F0 9F 8E 93);
# it is restored to the graduation-cap emoji those bytes encode.
# NOTE(review): the <button> is plain HTML with no handler or link attached,
# so clicking it does nothing.
st.markdown("""
<div style='text-align: center; color: #666; margin-top: 3rem;'>
<p>🎓 Learn more about NLP with our interactive courses!</p>
<button style='background: var(--primary); color: white; border: none; padding: 0.5rem 2rem; border-radius: 25px;'>
Explore Courses
</button>
</div>
""", unsafe_allow_html=True)