"""NLP Terminology Explorer — an interactive Streamlit reference app.

A sidebar radio jumps between four static content sections (Basic Terms,
Tokenization, Vectorization, Advanced Concepts). All content is hard-coded;
there is no model, file, or network dependency.
"""

import streamlit as st


def _inject_custom_css() -> None:
    """Inject page-level CSS via raw-HTML markdown.

    NOTE(review): the comment in the original promised "modern design and
    animations", but the style payload here is empty — confirm whether the
    CSS block was lost in a previous edit.
    """
    st.markdown(""" """, unsafe_allow_html=True)


def _render_basic_terms() -> None:
    """Render the 'Basic Terms' section: six foundational definitions."""
    st.markdown("### 🔍 Foundational Concepts")
    terms = [
        ("📚 Corpus", "A collection of documents"),
        ("📄 Document", "Collection of sentences, paragraphs, or text elements"),
        ("📝 Paragraph", "Multiple sentences forming a coherent block"),
        ("💬 Sentence", "Complete grammatical unit of words"),
        ("🔤 Word", "Basic unit of language with meaning"),
        ("🔠 Character", "Individual letters, numbers, or symbols"),
    ]
    for term, definition in terms:
        with st.expander(term):
            st.markdown(f"""

{definition}

""", unsafe_allow_html=True)


def _render_tokenization() -> None:
    """Render the 'Tokenization' section: definition, types, and examples."""
    st.markdown("### ✂️ Text Segmentation Techniques")
    col1, col2 = st.columns([2, 3])
    with col1:
        st.markdown("""

What is Tokenization?

Process of breaking text into smaller meaningful units called tokens

""", unsafe_allow_html=True)
    with col2:
        with st.expander("📐 Types of Tokenization"):
            st.markdown("""
- **Sentence Tokenization** `(NLTK, spaCy)`
- **Word Tokenization** `(Treebank, Regex)`
- **Subword Tokenization** `(BPE, WordPiece)`
- **Character-level Tokenization**
""")
    st.markdown("#### 🛠️ Tokenization Examples")
    tab1, tab2, tab3 = st.tabs(["Sentence", "Word", "Character"])
    with tab1:
        st.code("Text: 'Hello world! NLP is awesome.'\nSentences: ['Hello world!', 'NLP is awesome.']")
    with tab2:
        st.code("Sentence: 'I love NLP!'\nWords: ['I', 'love', 'NLP', '!']")
    with tab3:
        st.code("Word: 'Hello'\nCharacters: ['H', 'e', 'l', 'l', 'o']")


def _render_vectorization() -> None:
    """Render the 'Vectorization' section: four text-representation methods."""
    st.markdown("### 🔢 Text Representation Methods")
    techniques = {
        "Bag of Words": "Count-based representation ignoring word order",
        "TF-IDF": "Statistical measure of word importance",
        "Word2Vec": "Neural network-based word embeddings",
        "BERT": "Contextual embeddings using transformers",
    }
    for tech, desc in techniques.items():
        with st.expander(f"📊 {tech}"):
            # The original used the constant f-string expression {'...'} here,
            # which renders as the literal "..."; the literal is used directly.
            st.markdown(f"""

{desc}

Example: ...
""", unsafe_allow_html=True)


def _render_advanced_concepts() -> None:
    """Render the 'Advanced Concepts' section: three concepts with examples."""
    st.markdown("### 🧠 Advanced NLP Concepts")
    concepts = [
        ("🚫 Stop Words", "Common words filtered during processing", "the, is, at, which, on"),
        ("🏷️ POS Tagging", "Identifying grammatical components", "Noun, Verb, Adjective"),
        ("📐 Dependency Parsing", "Analyzing grammatical structure", "Subject-verb relationships"),
    ]
    for title, desc, examples in concepts:
        with st.expander(title):
            st.markdown(f"""

{desc}

Examples: {examples}
""", unsafe_allow_html=True)


def _render_footer() -> None:
    """Render the page footer with a call-to-action line."""
    st.markdown("---")
    st.markdown("""

🎓 Learn more about NLP with our interactive courses!

""", unsafe_allow_html=True)


# Dispatch table: sidebar label -> section renderer. Insertion order defines
# the order of the radio options, matching the original explicit list.
_SECTIONS = {
    "Basic Terms": _render_basic_terms,
    "Tokenization": _render_tokenization,
    "Vectorization": _render_vectorization,
    "Advanced Concepts": _render_advanced_concepts,
}


def main() -> None:
    """Build the page: CSS, sidebar navigation, selected section, footer."""
    _inject_custom_css()

    # Sidebar with navigation; the radio value selects which section renders.
    with st.sidebar:
        st.header("🔍 Navigation")
        page_section = st.radio("Jump to:", list(_SECTIONS))

    # Main content header.
    st.markdown("""

📚 NLP Terminology Explorer

""", unsafe_allow_html=True)

    _SECTIONS[page_section]()
    _render_footer()


# Streamlit runs the script with __name__ == "__main__", so the guard does
# not change behavior but keeps the module importable without side effects.
if __name__ == "__main__":
    main()