Spaces:
Sleeping
Sleeping
| import streamlit as st | |
# Page-wide stylesheet: colour variables, the gradient title banner, the
# black "term card" with its hover lift, and a few overrides for built-in
# widgets. It is injected as raw HTML, so it applies to every element
# rendered after this point.
# NOTE(review): `body` and `.sidebar .sidebar-content` are generic selectors;
# confirm they still match the DOM of the deployed Streamlit version.
_PAGE_STYLES = """
<style>
    :root {
        --primary: #2E86C1;
        --secondary: #AED6F1;
        --accent: #FF6B6B;
    }
    body {
        background: linear-gradient(45deg, #f8f9fa, #e9ecef);
        font-family: 'Segoe UI', system-ui;
    }
    .title-box {
        background: linear-gradient(45deg, var(--primary), var(--secondary));
        padding: 2rem;
        border-radius: 15px;
        box-shadow: 0 4px 6px rgba(0,0,0,0.1);
        margin-bottom: 2rem;
    }
    h1 {
        color: white !important;
        font-family: 'Arial Rounded MT Bold';
        text-align: center;
        font-size: 2.5rem !important;
        text-shadow: 2px 2px 4px rgba(0,0,0,0.2);
    }
    .term-card {
        background: black;
        border-radius: 10px;
        padding: 1.5rem;
        margin: 1rem 0;
        box-shadow: 0 2px 4px rgba(0,0,0,0.05);
        transition: transform 0.2s;
        border-left: 4px solid var(--primary);
    }
    .term-card:hover {
        transform: translateY(-3px);
        box-shadow: 0 4px 6px rgba(0,0,0,0.1);
    }
    .custom-icon {
        font-size: 1.5rem;
        margin-right: 0.5rem;
    }
    .sidebar .sidebar-content {
        background: black !important;
        border-right: 2px solid var(--secondary);
    }
    .stExpander {
        border: none !important;
        box-shadow: none !important;
    }
    mark {
        background-color: var(--secondary);
        padding: 0.2em 0.4em;
        border-radius: 4px;
    }
</style>
"""
st.markdown(_PAGE_STYLES, unsafe_allow_html=True)
# Sidebar navigation: the label picked here is stored in `page_section`
# and decides which content section the rest of the script renders.
with st.sidebar:
    st.header("📚 Navigation")
    page_section = st.radio(
        "Jump to:",
        [
            "Basic Terms",
            "Tokenization",
            "Vectorization",
            "Advanced Concepts",
        ],
    )
# Main content: page title inside the gradient banner styled by the
# `.title-box` CSS class.
st.markdown("""
<div class='title-box'>
    <h1>📖 NLP Terminology Explorer</h1>
</div>
""", unsafe_allow_html=True)
# Content sections. Each sidebar label maps to one renderer; only the
# renderer for the currently selected label runs on a given script pass.
# Every card is drawn on a black background, so its text colour is set to
# white explicitly instead of relying on the active theme.


def _render_basic_terms():
    """Render the foundational vocabulary, one expander per term."""
    st.markdown("### 📘 Foundational Concepts")
    terms = [
        ("📚 Corpus", "A collection of documents"),
        ("📄 Document", "Collection of sentences, paragraphs, or text elements"),
        ("📝 Paragraph", "Multiple sentences forming a coherent block"),
        ("💬 Sentence", "Complete grammatical unit of words"),
        ("🔤 Word", "Basic unit of language with meaning"),
        ("🔠 Character", "Individual letters, numbers, or symbols"),
    ]
    for term, definition in terms:
        with st.expander(term):
            st.markdown(
                f"""
                <div class='term-card'>
                  <p style='font-size: 1.1rem; color: white;'>{definition}</p>
                </div>
                """,
                unsafe_allow_html=True,
            )


def _render_tokenization():
    """Render the tokenization overview, its variants, and worked examples."""
    st.markdown("### ✂️ Text Segmentation Techniques")
    overview_col, types_col = st.columns([2, 3])
    with overview_col:
        st.markdown(
            """
            <div class='term-card'>
              <h4 style='color: white;'>What is Tokenization?</h4>
              <p style='color: white;'>Process of breaking text into smaller meaningful units called tokens</p>
            </div>
            """,
            unsafe_allow_html=True,
        )
    with types_col:
        with st.expander("🔍 Types of Tokenization"):
            st.markdown(
                """
                - **Sentence Tokenization** `(NLTK, spaCy)`
                - **Word Tokenization** `(Treebank, Regex)`
                - **Subword Tokenization** `(BPE, WordPiece)`
                - **Character-level Tokenization**
                """
            )

    st.markdown("#### 🛠️ Tokenization Examples")
    # Tab label -> snippet shown in that tab; dict order is the tab order.
    snippets = {
        "Sentence": "Text: 'Hello world! NLP is awesome.'\nSentences: ['Hello world!', 'NLP is awesome.']",
        "Word": "Sentence: 'I love NLP!'\nWords: ['I', 'love', 'NLP', '!']",
        "Character": "Word: 'Hello'\nCharacters: ['H', 'e', 'l', 'l', 'o']",
    }
    for tab, snippet in zip(st.tabs(list(snippets)), snippets.values()):
        with tab:
            st.code(snippet)


def _render_vectorization():
    """Render each text-representation technique with a short example."""
    st.markdown("### 🔢 Text Representation Methods")
    # Technique name -> (one-line description, illustrative example).
    techniques = {
        "Bag of Words": (
            "Count-based representation ignoring word order",
            "'I love NLP' → {'I': 1, 'love': 1, 'NLP': 1}",
        ),
        "TF-IDF": (
            "Statistical measure of word importance",
            "a rare word like 'tokenizer' scores higher than a common word like 'the'",
        ),
        "Word2Vec": (
            "Neural network-based word embeddings",
            "vector('king') - vector('man') + vector('woman') ≈ vector('queen')",
        ),
        "BERT": (
            "Contextual embeddings using transformers",
            "'bank' gets different vectors in 'river bank' and 'bank account'",
        ),
    }
    for tech, (desc, example) in techniques.items():
        with st.expander(f"📌 {tech}"):
            st.markdown(
                f"""
                <div style='padding: 1rem; background: black; color: white; border-radius: 8px;'>
                  <p style='color: white;'>{desc}</p>
                  <small>Example: {example}</small>
                </div>
                """,
                unsafe_allow_html=True,
            )


def _render_advanced_concepts():
    """Render the advanced concepts, each with a description and examples."""
    st.markdown("### 🧠 Advanced NLP Concepts")
    concepts = [
        (
            "🚫 Stop Words",
            "Common words filtered during processing",
            "the, is, at, which, on",
        ),
        (
            "🏷️ POS Tagging",
            "Identifying grammatical components",
            "Noun, Verb, Adjective",
        ),
        (
            "🔗 Dependency Parsing",
            "Analyzing grammatical structure",
            "Subject-verb relationships",
        ),
    ]
    for title, desc, examples in concepts:
        with st.expander(title):
            st.markdown(
                f"""
                <div class='term-card'>
                  <p style='color: white;'><strong>{desc}</strong></p>
                  <div style='margin-top: 1rem; padding: 0.5rem; background: black; color: white; border-radius: 6px;'>
                    <small>Examples: {examples}</small>
                  </div>
                </div>
                """,
                unsafe_allow_html=True,
            )


_SECTION_RENDERERS = {
    "Basic Terms": _render_basic_terms,
    "Tokenization": _render_tokenization,
    "Vectorization": _render_vectorization,
    "Advanced Concepts": _render_advanced_concepts,
}

# A label without a registered renderer draws nothing rather than raising.
_render_section = _SECTION_RENDERERS.get(page_section)
if _render_section is not None:
    _render_section()
# Footer: horizontal rule followed by a centred call-to-action.
# NOTE(review): the <button> below is raw HTML with no link or handler
# attached, so as written it is purely decorative. Its background uses the
# `--primary` CSS variable, which must be defined by the page stylesheet.
st.markdown("---")
st.markdown("""
<div style='text-align: center; color: #666; margin-top: 3rem;'>
    <p>🎓 Learn more about NLP with our interactive courses!</p>
    <button style='background: var(--primary); color: white; border: none; padding: 0.5rem 2rem; border-radius: 25px;'>
        Explore Courses
    </button>
</div>
""", unsafe_allow_html=True)