Spaces:

satya11
/

Natural_Language_Processing

Sleeping

App Files Files Community

Natural_Language_Processing / pages /3. Terminology.py

satya11

Update pages/3. Terminology.py

a6e7558 verified 10 months ago

raw

history blame contribute delete

6.18 kB

	import streamlit as st

	# Page stylesheet: colour variables on :root plus rules for the title banner,
	# the term cards (including a small lift on hover), the sidebar, expanders
	# and <mark> spans. Held in a named constant so the injection call stays short.
	_PAGE_STYLESHEET = """
	<style>
	:root {
	--primary: #2E86C1;
	--secondary: #AED6F1;
	--accent: #FF6B6B;
	}

	body {
	background: linear-gradient(45deg, #f8f9fa, #e9ecef);
	font-family: 'Segoe UI', system-ui;
	}

	.title-box {
	background: linear-gradient(45deg, var(--primary), var(--secondary));
	padding: 2rem;
	border-radius: 15px;
	box-shadow: 0 4px 6px rgba(0,0,0,0.1);
	margin-bottom: 2rem;
	}

	h1 {
	color: white !important;
	font-family: 'Arial Rounded MT Bold';
	text-align: center;
	font-size: 2.5rem !important;
	text-shadow: 2px 2px 4px rgba(0,0,0,0.2);
	}

	.term-card {
	background: black;
	border-radius: 10px;
	padding: 1.5rem;
	margin: 1rem 0;
	box-shadow: 0 2px 4px rgba(0,0,0,0.05);
	transition: transform 0.2s;
	border-left: 4px solid var(--primary);
	}

	.term-card:hover {
	transform: translateY(-3px);
	box-shadow: 0 4px 6px rgba(0,0,0,0.1);
	}

	.custom-icon {
	font-size: 1.5rem;
	margin-right: 0.5rem;
	}

	.sidebar .sidebar-content {
	background: black !important;
	border-right: 2px solid var(--secondary);
	}

	.stExpander {
	border: none !important;
	box-shadow: none !important;
	}

	mark {
	background-color: var(--secondary);
	padding: 0.2em 0.4em;
	border-radius: 4px;
	}
	</style>
	"""

	# The markup is a raw <style> element, so HTML pass-through is switched on.
	st.markdown(_PAGE_STYLESHEET, unsafe_allow_html=True)

	# Sidebar navigation. The radio's current choice is stored in `page_section`,
	# which the section branches further down compare against these labels.
	_SECTION_LABELS = [
	    "Basic Terms",
	    "Tokenization",
	    "Vectorization",
	    "Advanced Concepts",
	]

	with st.sidebar:
	    st.header("🔍 Navigation")
	    page_section = st.radio("Jump to:", _SECTION_LABELS)

	# Title banner shown above whichever section is selected. The `title-box`
	# class and the <h1> are styled by the stylesheet injected earlier.
	_TITLE_BANNER_HTML = """
	<div class='title-box'>
	<h1>📚 NLP Terminology Explorer</h1>
	</div>
	"""
	st.markdown(_TITLE_BANNER_HTML, unsafe_allow_html=True)

	# ---------------------------------------------------------------------------
	# Section body: exactly one branch renders, selected by `page_section`
	# (the sidebar radio value).
	#
	# Every panel with a black background sets `color: white` explicitly.
	# Originally only the "Basic Terms" card did; the other panels inherited the
	# theme's text colour, which is dark-on-black under a light theme.
	# ---------------------------------------------------------------------------

	# Basic Terms Section
	if page_section == "Basic Terms":
	    st.markdown("### 🔍 Foundational Concepts")

	    # (expander label, definition) pairs, rendered in this order.
	    terms = [
	        ("📚 Corpus", "A collection of documents"),
	        ("📄 Document", "Collection of sentences, paragraphs, or text elements"),
	        ("📝 Paragraph", "Multiple sentences forming a coherent block"),
	        ("💬 Sentence", "Complete grammatical unit of words"),
	        ("🔤 Word", "Basic unit of language with meaning"),
	        ("🔠 Character", "Individual letters, numbers, or symbols"),
	    ]

	    for term, definition in terms:
	        with st.expander(term):
	            st.markdown(f"""
	<div class='term-card'>
	<p style='font-size: 1.1rem; color: white;'>{definition}</p>
	</div>
	""", unsafe_allow_html=True)

	# Tokenization Section
	elif page_section == "Tokenization":
	    st.markdown("### ✂️ Text Segmentation Techniques")

	    # Definition card in the narrower left column, list of types on the right.
	    col1, col2 = st.columns([2, 3])

	    with col1:
	        st.markdown("""
	<div class='term-card' style='color: white;'>
	<h4>What is Tokenization?</h4>
	<p>Process of breaking text into smaller meaningful units called tokens</p>
	</div>
	""", unsafe_allow_html=True)

	    with col2:
	        with st.expander("📐 Types of Tokenization"):
	            st.markdown("""
	- Sentence Tokenization `(NLTK, spaCy)`
	- Word Tokenization `(Treebank, Regex)`
	- Subword Tokenization `(BPE, WordPiece)`
	- Character-level Tokenization
	""")

	    st.markdown("#### 🛠️ Tokenization Examples")
	    tab1, tab2, tab3 = st.tabs(["Sentence", "Word", "Character"])

	    with tab1:
	        st.code("Text: 'Hello world! NLP is awesome.'\nSentences: ['Hello world!', 'NLP is awesome.']")

	    with tab2:
	        st.code("Sentence: 'I love NLP!'\nWords: ['I', 'love', 'NLP', '!']")

	    with tab3:
	        st.code("Word: 'Hello'\nCharacters: ['H', 'e', 'l', 'l', 'o']")

	# Vectorization Section
	elif page_section == "Vectorization":
	    st.markdown("### 🔢 Text Representation Methods")

	    # technique name -> (description, example). The example replaces the
	    # literal "..." placeholder that every card used to display.
	    techniques = {
	        "Bag of Words": (
	            "Count-based representation ignoring word order",
	            "'the cat sat on the mat' → the: 2, cat: 1, sat: 1, on: 1, mat: 1",
	        ),
	        "TF-IDF": (
	            "Statistical measure of word importance",
	            "a word frequent in one document but rare across the corpus scores high",
	        ),
	        "Word2Vec": (
	            "Neural network-based word embeddings",
	            "vec('king') - vec('man') + vec('woman') ≈ vec('queen')",
	        ),
	        "BERT": (
	            "Contextual embeddings using transformers",
	            "'bank' gets different vectors in 'river bank' and 'bank account'",
	        ),
	    }

	    for tech, (desc, example) in techniques.items():
	        with st.expander(f"📊 {tech}"):
	            st.markdown(f"""
	<div style='padding: 1rem; background: black; color: white; border-radius: 8px;'>
	<p>{desc}</p>
	<small>Example: {example}</small>
	</div>
	""", unsafe_allow_html=True)

	# Advanced Concepts Section
	elif page_section == "Advanced Concepts":
	    st.markdown("### 🧠 Advanced NLP Concepts")

	    # (expander label, description, examples) triples, rendered in this order.
	    concepts = [
	        ("🚫 Stop Words", "Common words filtered during processing",
	         "the, is, at, which, on"),
	        ("🏷️ POS Tagging", "Identifying grammatical components",
	         "Noun, Verb, Adjective"),
	        ("📐 Dependency Parsing", "Analyzing grammatical structure",
	         "Subject-verb relationships"),
	    ]

	    for title, desc, examples in concepts:
	        with st.expander(title):
	            st.markdown(f"""
	<div class='term-card' style='color: white;'>
	<p><strong>{desc}</strong></p>
	<div style='margin-top: 1rem; padding: 0.5rem; background: black; border-radius: 6px;'>
	<small>Examples: {examples}</small>
	</div>
	</div>
	""", unsafe_allow_html=True)

	# Footer: a horizontal rule followed by a centred call-to-action.
	# NOTE(review): the <button> markup carries no link or handler, so as written
	# it is purely decorative — confirm whether a destination was intended.
	_FOOTER_HTML = """
	<div style='text-align: center; color: #666; margin-top: 3rem;'>
	<p>🎓 Learn more about NLP with our interactive courses!</p>
	<button style='background: var(--primary); color: white; border: none; padding: 0.5rem 2rem; border-radius: 25px;'>
	Explore Courses
	</button>
	</div>
	"""

	st.markdown("---")
	st.markdown(_FOOTER_HTML, unsafe_allow_html=True)