sree4411 committed on
Commit
0b2e3e9
·
verified ·
1 Parent(s): 80fae49

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +72 -79
app.py CHANGED
@@ -1,7 +1,7 @@
1
  import streamlit as st
2
  from gensim.models import Word2Vec
3
 
4
- # Apply custom styles
5
  st.markdown("""
6
  <style>
7
  .main-title { color: #FF5733; font-size: 40px; font-weight: bold; text-align: center; }
@@ -17,32 +17,29 @@ st.markdown('<p class="main-title">Introduction to NLP</p>', unsafe_allow_html=T
17
  # Section: What is NLP?
18
  st.markdown('<p class="section-title">What is NLP?</p>', unsafe_allow_html=True)
19
  st.markdown("""
20
- <p class="text">
21
  Natural Language Processing (NLP) is a subfield of artificial intelligence that enables computers to process, understand, and generate human language.
22
- </p>
23
-
24
- <p class="sub-title">Applications of NLP:</p>
25
- ✅ Chatbots & Virtual Assistants (e.g., Siri, Alexa)
26
- ✅ Sentiment Analysis (e.g., Product reviews, Social Media monitoring)
27
- ✅ Machine Translation (e.g., Google Translate)
28
- ✅ Text Summarization (e.g., News article summaries)
29
- ✅ Speech Recognition (e.g., Voice commands)
30
- """, unsafe_allow_html=True)
31
 
32
  # Section: NLP Terminologies
33
  st.markdown('<p class="section-title">NLP Terminologies</p>', unsafe_allow_html=True)
34
  st.markdown("""
35
- <p class="text">
36
- 🔹 **Corpus**: A collection of text documents used for NLP tasks.
37
- 🔹 **Tokenization**: Splitting text into individual words or phrases.
38
- 🔹 **Stop Words**: Common words (e.g., "the", "is") that are often removed.
39
- 🔹 **Stemming**: Reducing words to their base form (e.g., "running" → "run").
40
- 🔹 **Lemmatization**: More advanced than stemming; converts words to their dictionary form.
41
- 🔹 **NER (Named Entity Recognition)**: Identifies entities like names, dates, and locations.
42
- 🔹 **Sentiment Analysis**: Determines the sentiment (positive, negative, neutral) of a text.
43
- 🔹 **n-grams**: Sequences of 'n' consecutive words (e.g., "New York" is a bi-gram).
44
- </p>
45
- """, unsafe_allow_html=True)
46
 
47
  # Section: Text Representation Methods
48
  st.markdown('<p class="section-title">Text Representation Methods</p>', unsafe_allow_html=True)
@@ -57,85 +54,81 @@ selected_method = st.radio("Select a text representation method:", methods)
57
  if selected_method == "Bag of Words":
58
  st.markdown('<p class="sub-title">Bag of Words (BoW)</p>', unsafe_allow_html=True)
59
  st.markdown("""
60
- <p class="text">
61
  **Definition**: Represents text as a collection of word counts, ignoring grammar and word order.
62
- </p>
63
-
64
- <p class="sub-title">Uses:</p>
65
- ✅ Sentiment analysis
66
- ✅ Document classification
67
- ✅ Information retrieval
68
 
69
- <p class="sub-title">Advantages:</p>
70
- ✅ Simple and easy to implement
71
- ✅ Works well with traditional ML models
72
 
73
- <p class="sub-title">Disadvantages:</p>
74
- ❌ Ignores word order and context
75
- ❌ High-dimensionality for large vocabularies
76
- """, unsafe_allow_html=True)
77
 
78
  elif selected_method == "TF-IDF":
79
  st.markdown('<p class="sub-title">Term Frequency-Inverse Document Frequency (TF-IDF)</p>', unsafe_allow_html=True)
80
  st.markdown("""
81
- <p class="text">
82
  **Definition**: Weighs words based on their frequency in a document and across all documents.
83
- </p>
84
-
85
- <p class="sub-title">Uses:</p>
86
- ✅ Information retrieval (e.g., search engines)
87
- ✅ Text classification
88
- ✅ Keyword extraction
89
 
90
- <p class="sub-title">Advantages:</p>
91
- ✅ Reduces the impact of common words
92
- ✅ Highlights important words
93
 
94
- <p class="sub-title">Disadvantages:</p>
95
- ❌ Still ignores word order
96
- ❌ Does not capture deep semantics
97
- """, unsafe_allow_html=True)
98
 
99
  elif selected_method == "One-Hot Encoding":
100
  st.markdown('<p class="sub-title">One-Hot Encoding</p>', unsafe_allow_html=True)
101
  st.markdown("""
102
- <p class="text">
103
  **Definition**: Represents words as binary vectors where each word has a unique position in a vocabulary.
104
- </p>
105
-
106
- <p class="sub-title">Uses:</p>
107
- ✅ Simple NLP tasks
108
- ✅ Word-level feature engineering
109
 
110
- <p class="sub-title">Advantages:</p>
111
- ✅ Simple to understand
112
- ✅ Works well with small vocabulary sizes
113
 
114
- <p class="sub-title">Disadvantages:</p>
115
- ❌ Inefficient for large vocabularies
116
- ❌ No information on word meaning
117
- """, unsafe_allow_html=True)
118
 
119
  elif selected_method == "Word Embeddings (Word2Vec)":
120
  st.markdown('<p class="sub-title">Word Embeddings (Word2Vec)</p>', unsafe_allow_html=True)
121
  st.markdown("""
122
- <p class="text">
123
  **Definition**: Converts words into dense numerical vectors capturing semantic relationships.
124
- </p>
125
-
126
- <p class="sub-title">Uses:</p>
127
- ✅ Machine translation
128
- ✅ Speech recognition
129
- ✅ Sentiment analysis
130
 
131
- <p class="sub-title">Advantages:</p>
132
- ✅ Captures semantic relationships
133
- ✅ Works well for deep learning models
134
 
135
- <p class="sub-title">Disadvantages:</p>
136
- ❌ Requires large datasets to train
137
- ❌ Computationally expensive
138
- """, unsafe_allow_html=True)
139
 
140
  # Sample texts
141
  texts = [
@@ -147,10 +140,10 @@ elif selected_method == "Word Embeddings (Word2Vec)":
147
  word_vectors = model.wv
148
  word = 'natural'
149
  if word in word_vectors:
150
- st.markdown(f'<p class="text">Word2Vec Representation of "{word}":</p>', unsafe_allow_html=True)
151
  st.write(word_vectors[word])
152
  else:
153
- st.markdown(f'<p class="text">Word "{word}" not found in the vocabulary.</p>', unsafe_allow_html=True)
154
 
155
  # Footer
156
  st.markdown('<hr>', unsafe_allow_html=True)
 
1
  import streamlit as st
2
  from gensim.models import Word2Vec
3
 
4
+ # Apply custom styles using Streamlit's markdown
5
  st.markdown("""
6
  <style>
7
  .main-title { color: #FF5733; font-size: 40px; font-weight: bold; text-align: center; }
 
17
  # Section: What is NLP?
18
  st.markdown('<p class="section-title">What is NLP?</p>', unsafe_allow_html=True)
19
  st.markdown("""
 
20
  Natural Language Processing (NLP) is a subfield of artificial intelligence that enables computers to process, understand, and generate human language.
21
+ """)
22
+ st.markdown("""
23
+ **Applications of NLP:**
24
+ - ✅ Chatbots & Virtual Assistants (e.g., Siri, Alexa)
25
+ - ✅ Sentiment Analysis (e.g., Product reviews, Social Media monitoring)
26
+ - ✅ Machine Translation (e.g., Google Translate)
27
+ - ✅ Text Summarization (e.g., News article summaries)
28
+ - ✅ Speech Recognition (e.g., Voice commands)
29
+ """)
30
 
31
  # Section: NLP Terminologies
32
  st.markdown('<p class="section-title">NLP Terminologies</p>', unsafe_allow_html=True)
33
  st.markdown("""
34
+ **Corpus**: A collection of text documents used for NLP tasks.
35
+ **Tokenization**: Splitting text into individual words or phrases.
36
+ **Stop Words**: Common words (e.g., "the", "is") that are often removed.
37
+ **Stemming**: Reducing words to their base form (e.g., "running" → "run").
38
+ **Lemmatization**: More advanced than stemming; converts words to their dictionary form.
39
+ **NER (Named Entity Recognition)**: Identifies entities like names, dates, and locations.
40
+ **Sentiment Analysis**: Determines the sentiment (positive, negative, neutral) of a text.
41
+ **n-grams**: Sequences of 'n' consecutive words (e.g., "New York" is a bi-gram).
42
+ """)
 
 
43
 
44
  # Section: Text Representation Methods
45
  st.markdown('<p class="section-title">Text Representation Methods</p>', unsafe_allow_html=True)
 
54
  if selected_method == "Bag of Words":
55
  st.markdown('<p class="sub-title">Bag of Words (BoW)</p>', unsafe_allow_html=True)
56
  st.markdown("""
 
57
  **Definition**: Represents text as a collection of word counts, ignoring grammar and word order.
58
+ """)
59
+ st.markdown("""
60
+ **Uses:**
61
+ - ✅ Sentiment analysis
62
+ - ✅ Document classification
63
+ - ✅ Information retrieval
64
 
65
+ **Advantages:**
66
+ - ✅ Simple and easy to implement
67
+ - ✅ Works well with traditional ML models
68
 
69
+ **Disadvantages:**
70
+ - ❌ Ignores word order and context
71
+ - ❌ High-dimensionality for large vocabularies
72
+ """)
73
 
74
  elif selected_method == "TF-IDF":
75
  st.markdown('<p class="sub-title">Term Frequency-Inverse Document Frequency (TF-IDF)</p>', unsafe_allow_html=True)
76
  st.markdown("""
 
77
  **Definition**: Weighs words based on their frequency in a document and across all documents.
78
+ """)
79
+ st.markdown("""
80
+ **Uses:**
81
+ - ✅ Information retrieval (e.g., search engines)
82
+ - ✅ Text classification
83
+ - ✅ Keyword extraction
84
 
85
+ **Advantages:**
86
+ - ✅ Reduces the impact of common words
87
+ - ✅ Highlights important words
88
 
89
+ **Disadvantages:**
90
+ - ❌ Still ignores word order
91
+ - ❌ Does not capture deep semantics
92
+ """)
93
 
94
  elif selected_method == "One-Hot Encoding":
95
  st.markdown('<p class="sub-title">One-Hot Encoding</p>', unsafe_allow_html=True)
96
  st.markdown("""
 
97
  **Definition**: Represents words as binary vectors where each word has a unique position in a vocabulary.
98
+ """)
99
+ st.markdown("""
100
+ **Uses:**
101
+ - ✅ Simple NLP tasks
101
+ - ✅ Word-level feature engineering
103
 
104
+ **Advantages:**
105
+ - ✅ Simple to understand
106
+ - ✅ Works well with small vocabulary sizes
107
 
108
+ **Disadvantages:**
109
+ - ❌ Inefficient for large vocabularies
110
+ - ❌ No information on word meaning
111
+ """)
112
 
113
  elif selected_method == "Word Embeddings (Word2Vec)":
114
  st.markdown('<p class="sub-title">Word Embeddings (Word2Vec)</p>', unsafe_allow_html=True)
115
  st.markdown("""
 
116
  **Definition**: Converts words into dense numerical vectors capturing semantic relationships.
117
+ """)
118
+ st.markdown("""
119
+ **Uses:**
120
+ - ✅ Machine translation
121
+ - ✅ Speech recognition
122
+ - ✅ Sentiment analysis
123
 
124
+ **Advantages:**
125
+ - ✅ Captures semantic relationships
126
+ - ✅ Works well for deep learning models
127
 
128
+ **Disadvantages:**
129
+ - ❌ Requires large datasets to train
130
+ - ❌ Computationally expensive
131
+ """)
132
 
133
  # Sample texts
134
  texts = [
 
140
  word_vectors = model.wv
141
  word = 'natural'
142
  if word in word_vectors:
143
+ st.markdown(f'Word2Vec Representation of "{word}":')
144
  st.write(word_vectors[word])
145
  else:
146
+ st.markdown(f'Word "{word}" not found in the vocabulary.')
147
 
148
  # Footer
149
  st.markdown('<hr>', unsafe_allow_html=True)