sree4411 committed on
Commit
105486b
·
verified ·
1 Parent(s): 1c1d531

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -21
app.py CHANGED
@@ -53,7 +53,7 @@ if selected_method == "Bag of Words":
53
  st.subheader("Bag of Words (BoW)")
54
  st.write("""
55
  **Definition**: Represents text as a collection of word counts, ignoring grammar and word order.
56
-
57
  **Uses**:
58
  - Sentiment analysis
59
  - Document classification
@@ -67,10 +67,6 @@ if selected_method == "Bag of Words":
67
  ❌ Ignores word order and context
68
  ❌ High-dimensionality for large vocabularies
69
  """)
70
- vectorizer = CountVectorizer()
71
- X_bow = vectorizer.fit_transform(texts)
72
- st.write("Feature Names:", vectorizer.get_feature_names_out())
73
- st.write("Bag of Words Representation:", X_bow.toarray())
74
 
75
  elif selected_method == "TF-IDF":
76
  st.subheader("Term Frequency-Inverse Document Frequency (TF-IDF)")
@@ -90,10 +86,6 @@ elif selected_method == "TF-IDF":
90
  ❌ Still ignores word order
91
  ❌ Does not capture deep semantics
92
  """)
93
- tfidf_vectorizer = TfidfVectorizer()
94
- X_tfidf = tfidf_vectorizer.fit_transform(texts)
95
- st.write("Feature Names:", tfidf_vectorizer.get_feature_names_out())
96
- st.write("TF-IDF Representation:", X_tfidf.toarray())
97
 
98
  elif selected_method == "One-Hot Encoding":
99
  st.subheader("One-Hot Encoding")
@@ -112,10 +104,6 @@ elif selected_method == "One-Hot Encoding":
112
  ❌ Inefficient for large vocabularies
113
  ❌ No information on word meaning
114
  """)
115
- one_hot_vectorizer = CountVectorizer(binary=True)
116
- X_one_hot = one_hot_vectorizer.fit_transform(texts)
117
- st.write("Feature Names:", one_hot_vectorizer.get_feature_names_out())
118
- st.write("One-Hot Encoding Representation:", X_one_hot.toarray())
119
 
120
  elif selected_method == "Word Embeddings (Word2Vec)":
121
  st.subheader("Word Embeddings (Word2Vec)")
@@ -135,14 +123,6 @@ elif selected_method == "Word Embeddings (Word2Vec)":
135
  ❌ Requires large datasets to train
136
  ❌ Computationally expensive
137
  """)
138
- model = Word2Vec(sentences=[text.split() for text in texts], vector_size=100, window=5, min_count=1, workers=4)
139
- word_vectors = model.wv
140
- word = 'natural'
141
- if word in word_vectors:
142
- st.write(f"Word2Vec Representation of '{word}':")
143
- st.write(word_vectors[word])
144
- else:
145
- st.write(f"Word '{word}' not found in the vocabulary.")
146
 
147
  # Footer
148
  st.write("---")
 
53
  st.subheader("Bag of Words (BoW)")
54
  st.write("""
55
  **Definition**: Represents text as a collection of word counts, ignoring grammar and word order.
56
+
57
  **Uses**:
58
  - Sentiment analysis
59
  - Document classification
 
67
  ❌ Ignores word order and context
68
  ❌ High-dimensionality for large vocabularies
69
  """)
 
 
 
 
70
 
71
  elif selected_method == "TF-IDF":
72
  st.subheader("Term Frequency-Inverse Document Frequency (TF-IDF)")
 
86
  ❌ Still ignores word order
87
  ❌ Does not capture deep semantics
88
  """)
 
 
 
 
89
 
90
  elif selected_method == "One-Hot Encoding":
91
  st.subheader("One-Hot Encoding")
 
104
  ❌ Inefficient for large vocabularies
105
  ❌ No information on word meaning
106
  """)
 
 
 
 
107
 
108
  elif selected_method == "Word Embeddings (Word2Vec)":
109
  st.subheader("Word Embeddings (Word2Vec)")
 
123
  ❌ Requires large datasets to train
124
  ❌ Computationally expensive
125
  """)
 
 
 
 
 
 
 
 
126
 
127
  # Footer
128
  st.write("---")