Update app.py
Browse files
app.py
CHANGED
|
@@ -53,7 +53,7 @@ if selected_method == "Bag of Words":
|
|
| 53 |
st.subheader("Bag of Words (BoW)")
|
| 54 |
st.write("""
|
| 55 |
**Definition**: Represents text as a collection of word counts, ignoring grammar and word order.
|
| 56 |
-
|
| 57 |
**Uses**:
|
| 58 |
- Sentiment analysis
|
| 59 |
- Document classification
|
|
@@ -67,10 +67,6 @@ if selected_method == "Bag of Words":
|
|
| 67 |
❌ Ignores word order and context
|
| 68 |
❌ High-dimensionality for large vocabularies
|
| 69 |
""")
|
| 70 |
-
vectorizer = CountVectorizer()
|
| 71 |
-
X_bow = vectorizer.fit_transform(texts)
|
| 72 |
-
st.write("Feature Names:", vectorizer.get_feature_names_out())
|
| 73 |
-
st.write("Bag of Words Representation:", X_bow.toarray())
|
| 74 |
|
| 75 |
elif selected_method == "TF-IDF":
|
| 76 |
st.subheader("Term Frequency-Inverse Document Frequency (TF-IDF)")
|
|
@@ -90,10 +86,6 @@ elif selected_method == "TF-IDF":
|
|
| 90 |
❌ Still ignores word order
|
| 91 |
❌ Does not capture deep semantics
|
| 92 |
""")
|
| 93 |
-
tfidf_vectorizer = TfidfVectorizer()
|
| 94 |
-
X_tfidf = tfidf_vectorizer.fit_transform(texts)
|
| 95 |
-
st.write("Feature Names:", tfidf_vectorizer.get_feature_names_out())
|
| 96 |
-
st.write("TF-IDF Representation:", X_tfidf.toarray())
|
| 97 |
|
| 98 |
elif selected_method == "One-Hot Encoding":
|
| 99 |
st.subheader("One-Hot Encoding")
|
|
@@ -112,10 +104,6 @@ elif selected_method == "One-Hot Encoding":
|
|
| 112 |
❌ Inefficient for large vocabularies
|
| 113 |
❌ No information on word meaning
|
| 114 |
""")
|
| 115 |
-
one_hot_vectorizer = CountVectorizer(binary=True)
|
| 116 |
-
X_one_hot = one_hot_vectorizer.fit_transform(texts)
|
| 117 |
-
st.write("Feature Names:", one_hot_vectorizer.get_feature_names_out())
|
| 118 |
-
st.write("One-Hot Encoding Representation:", X_one_hot.toarray())
|
| 119 |
|
| 120 |
elif selected_method == "Word Embeddings (Word2Vec)":
|
| 121 |
st.subheader("Word Embeddings (Word2Vec)")
|
|
@@ -135,14 +123,6 @@ elif selected_method == "Word Embeddings (Word2Vec)":
|
|
| 135 |
❌ Requires large datasets to train
|
| 136 |
❌ Computationally expensive
|
| 137 |
""")
|
| 138 |
-
model = Word2Vec(sentences=[text.split() for text in texts], vector_size=100, window=5, min_count=1, workers=4)
|
| 139 |
-
word_vectors = model.wv
|
| 140 |
-
word = 'natural'
|
| 141 |
-
if word in word_vectors:
|
| 142 |
-
st.write(f"Word2Vec Representation of '{word}':")
|
| 143 |
-
st.write(word_vectors[word])
|
| 144 |
-
else:
|
| 145 |
-
st.write(f"Word '{word}' not found in the vocabulary.")
|
| 146 |
|
| 147 |
# Footer
|
| 148 |
st.write("---")
|
|
|
|
| 53 |
st.subheader("Bag of Words (BoW)")
|
| 54 |
st.write("""
|
| 55 |
**Definition**: Represents text as a collection of word counts, ignoring grammar and word order.
|
| 56 |
+
|
| 57 |
**Uses**:
|
| 58 |
- Sentiment analysis
|
| 59 |
- Document classification
|
|
|
|
| 67 |
❌ Ignores word order and context
|
| 68 |
❌ High-dimensionality for large vocabularies
|
| 69 |
""")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
|
| 71 |
elif selected_method == "TF-IDF":
|
| 72 |
st.subheader("Term Frequency-Inverse Document Frequency (TF-IDF)")
|
|
|
|
| 86 |
❌ Still ignores word order
|
| 87 |
❌ Does not capture deep semantics
|
| 88 |
""")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
|
| 90 |
elif selected_method == "One-Hot Encoding":
|
| 91 |
st.subheader("One-Hot Encoding")
|
|
|
|
| 104 |
❌ Inefficient for large vocabularies
|
| 105 |
❌ No information on word meaning
|
| 106 |
""")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
|
| 108 |
elif selected_method == "Word Embeddings (Word2Vec)":
|
| 109 |
st.subheader("Word Embeddings (Word2Vec)")
|
|
|
|
| 123 |
❌ Requires large datasets to train
|
| 124 |
❌ Computationally expensive
|
| 125 |
""")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 126 |
|
| 127 |
# Footer
|
| 128 |
st.write("---")
|