Harika22 committed on
Commit
e92ee8b
·
verified ·
1 Parent(s): e0e4575

Update pages/6_Feature_Engineering.py

Browse files
Files changed (1) hide show
  1. pages/6_Feature_Engineering.py +15 -2
pages/6_Feature_Engineering.py CHANGED
@@ -137,8 +137,21 @@ st.markdown("""
137
  st.sidebar.title("Navigation 🧭")
138
  file_type = st.sidebar.radio(
139
  "Choose a Vectorization technique :",
140
- ("One-Hot Vectorization", "Bag of Words(BOW)", "Term Frequency - Inverse Document Frequency(TF-IDF)", "Word2Vec", "Fasttext"))
141
 
142
  if file_type == "One-Hot Vectorization":
143
- st.title("One-Hot Vectorization")
 
 
 
 
 
 
 
 
 
 
 
 
 
144
 
 
137
  st.sidebar.title("Navigation 🧭")
138
  file_type = st.sidebar.radio(
139
  "Choose a Vectorization technique :",
140
+ ("One-Hot Vectorization", "Bag of Words(BOW)", "Term Frequency - Inverse Document Frequency(TF-IDF)"))
141
 
142
  if file_type == "One-Hot Vectorization":
143
+ st.title(":red[One-Hot Vectorization]")
144
+ st.markdown('''
145
+ - It is a type of vectorization technique where we convert text into a vector
146
+ - Steps in One-Hot vectorization
147
+ - 1. Create a vocabulary (set of all unique words in collected corpus)
148
+ - 2. Find the length of the vocabulary
149
+ - 3. Converting every document into vector form
150
+ - Every unique word is converted into a vector (where the vector has d dimensions, d = len(vocabulary))
151
+ - Every dimension belongs to unique word
152
+ - Here we're not converting the document into a vector directly; we're converting each and every word to vector form and then combining them to form the vector
153
+ - This technique is called One-Hot Vectorization
154
+ ''')
155
+
156
+
157