Rajesh6 committed
Commit 3ce499e · verified · 1 Parent(s): 33f119e

Update pages/Introduction.py

Files changed (1):
  1. pages/Introduction.py +24 -2

pages/Introduction.py CHANGED
@@ -67,8 +67,6 @@ st.write('**Term Frequency (TF)** \n - Measures how often a word appears in a si
 st.write('**Inverse Document Frequency (IDF)** \n Measures how unique or rare a word is across all documents in the corpus. \n - Formula: \n _IDF_ = log(Total no. of documents / No. of documents containing the word) \n Words that appear in many documents (like "the" or "and") will have a low IDF value, while unique words (like "NLP") will have a higher IDF.')
 st.write('**TF - IDF Score:** \n - Combines TF and IDF to calculate the importance of a word in a document. \n - Formula: \n _TF - IDF = TF x IDF_ \n Words that are frequent in a document but rare in the overall corpus get a higher score.')
 
-st.write("Examples:")
-
 st.write("""
 **Example**
 **Consider these two documents:**
@@ -88,3 +86,27 @@ st.write("""
 - "NLP" gets a TF-IDF score of **1/3 × 0 = 0** (not unique).
 - "love" and "amazing" get scores of **1/3 × 0.69 = 0.23** (more unique).
 """)
+
+
+st.markdown('<p style="color:lightblue;"><b>c. Word Embeddings</b></p>', unsafe_allow_html=True)
+st.write("Word embeddings are a type of representation for text where words are converted into dense numerical vectors. These vectors capture the semantic meaning of words and their relationships with other words in a way that computers can understand.")
+
+import streamlit as st
+
+st.write("""
+**Word Embedding Techniques**
+
+**1. Word2Vec**
+Developed by Google, it uses two main approaches:
+- **CBOW (Continuous Bag of Words):** Predicts a word based on its context.
+- **Skip-Gram:** Predicts the context given a word.
+
+**2. GloVe (Global Vectors)**
+Developed by Stanford, it captures word relationships by analyzing co-occurrence statistics of words in a large corpus.
+
+**3. FastText**
+Developed by Facebook, it extends Word2Vec by considering subword information, making it better at handling rare and misspelled words.
+
+**4. Transformers (Contextual Embeddings)**
+Models like **BERT**, **ELMo**, and **GPT** generate embeddings based on the context in which a word appears, capturing nuanced meanings.
+""")