Rajesh6 committed on
Commit
4825ee3
·
verified ·
1 Parent(s): 5cd7146

Update pages/Introduction.py

Browse files
Files changed (1) hide show
  1. pages/Introduction.py +23 -1
pages/Introduction.py CHANGED
@@ -65,4 +65,26 @@ st.write("The **TF-IDF Vectorizer** is a popular technique in Natural Language P
65
 
66
  st.write('**Term Frequency (TF)** \n - Measures how often a word appears in a single document. \n - Formula: \n _TF_ = Number of times the word appears in the document / Total number of words in the document' )
67
  st.write('**Inverse Document Frequency (IDF)** \n Measures how unique or rare a word is across all documents in the corpus. \n - Formula: \n _IDF_ = log(Total no.of documents / No of Documnets containing the word) \n Words that appear in many documents (like "the" or "and") will have a low IDF value, while unique words (like "NLP") will have a higher IDF.')
68
- st.write('**TF - IDF Score:** \n - Combines TF and IDF to calculate the importance of a word in a document. \n - Formula: \n TF - IDF = TF x IDF \n Words that are frequent in a document but rare in the overall corpus get a higher score.')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
 
66
  st.write('**Term Frequency (TF)** \n - Measures how often a word appears in a single document. \n - Formula: \n _TF_ = Number of times the word appears in the document / Total number of words in the document' )
67
  st.write('**Inverse Document Frequency (IDF)** \n Measures how unique or rare a word is across all documents in the corpus. \n - Formula: \n _IDF_ = log(Total no.of documents / No of Documnets containing the word) \n Words that appear in many documents (like "the" or "and") will have a low IDF value, while unique words (like "NLP") will have a higher IDF.')
68
+ st.write('**TF - IDF Score:** \n - Combines TF and IDF to calculate the importance of a word in a document. \n - Formula: \n _TF - IDF = TF x IDF_ \n Words that are frequent in a document but rare in the overall corpus get a higher score.')
69
+
70
+ st.write("Examples:")
71
+
72
+ st.write("""
73
+ ### Example
74
+ **Consider these two documents:**
75
+
76
+ - "I love NLP"
77
+ - "NLP is amazing"
78
+
79
+ #### Step 1: Calculate TF
80
+ - "NLP" appears once in each document, so its TF is **1/3** in both.
81
+ - Words like "love" and "amazing" also have a TF of **1/3**.
82
+
83
+ #### Step 2: Calculate IDF
84
+ - "NLP" appears in both documents, so its IDF is **log(2/2) = 0**.
85
+ - "love" and "amazing" appear in only one document each, so their IDF is **log(2/1) = 0.69**.
86
+
87
+ #### Step 3: Compute TF-IDF
88
+ - "NLP" gets a TF-IDF score of **1/3 × 0 = 0** (not unique).
89
+ - "love" and "amazing" get scores of **1/3 × 0.69 = 0.23** (more unique).
90
+ """)