Update pages/Introduction.py
Browse files- pages/Introduction.py +3 -3
pages/Introduction.py
CHANGED
|
@@ -63,6 +63,6 @@ st.write("Vector for “I love NLP”: [1, 1, 1, 0, 0]")
|
|
| 63 |
# Sub-heading for the TF-IDF section. It is written as raw HTML to colour the
# text light blue, which is why unsafe_allow_html=True is passed.
st.markdown('<p style="color:lightblue;"><b>b. Term Frequency-Inverse Document Frequency (TF-IDF)</b></p>', unsafe_allow_html=True)
|
| 64 |
# Introductory paragraph: what the TF-IDF vectorizer is and what the acronym
# stands for.
st.write("The **TF-IDF Vectorizer** is a popular technique in Natural Language Processing (NLP) used to convert text into numerical values that can be used by machine learning models. It stands for Term Frequency-Inverse Document Frequency and helps highlight the importance of words in a document relative to a collection of documents (called a corpus).")
|
| 65 |
|
| 66 |
-
st.write(
|
| 67 |
-
st.write(
|
| 68 |
-
st.write(
|
|
|
|
| 63 |
# Sub-heading for the TF-IDF section. It is written as raw HTML to colour the
# text light blue, which is why unsafe_allow_html=True is passed.
st.markdown('<p style="color:lightblue;"><b>b. Term Frequency-Inverse Document Frequency (TF-IDF)</b></p>', unsafe_allow_html=True)
|
| 64 |
# Introductory paragraph: what the TF-IDF vectorizer is and what the acronym
# stands for.
st.write("The **TF-IDF Vectorizer** is a popular technique in Natural Language Processing (NLP) used to convert text into numerical values that can be used by machine learning models. It stands for Term Frequency-Inverse Document Frequency and helps highlight the importance of words in a document relative to a collection of documents (called a corpus).")
|
| 65 |
|
| 66 |
+
# Term Frequency: bold heading, a definition bullet, and the formula.
# The emphasis markers hug the term (`_TF_`) because Markdown does not treat
# underscores separated from the text by spaces as italics; the previous
# `_ TF _` rendered the underscores literally.
st.write('**Term Frequency (TF)** \n - Measures how often a word appears in a single document. \n - Formula: \n _TF_ = Number of times the word appears in the document / Total number of words in the document')
|
| 67 |
+
# Inverse Document Frequency: bold heading, a definition bullet, the formula,
# and a short interpretation of low vs. high IDF values.
# The definition is a "-" bullet so this section has the same
# heading / bullet / formula layout as the other TF-IDF sections, and the
# emphasis markers hug the term (`_IDF_`) so Markdown renders it in italics.
st.write('**Inverse Document Frequency (IDF)** \n - Measures how unique or rare a word is across all documents in the corpus. \n - Formula: \n _IDF_ = log(Total no. of documents / No. of documents containing the word) \n Words that appear in many documents (like "the" or "and") will have a low IDF value, while unique words (like "NLP") will have a higher IDF.')
|
| 68 |
+
# TF-IDF score: bold heading, a definition bullet, the combined formula, and
# a short interpretation.
# The heading's bold span is closed ("**...**"); an unclosed "**" is rendered
# by Markdown as two literal asterisks instead of bold text.
st.write('**TF - IDF Score:** \n - Combines TF and IDF to calculate the importance of a word in a document. \n - Formula: \n TF - IDF = TF x IDF \n Words that are frequent in a document but rare in the overall corpus get a higher score.')
|