Update pages/7_Advance_vectorization_techniques.py
Browse files
pages/7_Advance_vectorization_techniques.py
CHANGED
|
@@ -447,4 +447,58 @@ if file_type == "Word2Vec":
|
|
| 447 |
|
| 448 |
elif file_type == "Fasttext":
|
| 449 |
st.title(":red[Fasttext]")
|
| 450 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 447 |
|
| 448 |
elif file_type == "Fasttext":
|
| 449 |
st.title(":red[Fasttext]")
|
| 450 |
+
st.markdown(
|
| 451 |
+
"""
|
| 452 |
+
<p><strong>FastText</strong> is an advanced word vectorization technique that enhances word embeddings by considering subword information.</p>
|
| 453 |
+
<p>It is a <span class='highlight'>simple extension</span> of Word2Vec, which converts words into vectors.</p>
|
| 454 |
+
""",
|
| 455 |
+
unsafe_allow_html=True,
|
| 456 |
+
)
|
| 457 |
+
|
| 458 |
+
st.markdown(
|
| 459 |
+
"""
|
| 460 |
+
<h3 style='color: #6A0572;'> Implementing FastText</h3>
|
| 461 |
+
<p>FastText can be implemented using:</p>
|
| 462 |
+
<ul>
|
| 463 |
+
<li><strong>CBOW (Continuous Bag of Words)</strong></li>
|
| 464 |
+
<li><strong>Skip-gram</strong></li>
|
| 465 |
+
</ul>
|
| 466 |
+
""",
|
| 467 |
+
unsafe_allow_html=True,
|
| 468 |
+
)
|
| 469 |
+
|
| 470 |
+
st.markdown(
|
| 471 |
+
"""
|
| 472 |
+
<strong>CBOW Representation:</strong>
|
| 473 |
+
<pre style="background-color:#F7F7F7; padding: 10px; border-radius: 5px;">
|
| 474 |
+
y = f(xi)
|
| 475 |
+
where,
|
| 476 |
+
y = Focus Word
|
| 477 |
+
xi = Context Words
|
| 478 |
+
</pre>
|
| 479 |
+
|
| 480 |
+
<strong>Skip-gram Representation:</strong>
|
| 481 |
+
<pre style="background-color:#F7F7F7; padding: 10px; border-radius: 5px;">
|
| 482 |
+
y = f(xi)
|
| 483 |
+
where,
|
| 484 |
+
y = Context Words
|
| 485 |
+
xi = Focus Word
|
| 486 |
+
</pre>
|
| 487 |
+
""",
|
| 488 |
+
unsafe_allow_html=True,
|
| 489 |
+
)
|
| 490 |
+
|
| 491 |
+
st.markdown(
|
| 492 |
+
"""
|
| 493 |
+
<h3 style='color: #6A0572;'> Problem: Out-of-Vocabulary (OOV)</h3>
|
| 494 |
+
<p>Traditional word embedding techniques fail when encountering new or rare words.</p>
|
| 495 |
+
<p><span class='highlight'>FastText overcomes this issue</span> by breaking words into subword units (character n-grams).</p>
|
| 496 |
+
""",
|
| 497 |
+
unsafe_allow_html=True,
|
| 498 |
+
)
|
| 499 |
+
|
| 500 |
+
|
| 501 |
+
|
| 502 |
+
st.markdown('''Example :
|
| 503 |
+
-
|
| 504 |
+
''')
|