|
|
--- |
|
|
license: cc-by-sa-4.0 |
|
|
datasets: |
|
|
- procesaur/Vikipedija |
|
|
language: |
|
|
- sr |
|
|
base_model: |
|
|
- te-sla/Word2VecSr |
|
|
tags: |
|
|
- dict2vec |
|
|
--- |
|
|
|
|
|
|
|
|
<table style="width:100%;height:100%"> |
|
|
<tr> |
|
|
<td colspan=2> |
|
|
<h4><i class="highlight-container"><b class="highlight">SerbDict2vec</b></i></h4> |
|
|
</td> |
|
|
</tr> |
|
|
<tr style="width:100%;height:100%"> |
|
|
<td width=50%> |
|
|
<p>Обучаван над корпусом српског језика Википедија, СрпКор2013 и део СрпКор2021 - 350 милиона речи</p> |
|
|
</td> |
|
|
<td> |
|
|
<p>Trained on the Serbian language corpus compiled from srWikipedia, SrpKor2013, and part of SrpKor2021 - 350 million words</p> |
|
|
</td> |
|
|
</tr> |
|
|
</table> |
|
|
|
|
|
|
|
|
```python |
|
|
from gensim.models import KeyedVectors |
|
|
|
|
|
# Load the vectors (replace the path below with the local path to the downloaded model files) |
|
|
d2v_vectors = KeyedVectors.load("D:/modeli/dict2vec/SerbDict2vec") |
|
|
|
|
|
# Check word vector |
|
|
print(d2v_vectors["klijent"]) |
|
|
|
|
|
``` |
|
|
``` |
|
|
[-3.1600e-01 -3.4110e+00 1.2158e+01 3.7950e+00 6.1200e-01 -3.1000e-01 |
|
|
-9.7000e-02 -5.0000e-02 -5.2000e-02 -9.4000e-01 3.5600e-01 -6.0400e-01 |
|
|
-2.3700e-01 1.1600e-01 -4.5500e-01 1.6100e-01 2.2500e-01 -6.4700e-01 |
|
|
5.4600e-01 -7.8000e-02 3.5500e-01 5.8000e-02 -3.0000e-02 3.3000e-01 |
|
|
-1.5700e-01 -5.9700e-01 1.5000e-02 1.9600e-01 1.0000e-03 1.5800e-01 |
|
|
4.3300e-01 -5.0000e-03 -3.0700e-01 -2.6000e-01 -5.2500e-01 7.4000e-02 |
|
|
-2.7000e-02 1.8800e-01 5.6000e-02 -2.5200e-01 3.0700e-01 -4.3000e-02 |
|
|
5.9000e-02 -6.6000e-02 -1.0000e-02 1.3900e-01 7.1000e-02 -4.2000e-02 |
|
|
-3.2000e-02 -1.3100e-01 1.4000e-02 -8.9000e-02 -3.2200e-01 -6.2000e-02 |
|
|
-1.0500e-01 1.0800e-01 1.6100e-01 -1.3600e-01 -1.5400e-01 4.0000e-02 |
|
|
-5.1000e-02 1.1000e-02 2.6600e-01 3.0000e-03 -1.3800e-01 2.3400e-01 |
|
|
-2.9300e-01 1.5500e-01 2.5600e-01 2.7200e-01 1.2600e-01 1.9000e-01 |
|
|
-7.2000e-02 7.3000e-02 1.1700e-01 -1.1100e-01 5.9000e-02 -2.1100e-01 |
|
|
-1.8700e-01 -2.0000e-03 -3.6000e-02 -2.0400e-01 3.1300e-01 1.1600e-01 |
|
|
1.4800e-01 1.3000e-02 2.5200e-01 1.9700e-01 -6.7000e-02 4.5000e-02 |
|
|
1.3100e-01 -8.0000e-03 5.9000e-02 3.0800e-01 -3.2200e-01 -5.3000e-02 |
|
|
-1.5500e-01 -2.2100e-01 -7.6000e-02 1.3600e-01] |
|
|
``` |
|
|
|
|
|
```python |
|
|
# Find most similar words |
|
|
print(d2v_vectors.most_similar("klijent", topn=5)) |
|
|
``` |
|
|
``` |
|
|
[('interfejs', 0.9971136450767517), |
|
|
('mušterija', 0.996911883354187), |
|
|
('provajder', 0.9968076348304749), |
|
|
('sugrađanin', 0.9967014789581299), |
|
|
('komšija', 0.9965119361877441)] |
|
|
``` |
|
|
|
|
|
<div class="inline-flex flex-col" style="line-height: 1.5;padding-right:50px"> |
|
|
<div style="text-align: center; margin-top: 3px; font-size: 16px; font-weight: 800">Author</div> |
|
|
<a href="https://huggingface.co/rankas"> |
|
|
<div class="flex"> |
|
|
<div |
|
|
style="display:block; margin-left: auto; margin-right: auto; width: 92px; height:92px; border-radius: 50%; |
|
|
background-size: cover; background-image: url('https://cdn-avatars.huggingface.co/v1/production/uploads/63f8fa204ef4aacb65a00043/IlrBetI15qnGsc798R6tO.jpeg?w=200&h=200&f=face')"> |
|
|
</div> |
|
|
</div> |
|
|
</a> |
|
|
<div style="text-align: center; font-size: 16px; font-weight: 800">Ranka Stanković</div> |
|
|
<div> |
|
|
<a href="https://huggingface.co/rankas"> |
|
|
<div style="text-align: center; font-size: 14px;">@rankas</div> |
|
|
</a> |
|
|
</div> |
|
|
</div> |
|
|
</div> |
|
|
<div class="inline-flex flex-col" style="line-height: 1.5;padding-right:50px"> |
|
|
<div style="text-align: center; margin-top: 3px; font-size: 16px; font-weight: 800">Author</div> |
|
|
<a href="https://huggingface.co/JovanaR"> |
|
|
<div class="flex"> |
|
|
<div |
|
|
style="display:block; margin-left: auto; margin-right: auto; width: 92px; height:92px; border-radius: 50%; |
|
|
background-size: cover; background-image: url('https://cdn-avatars.huggingface.co/v1/production/uploads/no-auth/gfI5-noC3Si2qlV6vRiwL.png?w=200&h=200&f=face')"> |
|
|
</div> |
|
|
</div> |
|
|
</a> |
|
|
<div style="text-align: center; font-size: 16px; font-weight: 800">Jovana Rađenović</div> |
|
|
<div> |
|
|
<a href="https://huggingface.co/JovanaR"> |
|
|
<div style="text-align: center; font-size: 14px;">@JovanaR</div> |
|
|
</a> |
|
|
</div> |
|
|
</div> |
|
|
</div> |
|
|
|
|
|
<div class="inline-flex flex-col" style="line-height: 1.5;"> |
|
|
<div style="text-align: center; margin-top: 3px; font-size: 16px; font-weight: 800">Computation</div> |
|
|
<a href="https://tesla.rgf.bg.ac.rs"> |
|
|
<div class="flex"> |
|
|
<div |
|
|
style="display:block; margin-left: auto; margin-right: auto; width: 92px; height:92px; border-radius: 50%; |
|
|
background-size: cover; background-image: url(https://cdn-avatars.huggingface.co/v1/production/uploads/63bc254fb8c61b8aa496a39b/TfM_-sc8-b34ddfhHBGTA.png?w=200&h=200&f=face)"> |
|
|
</div> |
|
|
</div> |
|
|
</a> |
|
|
<div style="text-align: center; font-size: 16px; font-weight: 800">TESLA project</div> |
|
|
<div> |
|
|
<a href="https://huggingface.co/te-sla"> |
|
|
<div style="text-align: center; font-size: 14px;">@te-sla</div> |
|
|
</a> |
|
|
</div> |
|
|
</div> |
|
|
</div> |
|
|
<br/><br/> |
|
|
## Citation |
|
|
|
|
|
```bibtex |
|
|
@inproceedings{stankovic-dict2vec, |
|
|
author = {Ranka Stanković and Jovana Rađenović and Mihailo Škorić and Marko Putniković}, |
|
|
title = {Learning Word Embeddings using Lexical Resources and Corpora}, |
|
|
booktitle = {15th International Conference on Information Society and Technology, ISIST 2025, Kopaonik}, |
|
|
year = {2025}, |
|
|
address = {Kopaonik, Belgrade}, |
|
|
url = {https://doi.org/10.5281/zenodo.15093900} |
|
|
} |
|
|
``` |
|
|
<div id="zastava"> |
|
|
<div class="grb"> |
|
|
<img src="https://www.ai.gov.rs/img/logo_60x120-2.png" style="position:relative; left:30px; z-index:10; height:85px"> |
|
|
</div> |
|
|
<table width=100% style="border:0px"> |
|
|
<tr style="background-color:#C6363C;width:100%;border:0px;height:30px"><td style="width:100vw"></td></tr> |
|
|
<tr style="background-color:#0C4076;width:100%;border:0px;height:30px"><td></td></tr> |
|
|
<tr style="background-color:#ffffff;width:100%;border:0px;height:30px"><td></td></tr> |
|
|
</table> |
|
|
</div> |
|
|
|
|
|
<table style="width:100%;height:100%"> |
|
|
<tr style="width:100%;height:100%"> |
|
|
<td width=50%> |
|
|
<p>Истраживање је спроведено уз подршку Фонда за науку Републике Србије, #7276, Text Embeddings – Serbian Language Applications – TESLA</p> |
|
|
</td> |
|
|
<td> |
|
|
<p>This research was supported by the Science Fund of the Republic of Serbia, #7276, Text Embeddings - Serbian Language Applications - TESLA</p> |
|
|
</td> |
|
|
</tr> |
|
|
</table> |
|
|
|
|
|
|
|
|
|
|
|
<style> |
|
|
.ffeat { |
|
|
color:red |
|
|
} |
|
|
|
|
|
.cover { |
|
|
width: 100%; |
|
|
margin-bottom: 5pt |
|
|
} |
|
|
|
|
|
.highlight-container, .highlight { |
|
|
position: relative; |
|
|
text-decoration:none |
|
|
} |
|
|
|
|
|
.highlight-container { |
|
|
display: inline-block; |
|
|
|
|
|
} |
|
|
|
|
|
.highlight{ |
|
|
color:white; |
|
|
text-transform:uppercase; |
|
|
font-size: 16pt; |
|
|
} |
|
|
|
|
|
.highlight-container{ |
|
|
padding:5px 10px |
|
|
} |
|
|
|
|
|
.highlight-container:before { |
|
|
content: " "; |
|
|
display: block; |
|
|
height: 100%; |
|
|
width: 100%; |
|
|
margin-left: 0px; |
|
|
margin-right: 0px; |
|
|
position: absolute; |
|
|
background: #e80909; |
|
|
transform: rotate(2deg); |
|
|
top: -1px; |
|
|
left: -1px; |
|
|
border-radius: 20% 25% 20% 24%; |
|
|
padding: 10px 18px 18px 10px; |
|
|
} |
|
|
|
|
|
div.grb, #zastava>table { |
|
|
position:absolute; |
|
|
top:0px; |
|
|
left: 0px; |
|
|
margin:0px |
|
|
} |
|
|
|
|
|
div.grb>img, #zastava>table{ |
|
|
margin:0px |
|
|
} |
|
|
|
|
|
#zastava { |
|
|
position: relative; |
|
|
margin-bottom:120px |
|
|
} |
|
|
|
|
|
p { |
|
|
font-size:14pt |
|
|
} |
|
|
</style> |