Update README.md
Browse files
README.md
CHANGED
|
@@ -31,30 +31,22 @@ language:
|
|
| 31 |
</table>
|
| 32 |
|
| 33 |
```python
|
| 34 |
-
>>> from transformers import
|
| 35 |
-
>>>
|
| 36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
```
|
| 38 |
|
| 39 |
```python
|
| 40 |
-
>>>
|
| 41 |
-
>>> from torch import LongTensor, no_grad
|
| 42 |
-
>>> from scipy import spatial
|
| 43 |
-
>>> tokenizer = AutoTokenizer.from_pretrained('te-sla/teslaXLM')
|
| 44 |
-
>>> model = AutoModelForMaskedLM.from_pretrained('te-sla/teslaXLM', output_hidden_states=True)
|
| 45 |
-
>>> x = " pas"
|
| 46 |
-
>>> y = " mačka"
|
| 47 |
-
>>> z = " svemir"
|
| 48 |
-
>>> tensor_x = LongTensor(tokenizer.encode(x, add_special_tokens=False)).unsqueeze(0)
|
| 49 |
-
>>> tensor_y = LongTensor(tokenizer.encode(y, add_special_tokens=False)).unsqueeze(0)
|
| 50 |
-
>>> tensor_z = LongTensor(tokenizer.encode(z, add_special_tokens=False)).unsqueeze(0)
|
| 51 |
-
>>> model.eval()
|
| 52 |
-
>>> with no_grad():
|
| 53 |
-
>>> vektor_x = model(input_ids=tensor_x).hidden_states[-1].squeeze()
|
| 54 |
-
>>> vektor_y = model(input_ids=tensor_y).hidden_states[-1].squeeze()
|
| 55 |
-
>>> vektor_z = model(input_ids=tensor_z).hidden_states[-1].squeeze()
|
| 56 |
-
>>> print(spatial.distance.cosine(vektor_x, vektor_y))
|
| 57 |
-
>>> print(spatial.distance.cosine(vektor_x, vektor_z))
|
| 58 |
```
|
| 59 |
|
| 60 |
<div class="inline-flex flex-col" style="line-height: 1.5;padding-right:50px">
|
|
@@ -99,18 +91,19 @@ language:
|
|
| 99 |
</div>
|
| 100 |
|
| 101 |
## Cit.
|
| 102 |
-
|
| 103 |
```bibtex
|
| 104 |
-
@inproceedings{
|
| 105 |
-
author = {Mihailo
|
| 106 |
-
title = {
|
| 107 |
booktitle = {ARTIFICIAL INTELLIGENCE CONFERENCE},
|
| 108 |
-
year = {
|
| 109 |
address = {Belgrade},
|
| 110 |
publisher = {SASA, Belgrade},
|
| 111 |
url = {}
|
| 112 |
}
|
| 113 |
```
|
|
|
|
| 114 |
<br/>
|
| 115 |
<div id="zastava">
|
| 116 |
<div class="grb">
|
|
|
|
| 31 |
</table>
|
| 32 |
|
| 33 |
```python
|
| 34 |
+
>>> from transformers import T5ForConditionalGeneration, T5TokenizerFast
|
| 35 |
+
>>> import torch
|
| 36 |
+
|
| 37 |
+
>>> model = T5ForConditionalGeneration.from_pretrained("te-sla/pilot5")
|
| 38 |
+
>>> tokenizer = T5TokenizerFast.from_pretrained("te-sla/pilot5")
|
| 39 |
+
>>> text = "ova sekcija sadrži ideje za prioritetne pravce/teme razvoja jezičkih tehnologija (NLP) za srpski jezik. Alternativni pravci razvoja su ukratko pobrojani u odeljku H2."
|
| 40 |
+
>>> input = tokenizer(text, return_tensors="pt")
|
| 41 |
+
|
| 42 |
+
>>> with torch.no_grad():
|
| 43 |
+
>>> output = model.generate(input_ids=input["input_ids"], attention_mask=input["attention_mask"], do_sample=False)
|
| 44 |
+
>>> decoded_output = tokenizer.decode(output[0], skip_special_tokens=True)
|
| 45 |
+
>>> print(decoded_output)
|
| 46 |
```
|
| 47 |
|
| 48 |
```python
|
| 49 |
+
>>> ova sekcija sadrži ideje za prioritetne pravce/teme razvoja jezičkih tehnologija (NLP) za srpski jezik. Alternativni pravci razvoja su ukratko pobrojani u odeljku H2.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
```
|
| 51 |
|
| 52 |
<div class="inline-flex flex-col" style="line-height: 1.5;padding-right:50px">
|
|
|
|
| 91 |
</div>
|
| 92 |
|
| 93 |
## Cit.
|
| 94 |
+
<!--div>
|
| 95 |
```bibtex
|
| 96 |
+
@inproceedings{skorict5,
|
| 97 |
+
author = {Mihailo Škorić},
|
| 98 |
+
title = {Pilot Text to Text Transfer Transformer Model for Serbian Language},
|
| 99 |
booktitle = {ARTIFICIAL INTELLIGENCE CONFERENCE},
|
| 100 |
+
year = {2025},
|
| 101 |
address = {Belgrade},
|
| 102 |
publisher = {SASA, Belgrade},
|
| 103 |
url = {}
|
| 104 |
}
|
| 105 |
```
|
| 106 |
+
</div-->
|
| 107 |
<br/>
|
| 108 |
<div id="zastava">
|
| 109 |
<div class="grb">
|