Update README.md
Browse files
README.md
CHANGED
|
@@ -1,3 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
```python
|
| 2 |
from transformers import RobertaTokenizerFast, AutoModelForSequenceClassification
|
| 3 |
from datasets import load_dataset, Dataset
|
|
@@ -40,8 +44,17 @@ window_size = 5
|
|
| 40 |
context_l = create_windowed_context(raw_dataset, window_size)
|
| 41 |
raw_dataset_window = raw_dataset.map(partial(create_windowed_context_ds, context_l), batched=False, with_indices=True)
|
| 42 |
tokenized_data = raw_dataset_window.map(tokenize_function, batched=True)
|
|
|
|
| 43 |
|
| 44 |
|
|
|
|
| 45 |
|
| 46 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
```
|
|
|
|
## Hebrew Conclusion Extraction Model (based on sequence classification)

#### How to use

```python
from transformers import RobertaTokenizerFast, AutoModelForSequenceClassification
from datasets import load_dataset, Dataset
context_l = create_windowed_context(raw_dataset, window_size)
raw_dataset_window = raw_dataset.map(partial(create_windowed_context_ds, context_l), batched=False, with_indices=True)
tokenized_data = raw_dataset_window.map(tokenize_function, batched=True)
```

### Citing

If you use HeConE in your research, please cite [HeRo: RoBERTa and Longformer Hebrew Language Models](http://arxiv.org/abs/2304.11077).

```
@article{shalumov2023hero,
    title={HeRo: RoBERTa and Longformer Hebrew Language Models},
    author={Vitaly Shalumov and Harel Haskey},
    year={2023},
    journal={arXiv:2304.11077},
}
```
|