Update code
Browse files
README.md
CHANGED
|
@@ -52,6 +52,7 @@ The training dataset is based on [The Multilingual Amazon Reviews Corpus](https:
|
|
| 52 |
The model can be used as follows:
|
| 53 |
|
| 54 |
```python
|
|
|
|
| 55 |
from transformers import AutoTokenizer, AutoModelForTokenClassification
|
| 56 |
|
| 57 |
tokenizer = AutoTokenizer.from_pretrained("msislam/code-mixed-language-detection-XLMRoberta")
|
|
@@ -60,14 +61,14 @@ model = AutoModelForTokenClassification.from_pretrained("msislam/code-mixed-lang
|
|
| 60 |
|
| 61 |
text = 'Hala Madrid y nada más. It means Go Madrid and nothing more.'
|
| 62 |
|
| 63 |
-
|
| 64 |
|
| 65 |
with torch.no_grad():
|
| 66 |
logits = model(**inputs).logits
|
| 67 |
|
| 68 |
labels_predicted = logits.argmax(-1)
|
| 69 |
|
| 70 |
-
lang_tag_predicted = [
|
| 71 |
lang_tag_predicted
|
| 72 |
```
|
| 73 |
|
|
|
|
| 52 |
The model can be used as follows:
|
| 53 |
|
| 54 |
```python
|
| 55 |
+
import torch
|
| 56 |
from transformers import AutoTokenizer, AutoModelForTokenClassification
|
| 57 |
|
| 58 |
tokenizer = AutoTokenizer.from_pretrained("msislam/code-mixed-language-detection-XLMRoberta")
|
|
|
|
| 61 |
|
| 62 |
text = 'Hala Madrid y nada más. It means Go Madrid and nothing more.'
|
| 63 |
|
| 64 |
+
inputs = tokenizer(text, add_special_tokens=False, return_tensors="pt")
|
| 65 |
|
| 66 |
with torch.no_grad():
|
| 67 |
logits = model(**inputs).logits
|
| 68 |
|
| 69 |
labels_predicted = logits.argmax(-1)
|
| 70 |
|
| 71 |
+
lang_tag_predicted = [model.config.id2label[t.item()] for t in labels_predicted[0]]
|
| 72 |
lang_tag_predicted
|
| 73 |
```
|
| 74 |
|