Update README.md
Browse files
README.md
CHANGED
|
@@ -8,3 +8,21 @@ This is a model for word-based spell correction tasks. This model is generated b
|
|
| 8 |
This model works best for word-based spell correction (it is less accurate on sequences of words).
|
| 9 |
|
| 10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
This model works best for word-based spell correction (it is less accurate on sequences of words).
|
| 9 |
|
| 10 |
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
| 16 |
+
tokenizer = AutoTokenizer.from_pretrained("veghar/spell_correct_bart_base")
|
| 17 |
+
model = AutoModelForSeq2SeqLM.from_pretrained("veghar/spell_correct_bart_base")
|
| 18 |
+
text='believ'
|
| 19 |
+
text_tok=tokenizer(text,padding=True, return_tensors='pt')
|
| 20 |
+
input_ids = text_tok['input_ids']
|
| 21 |
+
outputs = model.generate(input_ids=input_ids, max_length=10,num_return_sequences=3)
|
| 22 |
+
corrected_sentences = tokenizer.batch_decode(outputs, skip_special_tokens=True)
|
| 23 |
+
|
| 24 |
+
print('Misspelled word:', text)
|
| 25 |
+
print('Corrected word:', corrected_sentences)
|
| 26 |
+
|
| 27 |
+
>Misspelled word: believ
|
| 28 |
+
>Corrected word: ['believe', 'belief', 'believer']
|