Commit
·
2ee9ca3
1
Parent(s):
37f3e35
Update README.md
Browse files
README.md
CHANGED
|
@@ -146,7 +146,11 @@ def extract_triplets_typed(text):
|
|
| 146 |
return triplets
|
| 147 |
|
| 148 |
# Load model and tokenizer
|
| 149 |
-
tokenizer = AutoTokenizer.from_pretrained("Babelscape/mrebel-large", src_lang="en_XX", tgt_lang="tp_XX")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 150 |
model = AutoModelForSeq2SeqLM.from_pretrained("Babelscape/mrebel-large")
|
| 151 |
gen_kwargs = {
|
| 152 |
"max_length": 256,
|
|
|
|
| 146 |
return triplets
|
| 147 |
|
| 148 |
# Load model and tokenizer
|
| 149 |
+
tokenizer = AutoTokenizer.from_pretrained("Babelscape/mrebel-large", src_lang="en_XX", tgt_lang="tp_XX")
|
| 150 |
+
# Here we set English ("en_XX") as the source language. To change the source language, swap the first token of the input for your desired language code, or change `src_lang` to another supported language. For Catalan ("ca_XX") or Greek ("el_EL"), which were not included in mBART pretraining, you need a workaround:
|
| 151 |
+
# tokenizer._src_lang = "ca_XX"
|
| 152 |
+
# tokenizer.cur_lang_code_id = tokenizer.convert_tokens_to_ids("ca_XX")
|
| 153 |
+
# tokenizer.set_src_lang_special_tokens("ca_XX")
|
| 154 |
model = AutoModelForSeq2SeqLM.from_pretrained("Babelscape/mrebel-large")
|
| 155 |
gen_kwargs = {
|
| 156 |
"max_length": 256,
|