Update README.md
Browse files
README.md
CHANGED
|
@@ -17,21 +17,15 @@ The model is designed to be used to normalise 17th c. French texts. The best per
|
|
| 17 |
|
| 18 |
### How to use
|
| 19 |
|
| 20 |
-
The model is to be used with the custom pipeline available in
|
| 21 |
|
| 22 |
```
|
| 23 |
-
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
cache_lexicon_path="~/.normalisation_lex.pickle" # optionally set a path to store the processed lexicon (speeds up loading)
|
| 27 |
-
tokeniser = AutoTokenizer.from_pretrained("rbawden/modern_french_normalisation")
|
| 28 |
-
model = AutoModelForSeq2SeqLM.from_pretrained("rbawden/modern_french_normalisation")
|
| 29 |
-
norm_pipeline = NormalisationPipeline(model=model, tokenizer=tokeniser, batch_size=32, beam_size=5, cache_file=cache_lexicon_path)
|
| 30 |
|
| 31 |
list_inputs = ["Elle haïſſoit particulierement le Cardinal de Lorraine;", "Adieu, i'iray chez vous tantoſt vous rendre grace."]
|
| 32 |
-
list_outputs = norm_pipeline(list_inputs)
|
| 33 |
print(list_outputs)
|
| 34 |
-
|
| 35 |
>> [{'text': 'Elle haïssait particulièrement le Cardinal de Lorraine; ', 'alignment': [([0, 3], [0, 3]), ([5, 12], [5, 12]), ([14, 29], [14, 29]), ([31, 32], [31, 32]), ([34, 41], [34, 41]), ([43, 44], [43, 44]), ([46, 53], [46, 53]), ([54, 54], [54, 54])]}, {'text': "Adieu, j'irai chez vous tantôt vous rendre grâce. ", 'alignment': [([0, 4], [0, 4]), ([5, 5], [5, 5]), ([7, 8], [7, 8]), ([9, 12], [9, 12]), ([14, 17], [14, 17]), ([19, 22], [19, 22]), ([24, 30], [24, 29]), ([32, 35], [31, 34]), ([37, 42], [36, 41]), ([44, 48], [43, 47]), ([49, 49], [48, 48])]}]
|
| 36 |
```
|
| 37 |
|
|
|
|
| 17 |
|
| 18 |
### How to use
|
| 19 |
|
| 20 |
+
The model is to be used with the custom pipeline available in this repository (requires transformers>=4.21.0):
|
| 21 |
|
| 22 |
```
|
| 23 |
+
from transformers import pipeline
|
| 24 |
+
normaliser = pipeline(model="rbawden/modern_french_normalisation", batch_size=32, beam_size=5, cache_file="./cache.pickle", trust_remote_code=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
list_inputs = ["Elle haïſſoit particulierement le Cardinal de Lorraine;", "Adieu, i'iray chez vous tantoſt vous rendre grace."]
|
| 27 |
+
list_outputs = normaliser(list_inputs)
|
| 28 |
print(list_outputs)
|
|
|
|
| 29 |
>> [{'text': 'Elle haïssait particulièrement le Cardinal de Lorraine; ', 'alignment': [([0, 3], [0, 3]), ([5, 12], [5, 12]), ([14, 29], [14, 29]), ([31, 32], [31, 32]), ([34, 41], [34, 41]), ([43, 44], [43, 44]), ([46, 53], [46, 53]), ([54, 54], [54, 54])]}, {'text': "Adieu, j'irai chez vous tantôt vous rendre grâce. ", 'alignment': [([0, 4], [0, 4]), ([5, 5], [5, 5]), ([7, 8], [7, 8]), ([9, 12], [9, 12]), ([14, 17], [14, 17]), ([19, 22], [19, 22]), ([24, 30], [24, 29]), ([32, 35], [31, 34]), ([37, 42], [36, 41]), ([44, 48], [43, 47]), ([49, 49], [48, 48])]}]
|
| 30 |
```
|
| 31 |
|