commit files to HF hub
Browse files
- config.json +1 -0
- pipeline.py +0 -10
- tokenizer_config.json +1 -0
config.json
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
{
|
|
|
|
| 2 |
"_name_or_path": "rbawden/modern_french_normalisation",
|
| 3 |
"activation_dropout": 0.0,
|
| 4 |
"activation_function": "relu",
|
|
|
|
| 1 |
{
|
| 2 |
+
"_commit_hash": "8d7a2f712cf7ff2b003474563a711218d61cf5c3",
|
| 3 |
"_name_or_path": "rbawden/modern_french_normalisation",
|
| 4 |
"activation_dropout": 0.0,
|
| 5 |
"activation_function": "relu",
|
pipeline.py
CHANGED
|
@@ -10,7 +10,6 @@ import pickle
|
|
| 10 |
from tqdm.auto import tqdm
|
| 11 |
import operator
|
| 12 |
from datasets import load_dataset
|
| 13 |
-
from transformers.pipelines import PIPELINE_REGISTRY
|
| 14 |
|
| 15 |
def _create_modified_versions(entry=None):
|
| 16 |
if entry is None:
|
|
@@ -813,15 +812,6 @@ def normalise_from_stdin(batch_size=32, beam_size=5, cache_file=None, no_postpro
|
|
| 813 |
# print('pred: ' + ''.join([sent['text'][x] for x in range(a[0], max(len(a), a[1]))]) + '')
|
| 814 |
|
| 815 |
return normalised_outputs
|
| 816 |
-
|
| 817 |
-
|
| 818 |
-
PIPELINE_REGISTRY.register_pipeline(
|
| 819 |
-
"modern-french-normalisation",
|
| 820 |
-
pipeline_class=NormalisationPipeline,
|
| 821 |
-
pt_model=AutoModelForSeq2SeqLM,
|
| 822 |
-
default={"pt": ("rbawden/modern_french_normalisation", "main")},
|
| 823 |
-
type="text",
|
| 824 |
-
)
|
| 825 |
|
| 826 |
if __name__ == '__main__':
|
| 827 |
import argparse
|
|
|
|
| 10 |
from tqdm.auto import tqdm
|
| 11 |
import operator
|
| 12 |
from datasets import load_dataset
|
|
|
|
| 13 |
|
| 14 |
def _create_modified_versions(entry=None):
|
| 15 |
if entry is None:
|
|
|
|
| 812 |
# print('pred: ' + ''.join([sent['text'][x] for x in range(a[0], max(len(a), a[1]))]) + '')
|
| 813 |
|
| 814 |
return normalised_outputs
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 815 |
|
| 816 |
if __name__ == '__main__':
|
| 817 |
import argparse
|
tokenizer_config.json
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"bos_token": "<s>",
|
| 3 |
"eos_token": "</s>",
|
|
|
|
| 4 |
"name_or_path": "rbawden/modern_french_normalisation",
|
| 5 |
"pad_token": "<pad>",
|
| 6 |
"special_tokens_map_file": "/home/rbawden/.cache/huggingface/transformers/b256f782c7622ee7cd8f990f24154fee35ec73f5b93466b241d479575da80255.9d6cd81ef646692fb1c169a880161ea1cb95f49694f220aced9b704b457e51dd",
|
|
|
|
| 1 |
{
|
| 2 |
"bos_token": "<s>",
|
| 3 |
"eos_token": "</s>",
|
| 4 |
+
"model_max_length": 1000000000000000019884624838656,
|
| 5 |
"name_or_path": "rbawden/modern_french_normalisation",
|
| 6 |
"pad_token": "<pad>",
|
| 7 |
"special_tokens_map_file": "/home/rbawden/.cache/huggingface/transformers/b256f782c7622ee7cd8f990f24154fee35ec73f5b93466b241d479575da80255.9d6cd81ef646692fb1c169a880161ea1cb95f49694f220aced9b704b457e51dd",
|