saracandu committed on
Commit
886c8fb
·
verified ·
1 Parent(s): 75bfa8f

Upload tokenizer

Browse files
Files changed (1) hide show
  1. tokenizer.py +3 -5
tokenizer.py CHANGED
@@ -1,9 +1,10 @@
1
  import re
2
  import os
3
- import json
4
  from typing import Any, Dict, List, Optional, Tuple, Union
5
  from transformers import PreTrainedTokenizer
6
  from transformers.utils import logging
 
 
7
 
8
  logger = logging.get_logger(__name__)
9
 
@@ -17,10 +18,7 @@ def load_json(path: str) -> Union[Dict, List]:
17
  Returns:
18
  Union[Dict, List]: The parsed content of the JSON file, which could be a dictionary or a list.
19
  """
20
- full_path = path
21
- if not os.path.exists(full_path):
22
- # carica relativo alla posizione del file tokenizer.py
23
- full_path = os.path.join(os.path.dirname(__file__), path)
24
  with open(full_path, "r", encoding="utf-8") as f:
25
  return json.load(f)
26
 
 
1
  import re
2
  import os
 
3
  from typing import Any, Dict, List, Optional, Tuple, Union
4
  from transformers import PreTrainedTokenizer
5
  from transformers.utils import logging
6
+ from pathlib import Path
7
+ import json
8
 
9
  logger = logging.get_logger(__name__)
10
 
 
18
  Returns:
19
  Union[Dict, List]: The parsed content of the JSON file, which could be a dictionary or a list.
20
  """
21
+ full_path = Path(__file__).parent / path
 
 
 
22
  with open(full_path, "r", encoding="utf-8") as f:
23
  return json.load(f)
24