Kevin Hamon committed on
Commit
1b49565
·
1 Parent(s): 6146bce

remove custom from_pretrained

Browse files
Files changed (1) hide show
  1. tokenizer.py +0 -29
tokenizer.py CHANGED
@@ -364,35 +364,6 @@ class ChessTokenizer(PreTrainedTokenizer):
364
  # Non-fatal; we still saved vocab and config
365
  pass
366
 
367
- @classmethod
368
- def from_pretrained(cls, load_directory: str) -> "ChessTokenizer":
369
- """Load tokenizer from a directory previously written with `save_pretrained`.
370
-
371
- This primarily reads the vocab file and constructs the tokenizer.
372
- If a `tokenizer_config.json` exists it will be consulted for the
373
- vocab filename and special tokens (but we still instantiate using
374
- the provided class).
375
- """
376
- config_path = os.path.join(load_directory, "tokenizer_config.json")
377
- vocab_file = None
378
- if os.path.exists(config_path):
379
- try:
380
- with open(config_path, "r", encoding="utf-8") as f:
381
- cfg = json.load(f)
382
- vocab_file = os.path.join(load_directory, cfg.get("vocab_file", "vocab.json"))
383
- except Exception:
384
- pass
385
-
386
- if vocab_file is None:
387
- # Fallback: look for a vocab file in the directory
388
- candidates = [p for p in os.listdir(load_directory) if p.endswith("vocab.json")]
389
- if candidates:
390
- vocab_file = os.path.join(load_directory, candidates[0])
391
-
392
- if vocab_file is None or not os.path.exists(vocab_file):
393
- raise FileNotFoundError(f"No vocab file found in {load_directory}")
394
-
395
- return cls(vocab_file=vocab_file)
396
 
397
  def count_vocab_from_dataset(
398
  dataset_name: str = "dlouapre/lichess_2025-01_1M",
 
364
  # Non-fatal; we still saved vocab and config
365
  pass
366
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
367
 
368
  def count_vocab_from_dataset(
369
  dataset_name: str = "dlouapre/lichess_2025-01_1M",