Meshyboi committed on
Commit
f89d2a2
·
verified ·
1 Parent(s): 342f794

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -1
app.py CHANGED
def load_tokenizer():
    """Lazily load and cache the RoBERTa tokenizer for ``HF_MODEL_ID``.

    Uses a module-level ``tokenizer`` singleton so the tokenizer is only
    fetched from the Hugging Face Hub once per process.

    Returns:
        The cached ``RobertaTokenizer`` instance.

    Raises:
        RuntimeError: if fetching or constructing the tokenizer fails.
    """
    global tokenizer
    try:
        # First call populates the singleton; later calls reuse it.
        if tokenizer is None:
            tokenizer = RobertaTokenizer.from_pretrained(HF_MODEL_ID)
        return tokenizer
    except Exception as e:
        raise RuntimeError(f"Error loading tokenizer: {str(e)}")
 
def load_tokenizer():
    """Lazily load and cache the RoBERTa tokenizer stored under the
    ``tokenizer_files/`` subdirectory of the ``HF_MODEL_ID`` repo.

    The tokenizer assets are not at the repo root, so ``from_pretrained``
    cannot be pointed at ``HF_MODEL_ID`` directly; instead each file is
    downloaded individually and the tokenizer is loaded from the resulting
    local snapshot directory. Uses a module-level ``tokenizer`` singleton
    so the download happens only once per process.

    Returns:
        The cached ``RobertaTokenizer`` instance.

    Raises:
        RuntimeError: if any download or the tokenizer construction fails.
    """
    global tokenizer
    try:
        if tokenizer is None:
            print("Downloading tokenizer files...")
            tokenizer_files = [
                "tokenizer_files/vocab.json",
                "tokenizer_files/merges.txt",
                "tokenizer_files/tokenizer_config.json",
                "tokenizer_files/special_tokens_map.json",
            ]

            # hf_hub_download returns the local cached path of each file.
            # All of them land in the same snapshot subdirectory, so the
            # parent of any one downloaded file IS the directory to load
            # from — no need to re-download/resolve the model weights file
            # just to discover the snapshot path.
            local_paths = [
                hf_hub_download(repo_id=HF_MODEL_ID, filename=file_path)
                for file_path in tokenizer_files
            ]
            tokenizer_dir = os.path.dirname(local_paths[0])

            print(f"Loading tokenizer from: {tokenizer_dir}")
            # Load tokenizer from the local tokenizer_files directory.
            tokenizer = RobertaTokenizer.from_pretrained(tokenizer_dir)
            print("Tokenizer loaded successfully!")
        return tokenizer
    except Exception as e:
        # Preserve the original traceback for debugging via `from e`;
        # callers still only need to catch RuntimeError.
        raise RuntimeError(f"Error loading tokenizer: {str(e)}") from e