schoginitoys committed on
Commit
9628d55
·
verified ·
1 Parent(s): 483fb33

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +8 -1
src/streamlit_app.py CHANGED
@@ -26,10 +26,17 @@ sentence = st.text_input("Enter your sentence", "Learning is fun")
26
  embedding_dim = st.slider("Embedding Dimension (even only)", min_value=4, max_value=64, value=8, step=2)
27
 
28
  # --- Load tokenizer ---
 
29
  # Set custom cache directory within your app's working directory (which is writable on Spaces)
30
  os.environ['TRANSFORMERS_CACHE'] = './hf_cache'
 
31
  # Load the tokenizer using the custom cache path
32
- tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", cache_dir="./hf_cache")
 
 
 
 
 
33
  # tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
34
  input_ids = tokenizer.encode(sentence, return_tensors="pt")[0]
35
  tokens = tokenizer.convert_ids_to_tokens(input_ids)
 
26
  embedding_dim = st.slider("Embedding Dimension (even only)", min_value=4, max_value=64, value=8, step=2)
27
 
28
  # --- Load tokenizer ---
29
+
30
  # Set custom cache directory within your app's working directory (which is writable on Spaces)
31
  os.environ['TRANSFORMERS_CACHE'] = './hf_cache'
32
+
33
  # Load the tokenizer using the custom cache path
34
+ # tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", cache_dir="./hf_cache")
35
+ from transformers import GPT2TokenizerFast
36
+ # Load tokenizer from bundled local files only
37
+ tokenizer = GPT2TokenizerFast.from_pretrained("./assets/tokenizer", local_files_only=True)
38
+
39
+
40
  # tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
41
  input_ids = tokenizer.encode(sentence, return_tensors="pt")[0]
42
  tokens = tokenizer.convert_ids_to_tokens(input_ids)