Spaces:
Build error
Build error
Update pages/15_Plus_Detokenizer.py
Browse files- pages/15_Plus_Detokenizer.py +0 -14
pages/15_Plus_Detokenizer.py
CHANGED
|
@@ -145,20 +145,6 @@ if st.button("Detokenize"):
|
|
| 145 |
st.write("Detokenized sentence:")
|
| 146 |
st.write(detokenized_sentence)
|
| 147 |
|
| 148 |
-
# Tokenization section
|
| 149 |
-
st.header("Tokenization")
|
| 150 |
-
sentence = st.text_input("Enter a sentence to tokenize:", "cr8 lg")
|
| 151 |
-
|
| 152 |
-
def format_token_ids(token_ids):
|
| 153 |
-
formatted_ids = [str(token_id).zfill(5) for token_id in token_ids]
|
| 154 |
-
return ''.join(formatted_ids)
|
| 155 |
-
|
| 156 |
-
if st.button("Tokenize"):
|
| 157 |
-
input_ids = tokenizer(sentence, return_tensors='pt').input_ids
|
| 158 |
-
token_ids_list = input_ids[0].tolist()
|
| 159 |
-
formatted_token_ids = format_token_ids(token_ids_list)
|
| 160 |
-
st.write("Tokenized input IDs (formatted):")
|
| 161 |
-
st.write(formatted_token_ids)
|
| 162 |
|
| 163 |
# Load the model
|
| 164 |
gpt2 = AutoModelForCausalLM.from_pretrained('gpt2')
|
|
|
|
| 145 |
st.write("Detokenized sentence:")
|
| 146 |
st.write(detokenized_sentence)
|
| 147 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 148 |
|
| 149 |
# Load the model
|
| 150 |
gpt2 = AutoModelForCausalLM.from_pretrained('gpt2')
|