Update app.py
Browse files
app.py
CHANGED
|
@@ -1,13 +1,20 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
from transformers import AutoTokenizer
|
| 3 |
|
| 4 |
-
|
|
|
|
|
|
|
| 5 |
def tokenize_text(text, tokenizer_name):
|
| 6 |
tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
|
| 7 |
tokenized_text = tokenizer.tokenize(text)
|
| 8 |
input_ids = tokenizer.convert_tokens_to_ids(tokenized_text)
|
| 9 |
-
decoded_text = tokenizer.decode(input_ids)
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
|
| 13 |
# Define available tokenizers
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
from transformers import AutoTokenizer
|
| 3 |
|
| 4 |
+
# Gradio HTML component labelled "Token Frequency Chart", created at import time.
# NOTE(review): nothing in this view shows the component being attached to an
# interface/layout -- confirm where it is wired in as an output.
chart_html = gr.HTML(label="Token Frequency Chart")
|
| 5 |
+
|
| 6 |
+
# Define a function to tokenize text and create visualization
|
| 7 |
# Tokenizers already loaded, keyed by the name passed to from_pretrained().
# Loading is comparatively expensive, so each name is loaded at most once per
# process instead of once per request.
_TOKENIZER_CACHE = {}


def _get_tokenizer(tokenizer_name):
    """Return the tokenizer for *tokenizer_name*, loading it on first use."""
    try:
        return _TOKENIZER_CACHE[tokenizer_name]
    except KeyError:
        tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
        _TOKENIZER_CACHE[tokenizer_name] = tokenizer
        return tokenizer


def tokenize_text(text, tokenizer_name):
    """Tokenize *text* and build a token-frequency visualization.

    Args:
        text: The input string to tokenize.
        tokenizer_name: Identifier passed to ``AutoTokenizer.from_pretrained``.

    Returns:
        A 2-tuple ``(summary, chart)`` where ``summary`` is a text report of
        the tokens, their ids and the decoded round-trip, and ``chart`` is
        whatever ``create_token_frequency_chart`` returns for the token list
        (presumably HTML markup -- that helper is defined elsewhere).
    """
    tokenizer = _get_tokenizer(tokenizer_name)

    tokenized_text = tokenizer.tokenize(text)
    input_ids = tokenizer.convert_tokens_to_ids(tokenized_text)
    decoded_text = tokenizer.decode(input_ids)

    # Create visualization HTML. Deliberately not named ``chart_html`` so the
    # module-level Gradio component of that name is not shadowed.
    chart_markup = create_token_frequency_chart(tokenized_text)

    summary = (
        f"Tokenized Text: {tokenized_text}\n"
        f"Input IDs: {input_ids}\n"
        f"Decoded Text: {decoded_text}"
    )
    return summary, chart_markup
|
| 17 |
+
|
| 18 |
|
| 19 |
|
| 20 |
# Define available tokenizers
|