Spaces:
Sleeping
Sleeping
File size: 1,061 Bytes
8f9b24c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
from gradio import Dataframe, Interface, Number, TextArea, Blocks, Row
from pandas import DataFrame
from transformers import CLIPTokenizer, PreTrainedTokenizer
# Shared tokenizer instance, created once when the module is imported from the
# "openai/clip-vit-large-patch14" checkpoint and reused by every request.
TOKENIZER: PreTrainedTokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
def to_tokens(prompt: str) -> tuple[int, DataFrame]:
    """Split *prompt* into tokens using the module-level ``TOKENIZER``.

    Args:
        prompt: Text to tokenize.

    Returns:
        A ``(count, table)`` pair. ``count`` is the number of tokens produced
        by ``TOKENIZER.tokenize``; ``table`` has one row per token with the
        columns ``"No"`` (1-based position), ``"Token"`` (token string) and
        ``"ID"`` (the value from ``TOKENIZER.convert_tokens_to_ids``).
    """
    pieces = TOKENIZER.tokenize(prompt)
    piece_ids = TOKENIZER.convert_tokens_to_ids(pieces)
    count = len(pieces)

    table = DataFrame(
        {
            "No": range(1, count + 1),
            "Token": pieces,
            "ID": piece_ids,
        }
    )
    return count, table
def main():
    """Build the "Token Counter" Gradio interface around ``to_tokens`` and launch it."""
    # Components are created in the same order as they appear in the UI:
    # the prompt input first, then the two outputs.
    prompt_box = TextArea(label="Prompt")
    count_box = Number(label="Token Count")
    token_table = Dataframe(
        headers=["No", "Token", "ID"],
        datatype=["number", "str", "number"],
        label="Tokens",
    )

    demo = Interface(
        title="Token Counter",
        fn=to_tokens,
        inputs=prompt_box,
        outputs=[count_box, token_table],
        live=True,
        allow_flagging="never",
    )
    demo.launch()
# Start the app only when this file is executed as a script, so importing the
# module does not launch a server.
if __name__ == "__main__":
    main()
|