"""Gradio "Token Counter" app: shows how a prompt is split into CLIP tokens."""

from gradio import Dataframe, Interface, Number, TextArea, Blocks, Row
from pandas import DataFrame
from transformers import CLIPTokenizer, PreTrainedTokenizer

# Loaded once at import time so every request reuses the same tokenizer
# instance instead of re-loading the vocabulary per call.
TOKENIZER: PreTrainedTokenizer = CLIPTokenizer.from_pretrained(
    "openai/clip-vit-large-patch14"
)


def to_tokens(prompt: str) -> tuple[int, DataFrame]:
    """Tokenize *prompt* and return the token count plus a per-token table.

    Args:
        prompt: Text to tokenize. A falsy value (``None`` or ``""``) is
            treated as an empty prompt.

    Returns:
        A ``(count, table)`` pair where ``count`` is the number of tokens
        produced by ``TOKENIZER.tokenize`` and ``table`` is a DataFrame with
        one row per token and the columns ``"No"`` (1-based position),
        ``"Token"`` (token string) and ``"ID"`` (vocabulary id).
    """
    # Guard against a missing value so the tokenizer always receives a str.
    text = prompt or ""

    # NOTE(review): the count reflects only what ``tokenize`` returns;
    # presumably this excludes start/end-of-text markers -- confirm if the
    # count is compared against a model's context limit.
    tokens = TOKENIZER.tokenize(text)
    ids = TOKENIZER.convert_tokens_to_ids(tokens)

    table = DataFrame(
        {
            "No": range(1, len(tokens) + 1),
            "Token": tokens,
            "ID": ids,
        }
    )
    return len(tokens), table


def main():
    """Build the Token Counter interface and launch it."""
    app = Interface(
        title="Token Counter",
        fn=to_tokens,
        inputs=TextArea(label="Prompt"),
        outputs=[
            Number(label="Token Count"),
            Dataframe(
                headers=["No", "Token", "ID"],
                datatype=["number", "str", "number"],
                label="Tokens",
            ),
        ],
        # Re-run to_tokens as the input changes rather than on a submit click.
        live=True,
        # NOTE(review): newer Gradio releases appear to rename this argument
        # to ``flagging_mode`` -- confirm against the pinned Gradio version.
        allow_flagging="never",
    )
    app.launch()


if __name__ == "__main__":
    main()