Spaces:
Sleeping
Sleeping
| from gradio import Dataframe, Interface, Number, TextArea, Blocks, Row | |
| from pandas import DataFrame | |
| from transformers import CLIPTokenizer, PreTrainedTokenizer | |
# CLIP tokenizer shared by to_tokens(); created once when the module is
# imported, from the "openai/clip-vit-large-patch14" checkpoint.
TOKENIZER: PreTrainedTokenizer = CLIPTokenizer.from_pretrained(
    "openai/clip-vit-large-patch14",
)
def to_tokens(prompt: str) -> tuple[int, DataFrame]:
    """Tokenize *prompt* with the module-level ``TOKENIZER``.

    Args:
        prompt: Text to tokenize. ``None`` (which a UI text component may
            pass when it holds no value) is treated as an empty string.

    Returns:
        A ``(count, table)`` pair. ``count`` is the number of tokens
        produced by ``TOKENIZER.tokenize``; ``table`` is a DataFrame with
        one row per token and the columns ``No`` (1-based position),
        ``Token`` (token string) and ``ID`` (id returned by
        ``TOKENIZER.convert_tokens_to_ids``).
    """
    # Normalise a missing value so the live UI shows an empty result
    # rather than surfacing a tokenizer error.
    text = "" if prompt is None else prompt

    tokens = TOKENIZER.tokenize(text)
    ids = TOKENIZER.convert_tokens_to_ids(tokens)
    count = len(tokens)

    table = DataFrame({
        "No": range(1, count + 1),
        "Token": tokens,
        "ID": ids,
    })
    return count, table
def main():
    """Build the "Token Counter" Gradio interface and launch it."""
    # Components are created in the same order as they appear in the UI:
    # one text input, then the two outputs fed by to_tokens().
    prompt_box = TextArea(label="Prompt")
    count_box = Number(label="Token Count")
    token_table = Dataframe(
        headers=["No", "Token", "ID"],
        datatype=["number", "str", "number"],
        label="Tokens",
    )

    demo = Interface(
        title="Token Counter",
        fn=to_tokens,
        inputs=prompt_box,
        outputs=[count_box, token_table],
        live=True,
        allow_flagging="never",
    )
    demo.launch()
# Launch the app only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()