# Gradio Space: bilingual (Hindi/English) BPE tokenizer demo
| import torch | |
| import random | |
| import gradio as gr | |
| from language_bpe import BPETokenizer | |
# Module-level tokenizer singleton shared by every inference call.
# NOTE(review): `language_bpe` is a project-local package — the model file
# 'models/english_5000.model' is presumably a 5000-merge English BPE vocab;
# confirm against the training script.
tokenizer = BPETokenizer()
tokenizer.load('models/english_5000.model')
def inference(input_text):
    """Tokenize *input_text* and visualize the resulting tokens.

    Returns a 3-tuple consumed by the Gradio outputs:
      - token count (int),
      - the sentence rebuilt token-by-token, each token prefixed with an
        ANSI escape giving it a random background color so token
        boundaries are visible,
      - the raw list of token ids.
    """
    encoding = tokenizer.encode_ordinary(input_text)
    # Decode each token id individually so we can color token boundaries.
    tokens = [tokenizer.decode([token_id]) for token_id in encoding]
    color_sentence = ""
    for word in tokens:
        # ANSI SGR: 0 = reset, 37 = white foreground, 40-47 = background colors.
        background_color = random.randint(40, 47)
        color_sentence += f"\033[0;37;{background_color}m {word}"
    return len(encoding), color_sentence, encoding
# Static UI copy for the Gradio interface.
title = "Bilingual tokenizer"
description = "A simple Gradio interface to see tokenization of Hindi and English(Hinglish) text"

# Preloaded example sentences (one single-input row per example).
examples = [
    ["He walked into the basement with the horror movie from the night before playing in his head."],
    ["Henry couldn't decide if he was an auto mechanic or a priest."],
    ["Poison ivy grew through the fence they said was impenetrable."],
]
# Wire the inference function into a Gradio UI: one free-text input,
# three outputs (count, colorized sentence, raw encoding), then serve it.
demo = gr.Interface(
    fn=inference,
    inputs=[
        gr.Textbox(label="Enter any sentence in Hindi, English or both language", type="text"),
    ],
    outputs=[
        gr.Label(label="Token count"),
        gr.Textbox(label="Sentence after tokenization", type="text"),
        gr.Textbox(label="Encoding", type="text"),
    ],
    title=title,
    description=description,
    examples=examples,
)

demo.launch()