WebTokenizer

Sleeping

xzuyn committed on Apr 20, 2024

Commit

c9f41a3

verified ·

1 Parent(s): 0afb719

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -22,6 +22,7 @@ def tokenize(input_text):
     phi2_tokens = len(phi2_tokenizer(input_text, add_special_tokens=True)["input_ids"])
     t5_tokens = len(t5_tokenizer(input_text, add_special_tokens=True)["input_ids"])
     gemma_tokens = len(gemma_tokenizer(input_text, add_special_tokens=True)["input_ids"])
     results = {
         "LLaMa": llama_tokens,
@@ -32,7 +33,8 @@ def tokenize(input_text):
         "Falcon": falcon_tokens,
         "Phi-2": phi2_tokens,
         "T5": t5_tokens,
-        "Gemma": gemma_tokens
     }
     # Sort the results in descending order based on token length
@@ -51,6 +53,7 @@ if __name__ == "__main__":
     phi2_tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2")
     t5_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-xxl")
     gemma_tokenizer = AutoTokenizer.from_pretrained("alpindale/gemma-2b")
-    iface = gr.Interface(fn=tokenize, inputs=gr.Textbox(lines=9), outputs="text")
     iface.launch()

     phi2_tokens = len(phi2_tokenizer(input_text, add_special_tokens=True)["input_ids"])
     t5_tokens = len(t5_tokenizer(input_text, add_special_tokens=True)["input_ids"])
     gemma_tokens = len(gemma_tokenizer(input_text, add_special_tokens=True)["input_ids"])
+    command_r_tokens = len(command_r_tokenizer(input_text, add_special_tokens=True)["input_ids"])
     results = {
         "LLaMa": llama_tokens,
         "Falcon": falcon_tokens,
         "Phi-2": phi2_tokens,
         "T5": t5_tokens,
+        "Gemma": gemma_tokens,
+        "Command-R": command_r_tokens
     }
     # Sort the results in descending order based on token length
     phi2_tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2")
     t5_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-xxl")
     gemma_tokenizer = AutoTokenizer.from_pretrained("alpindale/gemma-2b")
+    command_r_tokenizer = AutoTokenizer.from_pretrained("CohereForAI/c4ai-command-r-plus")
+    iface = gr.Interface(fn=tokenize, inputs=gr.Textbox(lines=10), outputs="text")
     iface.launch()