Spaces:
Sleeping
Sleeping
Commit
·
7d4468d
1
Parent(s):
e9e4696
Updating colors
Browse files
app.py
CHANGED
|
@@ -10,9 +10,11 @@ def inference(input_text):
|
|
| 10 |
encoding = tokenizer.encode_ordinary(input_text)
|
| 11 |
sentence = [tokenizer.decode([x]) for x in encoding]
|
| 12 |
color_sentence = []
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
|
|
|
|
|
|
| 16 |
|
| 17 |
title = "Bilingual Tokenizer"
|
| 18 |
description = "A simple Gradio interface to see tokenization of Hindi and English(Hinglish) text"
|
|
|
|
| 10 |
encoding = tokenizer.encode_ordinary(input_text)
|
| 11 |
sentence = [tokenizer.decode([x]) for x in encoding]
|
| 12 |
color_sentence = []
|
| 13 |
+
color_encoding = []
|
| 14 |
+
for word, encode in zip(sentence, encoding):
|
| 15 |
+
color_sentence.append((word, str(encode)))
|
| 16 |
+
color_encoding.append((encode, str(encode)))
|
| 17 |
+
return len(encoding), color_sentence, color_encoding
|
| 18 |
|
| 19 |
title = "Bilingual Tokenizer"
|
| 20 |
description = "A simple Gradio interface to see tokenization of Hindi and English(Hinglish) text"
|