Spaces:
Sleeping
Sleeping
Commit ·
268d71e
1
Parent(s): 83e827a
compression ratio of text added
Browse files
app.py
CHANGED
|
@@ -64,8 +64,12 @@ def tokenize_text(text):
|
|
| 64 |
|
| 65 |
highlighted_text = "".join(html_parts)
|
| 66 |
|
|
|
|
|
|
|
|
|
|
| 67 |
return (
|
| 68 |
len(token_ids), # Token count
|
|
|
|
| 69 |
highlighted_text, # Highlighted text
|
| 70 |
"\n".join(token_list), # Token list
|
| 71 |
)
|
|
@@ -93,13 +97,14 @@ with gr.Blocks(css=custom_css) as demo:
|
|
| 93 |
|
| 94 |
with gr.Column(scale=1):
|
| 95 |
token_count = gr.Number(label="Token Count")
|
|
|
|
| 96 |
highlighted_output = gr.HTML(label="Tokenized Text")
|
| 97 |
token_list = gr.Textbox(label="Token List", lines=10)
|
| 98 |
|
| 99 |
input_text.change(
|
| 100 |
fn=tokenize_text,
|
| 101 |
inputs=[input_text],
|
| 102 |
-
outputs=[token_count, highlighted_output, token_list],
|
| 103 |
)
|
| 104 |
|
| 105 |
demo.launch()
|
|
|
|
| 64 |
|
| 65 |
highlighted_text = "".join(html_parts)
|
| 66 |
|
| 67 |
+
# Calculate compression ratio
|
| 68 |
+
compression_ratio = len(text) / len(token_ids) if len(token_ids) > 0 else 0
|
| 69 |
+
|
| 70 |
return (
|
| 71 |
len(token_ids), # Token count
|
| 72 |
+
compression_ratio, # Compression ratio
|
| 73 |
highlighted_text, # Highlighted text
|
| 74 |
"\n".join(token_list), # Token list
|
| 75 |
)
|
|
|
|
| 97 |
|
| 98 |
with gr.Column(scale=1):
|
| 99 |
token_count = gr.Number(label="Token Count")
|
| 100 |
+
compression_ratio = gr.Number(label="Compression Ratio")
|
| 101 |
highlighted_output = gr.HTML(label="Tokenized Text")
|
| 102 |
token_list = gr.Textbox(label="Token List", lines=10)
|
| 103 |
|
| 104 |
input_text.change(
|
| 105 |
fn=tokenize_text,
|
| 106 |
inputs=[input_text],
|
| 107 |
+
outputs=[token_count, compression_ratio, highlighted_output, token_list],
|
| 108 |
)
|
| 109 |
|
| 110 |
demo.launch()
|