satyanayak committed on
Commit
268d71e
·
1 Parent(s): 83e827a

compression ratio of text added

Browse files
Files changed (1) hide show
  1. app.py +6 -1
app.py CHANGED
@@ -64,8 +64,12 @@ def tokenize_text(text):
64
 
65
  highlighted_text = "".join(html_parts)
66
 
 
 
 
67
  return (
68
  len(token_ids), # Token count
 
69
  highlighted_text, # Highlighted text
70
  "\n".join(token_list), # Token list
71
  )
@@ -93,13 +97,14 @@ with gr.Blocks(css=custom_css) as demo:
93
 
94
  with gr.Column(scale=1):
95
  token_count = gr.Number(label="Token Count")
 
96
  highlighted_output = gr.HTML(label="Tokenized Text")
97
  token_list = gr.Textbox(label="Token List", lines=10)
98
 
99
  input_text.change(
100
  fn=tokenize_text,
101
  inputs=[input_text],
102
- outputs=[token_count, highlighted_output, token_list],
103
  )
104
 
105
  demo.launch()
 
64
 
65
  highlighted_text = "".join(html_parts)
66
 
67
+ # Calculate compression ratio
68
+ compression_ratio = len(text) / len(token_ids) if len(token_ids) > 0 else 0
69
+
70
  return (
71
  len(token_ids), # Token count
72
+ compression_ratio, # Compression ratio
73
  highlighted_text, # Highlighted text
74
  "\n".join(token_list), # Token list
75
  )
 
97
 
98
  with gr.Column(scale=1):
99
  token_count = gr.Number(label="Token Count")
100
+ compression_ratio = gr.Number(label="Compression Ratio")
101
  highlighted_output = gr.HTML(label="Tokenized Text")
102
  token_list = gr.Textbox(label="Token List", lines=10)
103
 
104
  input_text.change(
105
  fn=tokenize_text,
106
  inputs=[input_text],
107
+ outputs=[token_count, compression_ratio, highlighted_output, token_list],
108
  )
109
 
110
  demo.launch()