Spaces:
Running
Running
File size: 1,162 Bytes
10e4a4c 8a56d57 10e4a4c 8a56d57 10e4a4c 8a56d57 10e4a4c 8a56d57 10e4a4c 8a56d57 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 |
import streamlit as st
import tiktoken
from .content import TOKEN_ESTIMATOR_TEXT
def num_tokens_from_string(string: str, encoding_name: str) -> int:
    """Count the tokens produced when ``string`` is encoded by a tiktoken encoding.

    Args:
        string: Text to tokenize.
        encoding_name: Name handed to ``tiktoken.get_encoding`` to select
            the encoding.

    Returns:
        The length of the token sequence returned by the encoding's
        ``encode`` method.
    """
    tokenizer = tiktoken.get_encoding(encoding_name)
    token_ids = tokenizer.encode(string)
    return len(token_ids)
def token_estimator():
    """Render the "Tokens estimator" section of the Streamlit page.

    Widgets are emitted top to bottom: a heading, an introductory
    paragraph, a collapsed expander holding ``TOKEN_ESTIMATOR_TEXT``, a
    text area pre-filled with a sample sentence, and a bordered metric in
    the middle of a three-column row showing the token count of the text
    area's content under the ``cl100k_base`` encoding.
    """
    st.markdown("### 🪙 Tokens estimator")

    intro_text = (
        "As our methodology deeply relies on the number of tokens processed by the model *(and as no-one is token-fluent)*, we provide you with a tool to estimate the number of tokens in a given text."
    )
    st.markdown(intro_text)

    # Explanation stays folded until the user opens it.
    explainer = st.expander("ℹ️ What is a token anyway ?", expanded=False)
    explainer.markdown(TOKEN_ESTIMATOR_TEXT)

    user_text_input = st.text_area(
        "Type or paste some text to estimate the amount of tokens.",
        "EcoLogits is a great project!",
    )

    # Only the narrow middle column of the 2:1:2 row is used; the outer
    # two columns are left empty as spacers.
    _left_spacer, center_column, _right_spacer = st.columns([2, 1, 2])
    token_count = num_tokens_from_string(user_text_input, "cl100k_base")
    center_column.metric(
        label="tokens estimated amount",
        value=token_count,
        border=True,
    )
|