Spaces:
Sleeping
Sleeping
Add progress bar for encoding
Browse files
app.py
CHANGED
|
@@ -28,12 +28,16 @@ encode_col, decode_col = st.columns(2, gap='medium')
|
|
| 28 |
|
| 29 |
@st.cache_data
|
| 30 |
def encode(text):
|
|
|
|
| 31 |
codec = numpyAc.arithmeticCoding()
|
| 32 |
tokenized = tokenizer(text, return_tensors='pt').input_ids.to('cuda')
|
| 33 |
output = list()
|
| 34 |
past_key_values = None
|
| 35 |
|
|
|
|
|
|
|
| 36 |
for i in range(tokenized.shape[1]):
|
|
|
|
| 37 |
with torch.no_grad():
|
| 38 |
output_ = model(
|
| 39 |
input_ids=tokenized[:, i:i + 1],
|
|
@@ -52,6 +56,7 @@ def encode(text):
|
|
| 52 |
|
| 53 |
@st.cache_data
|
| 54 |
def decode(byte_stream):
|
|
|
|
| 55 |
decodec = numpyAc.arithmeticDeCoding(byte_stream, 32_000)
|
| 56 |
input_ids = [1]
|
| 57 |
past_key_values = None
|
|
|
|
| 28 |
|
| 29 |
@st.cache_data
|
| 30 |
def encode(text):
|
| 31 |
+
bar = st.progress(0.0)
|
| 32 |
codec = numpyAc.arithmeticCoding()
|
| 33 |
tokenized = tokenizer(text, return_tensors='pt').input_ids.to('cuda')
|
| 34 |
output = list()
|
| 35 |
past_key_values = None
|
| 36 |
|
| 37 |
+
# We can't run a single pass over all tokens, because
|
| 38 |
+
# that produces inconsistent results
|
| 39 |
for i in range(tokenized.shape[1]):
|
| 40 |
+
bar.progress((i + 1) / tokenized.shape[1])
|
| 41 |
with torch.no_grad():
|
| 42 |
output_ = model(
|
| 43 |
input_ids=tokenized[:, i:i + 1],
|
|
|
|
| 56 |
|
| 57 |
@st.cache_data
|
| 58 |
def decode(byte_stream):
|
| 59 |
+
# Unfortunately, a progress bar for decoding isn't possible (or is at least hard to implement)
|
| 60 |
decodec = numpyAc.arithmeticDeCoding(byte_stream, 32_000)
|
| 61 |
input_ids = [1]
|
| 62 |
past_key_values = None
|