Spaces: Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -32,6 +32,7 @@ def emulate(text, ttft=1000, out_tps=10, in_tps=None, model_name="lmsys/vicuna-7
|
|
| 32 |
itl = 1000 / out_tps
|
| 33 |
words_per_second = len(text_array) / len(tokens) * out_tps
|
| 34 |
inter_word_latency = 1000 / words_per_second
|
|
|
|
| 35 |
|
| 36 |
# start
|
| 37 |
print(f'tokenizer: "{model_name}"')
|
|
@@ -45,7 +46,7 @@ def emulate(text, ttft=1000, out_tps=10, in_tps=None, model_name="lmsys/vicuna-7
|
|
| 45 |
print(f'Starting...\n\n')
|
| 46 |
start_time = time.time()
|
| 47 |
# Delay by ttft
|
| 48 |
-
sleep_ms(
|
| 49 |
ttft_time = time.time()
|
| 50 |
|
| 51 |
# yield text
|
|
|
|
| 32 |
itl = 1000 / out_tps
|
| 33 |
words_per_second = len(text_array) / len(tokens) * out_tps
|
| 34 |
inter_word_latency = 1000 / words_per_second
|
| 35 |
+
ttft = ttft - offset*1000
|
| 36 |
|
| 37 |
# start
|
| 38 |
print(f'tokenizer: "{model_name}"')
|
|
|
|
| 46 |
print(f'Starting...\n\n')
|
| 47 |
start_time = time.time()
|
| 48 |
# Delay by ttft
|
| 49 |
+
sleep_ms(ttft)
|
| 50 |
ttft_time = time.time()
|
| 51 |
|
| 52 |
# yield text
|