Spaces:

ncomly-nvidia
/

tps_emulator

Sleeping

ncomly-nvidia committed on Jan 30, 2025

Commit

04c3ecc

verified ·

1 Parent(s): dbeb24c

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -32,6 +32,7 @@ def emulate(text, ttft=1000, out_tps=10, in_tps=None, model_name="lmsys/vicuna-7
   itl = 1000 / out_tps
   words_per_second = len(text_array) / len(tokens) * out_tps
   inter_word_latency =  1000 / words_per_second
   # start
   print(f'tokenizer:  "{model_name}"')
@@ -45,7 +46,7 @@ def emulate(text, ttft=1000, out_tps=10, in_tps=None, model_name="lmsys/vicuna-7
   print(f'Starting...\n\n')
   start_time = time.time()
   # Delay by ttft
-  sleep_ms(max(ttft-offset*1000,0))
   ttft_time = time.time()
   # yield text

   itl = 1000 / out_tps
   words_per_second = len(text_array) / len(tokens) * out_tps
   inter_word_latency =  1000 / words_per_second
+  ttft = ttft - offset*1000
   # start
   print(f'tokenizer:  "{model_name}"')
   print(f'Starting...\n\n')
   start_time = time.time()
   # Delay by ttft
+  sleep_ms(ttft)
   ttft_time = time.time()
   # yield text