ncomly-nvidia committed on
Commit
04c3ecc
·
verified ·
1 Parent(s): dbeb24c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -1
app.py CHANGED
@@ -32,6 +32,7 @@ def emulate(text, ttft=1000, out_tps=10, in_tps=None, model_name="lmsys/vicuna-7
32
  itl = 1000 / out_tps
33
  words_per_second = len(text_array) / len(tokens) * out_tps
34
  inter_word_latency = 1000 / words_per_second
 
35
 
36
  # start
37
  print(f'tokenizer: "{model_name}"')
@@ -45,7 +46,7 @@ def emulate(text, ttft=1000, out_tps=10, in_tps=None, model_name="lmsys/vicuna-7
45
  print(f'Starting...\n\n')
46
  start_time = time.time()
47
  # Delay by ttft
48
- sleep_ms(max(ttft-offset*1000,0))
49
  ttft_time = time.time()
50
 
51
  # yield text
 
32
  itl = 1000 / out_tps
33
  words_per_second = len(text_array) / len(tokens) * out_tps
34
  inter_word_latency = 1000 / words_per_second
35
+ ttft = ttft - offset*1000
36
 
37
  # start
38
  print(f'tokenizer: "{model_name}"')
 
46
  print(f'Starting...\n\n')
47
  start_time = time.time()
48
  # Delay by ttft
49
+ sleep_ms(ttft)
50
  ttft_time = time.time()
51
 
52
  # yield text