Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -17,7 +17,7 @@ def detokenize(tokens, model_name="lmsys/vicuna-7b-v1.5"):
|
|
| 17 |
string = tokenizer.decode(tokens[0])
|
| 18 |
return string
|
| 19 |
|
| 20 |
-
def emulate(text, ttft=1000, out_tps=10, in_tps=None, model_name="lmsys/vicuna-7b-v1.5", n=10):
|
| 21 |
# get TTFT
|
| 22 |
if in_tps is not None and ttft is not None:
|
| 23 |
print("both TTFT & Input Tokens per second specified, using TTFT")
|
|
@@ -45,8 +45,7 @@ def emulate(text, ttft=1000, out_tps=10, in_tps=None, model_name="lmsys/vicuna-7
|
|
| 45 |
print(f'Starting...\n\n')
|
| 46 |
start_time = time.time()
|
| 47 |
# Delay by ttft
|
| 48 |
-
|
| 49 |
-
sleep_ms(ttft)
|
| 50 |
ttft_time = time.time()
|
| 51 |
|
| 52 |
# yield text
|
|
@@ -87,6 +86,8 @@ demo = gr.Interface(
|
|
| 87 |
gr.Slider(0, 1000, value=10, label="Output Tokens per Second"),
|
| 88 |
],
|
| 89 |
outputs="text",
|
|
|
|
|
|
|
| 90 |
# live=True
|
| 91 |
)
|
| 92 |
|
|
|
|
| 17 |
string = tokenizer.decode(tokens[0])
|
| 18 |
return string
|
| 19 |
|
| 20 |
+
def emulate(text, ttft=1000, out_tps=10, in_tps=None, model_name="lmsys/vicuna-7b-v1.5", n=10, offset=0):
|
| 21 |
# get TTFT
|
| 22 |
if in_tps is not None and ttft is not None:
|
| 23 |
print("both TTFT & Input Tokens per second specified, using TTFT")
|
|
|
|
| 45 |
print(f'Starting...\n\n')
|
| 46 |
start_time = time.time()
|
| 47 |
# Delay by ttft minus the offset (offset is in seconds, converted to ms), never less than zero
|
| 48 |
+
sleep_ms(max(ttft-offset*1000,0))
|
|
|
|
| 49 |
ttft_time = time.time()
|
| 50 |
|
| 51 |
# yield text
|
|
|
|
| 86 |
gr.Slider(0, 1000, value=10, label="Output Tokens per Second"),
|
| 87 |
],
|
| 88 |
outputs="text",
|
| 89 |
+
additional_inputs=[gr.Slider(minimum=0, maximum=2, step=0.1, label="TTFT Offset (S)")],
|
| 90 |
+
|
| 91 |
# live=True
|
| 92 |
)
|
| 93 |
|