Spaces:
Build error
Build error
kwabs22 committed on
Commit ·
0ebff8f
1
Parent(s): 9835647
time variables edited
Browse files
app.py
CHANGED
|
@@ -17,8 +17,10 @@ def generate_response(user_message): #generate_response_token_by_token
|
|
| 17 |
|
| 18 |
|
| 19 |
start_time = time.time()
|
|
|
|
| 20 |
alltokens = ""
|
| 21 |
token_buffer = ''
|
|
|
|
| 22 |
try:
|
| 23 |
while True:
|
| 24 |
# Read one character at a time
|
|
@@ -30,20 +32,22 @@ def generate_response(user_message): #generate_response_token_by_token
|
|
| 30 |
if char == ' ' or char == '\n': # Token delimiters
|
| 31 |
elapsed_time = time.time() - start_time # Calculate elapsed time
|
| 32 |
alltokens += token_buffer
|
| 33 |
-
|
|
|
|
| 34 |
token_buffer = '' # Reset token buffer
|
| 35 |
# Log resource usage every minute
|
| 36 |
-
if time.time() -
|
| 37 |
cpu_usage = process_monitor.cpu_percent()
|
| 38 |
memory_usage = process_monitor.memory_info().rss # in bytes
|
| 39 |
print(f"Subprocess CPU Usage: {cpu_usage}%, Memory Usage: {memory_usage / 1024 ** 2} MB")
|
| 40 |
-
|
| 41 |
|
| 42 |
# Yield the last token if there is any
|
| 43 |
if token_buffer:
|
| 44 |
elapsed_time = time.time() - start_time # Calculate elapsed time
|
| 45 |
alltokens += token_buffer
|
| 46 |
-
yield f"{alltokens} \n\n [Inference time: {elapsed_time:.2f} seconds]"
|
|
|
|
| 47 |
finally:
|
| 48 |
try:
|
| 49 |
# Wait for the process to complete, with a timeout
|
|
|
|
| 17 |
|
| 18 |
|
| 19 |
start_time = time.time()
|
| 20 |
+
monitor_start_time = time.time()
|
| 21 |
alltokens = ""
|
| 22 |
token_buffer = ''
|
| 23 |
+
tokencount = 0
|
| 24 |
try:
|
| 25 |
while True:
|
| 26 |
# Read one character at a time
|
|
|
|
| 32 |
if char == ' ' or char == '\n': # Token delimiters
|
| 33 |
elapsed_time = time.time() - start_time # Calculate elapsed time
|
| 34 |
alltokens += token_buffer
|
| 35 |
+
tokencount += 1
|
| 36 |
+
yield f"{alltokens} \n\n [Inference time: {elapsed_time:.2f} seconds | Tokens: { tokencount }]"
|
| 37 |
token_buffer = '' # Reset token buffer
|
| 38 |
# Log resource usage every minute
|
| 39 |
+
if time.time() - monitor_start_time > 60:
|
| 40 |
cpu_usage = process_monitor.cpu_percent()
|
| 41 |
memory_usage = process_monitor.memory_info().rss # in bytes
|
| 42 |
print(f"Subprocess CPU Usage: {cpu_usage}%, Memory Usage: {memory_usage / 1024 ** 2} MB")
|
| 43 |
+
monitor_start_time = time.time() # Reset the timer
|
| 44 |
|
| 45 |
# Yield the last token if there is any
|
| 46 |
if token_buffer:
|
| 47 |
elapsed_time = time.time() - start_time # Calculate elapsed time
|
| 48 |
alltokens += token_buffer
|
| 49 |
+
yield f"{alltokens} \n\n [Inference time: {elapsed_time:.2f} seconds | Average Tokens per second: { tokencount / elapsed_time}]"
|
| 50 |
+
|
| 51 |
finally:
|
| 52 |
try:
|
| 53 |
# Wait for the process to complete, with a timeout
|