Spaces:
Build error
Build error
kwabs22 committed on
Commit ·
d9e0520
1
Parent(s): 03936f4
after bufsize=1 change, exploring word or token level stream
Browse files
app.py
CHANGED
|
@@ -34,6 +34,45 @@ def generate_response(user_message): #Figure Out the parameters later and find a
|
|
| 34 |
print(f"Error: {error_message}")
|
| 35 |
|
| 36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
def custom_generate_response(cust_user_message):
    """Prefix the user's message with the first custom prompt, then stream the reply."""
    prompted_message = f"{CustomPrompts[0]}\n\n{cust_user_message}"
    yield from generate_response(prompted_message)
|
@@ -52,6 +91,7 @@ with gr.Blocks() as iface:
|
|
| 52 |
description="No Message History for now - Enter your message and get a response. (One sentence every 20s)",
|
| 53 |
flagging_dir="/usr/src/app/flagged",
|
| 54 |
)
|
|
|
|
| 55 |
with gr.Group():
|
| 56 |
gr.HTML("Test for wrapping generator (20 seconds a piece of the response)")
|
| 57 |
MainOutput = gr.TextArea(placeholder='Output will show here')
|
|
|
|
| 34 |
print(f"Error: {error_message}")
|
| 35 |
|
| 36 |
|
| 37 |
+
def generate_response_token_by_token(user_message, cmd=None):
    """Stream a llama.cpp completion roughly token by token.

    Launches the llama.cpp binary as a subprocess and yields each
    whitespace-delimited chunk of stdout as soon as it arrives, annotated
    with the elapsed inference time so the UI can show responsiveness.

    Args:
        user_message: Prompt text passed to the model via ``-p``.
        cmd: Optional argv list that overrides the default llama.cpp
            invocation (backward-compatible; mainly useful for testing
            with a different executable).

    Yields:
        str: ``"<token> [Inference time: X.XX seconds]"`` per token.
    """
    import threading  # local import: only needed for the stderr drain below

    if cmd is None:
        cmd = [
            "/app/llama.cpp/main",  # Path to the executable
            "-m", "/app/llama.cpp/models/stablelm-2-zephyr-1_6b-Q4_0.gguf",
            "-p", user_message,
            "-n", "400",
            "-e",
        ]

    # bufsize=1 (line-buffered text mode) keeps latency low for streaming.
    process = subprocess.Popen(
        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, bufsize=1
    )

    # Drain stderr in the background. llama.cpp writes copious logging to
    # stderr; if nobody reads it while we loop on stdout, the OS pipe
    # buffer can fill, blocking the child and deadlocking this generator.
    stderr_chunks = []
    stderr_drainer = threading.Thread(
        target=lambda: stderr_chunks.append(process.stderr.read()), daemon=True
    )
    stderr_drainer.start()

    start_time = time.time()
    token_buffer = ''
    while True:
        # Read one character at a time so tokens are emitted immediately.
        char = process.stdout.read(1)
        if char == '' and process.poll() is not None:
            break  # EOF and the child has exited
        if char != '':
            token_buffer += char
            if char in (' ', '\n'):  # token delimiters
                elapsed_time = time.time() - start_time
                yield f"{token_buffer} [Inference time: {elapsed_time:.2f} seconds]"
                token_buffer = ''  # reset for the next token

    # Flush the trailing token (no delimiter follows the final word).
    if token_buffer:
        elapsed_time = time.time() - start_time
        yield f"{token_buffer} [Inference time: {elapsed_time:.2f} seconds]"

    # Wait for the subprocess to finish if it hasn't already.
    process.wait()
    stderr_drainer.join()

    # Surface any error output captured by the drain thread.
    if process.returncode != 0:
        error_message = stderr_chunks[0] if stderr_chunks else ''
        print(f"Error: {error_message}")
| 76 |
def custom_generate_response(cust_user_message):
    # Build the full prompt: custom preamble, blank line, then the user's text.
    full_prompt = "\n\n".join([CustomPrompts[0], cust_user_message])
    yield from generate_response(full_prompt)
|
|
|
| 91 |
description="No Message History for now - Enter your message and get a response. (One sentence every 20s)",
|
| 92 |
flagging_dir="/usr/src/app/flagged",
|
| 93 |
)
|
| 94 |
+
gr.Interface(fn=generate_response_token_by_token, inputs=gr.Textbox(lines=2, placeholder='Type prompt here...'), outputs="text", description="More Responsive streaming test")
|
| 95 |
with gr.Group():
|
| 96 |
gr.HTML("Test for wrapping generator (20 seconds a piece of the response)")
|
| 97 |
MainOutput = gr.TextArea(placeholder='Output will show here')
|