Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -19,15 +19,11 @@ class StopOnTokens(StoppingCriteria):
|
|
| 19 |
|
| 20 |
# Define prediction function for the chat interface
|
| 21 |
def predict(message, history):
|
| 22 |
-
#
|
| 23 |
-
|
| 24 |
-
stop = StopOnTokens()
|
| 25 |
-
|
| 26 |
-
# Concatenate previous messages and the user's input
|
| 27 |
-
messages = "".join([f"\n### user : {item[0]} \n### bot : {item[1]}" for item in history_transformer_format])
|
| 28 |
|
| 29 |
# Tokenize the input
|
| 30 |
-
model_inputs = tokenizer([
|
| 31 |
|
| 32 |
# Set up the streamer for partial message output
|
| 33 |
streamer = TextIteratorStreamer(tokenizer, timeout=10., skip_prompt=True, skip_special_tokens=True)
|
|
@@ -36,7 +32,7 @@ def predict(message, history):
|
|
| 36 |
generate_kwargs = dict(
|
| 37 |
model_inputs,
|
| 38 |
streamer=streamer,
|
| 39 |
-
max_new_tokens=1024
|
| 40 |
)
|
| 41 |
|
| 42 |
# Run generation in a separate thread
|
|
@@ -51,4 +47,4 @@ def predict(message, history):
|
|
| 51 |
yield partial_message
|
| 52 |
|
| 53 |
# Create the chat interface using Gradio
|
| 54 |
-
gr.ChatInterface(fn=predict, title="Monlam LLM
|
|
|
|
| 19 |
|
| 20 |
# Define prediction function for the chat interface
|
| 21 |
def predict(message, history):
|
| 22 |
+
# Format the input according to your specified structure
|
| 23 |
+
formatted_input = f"### user : {message} ### input: ### answer:"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
|
| 25 |
# Tokenize the input
|
| 26 |
+
model_inputs = tokenizer([formatted_input], return_tensors="pt").to("cuda")
|
| 27 |
|
| 28 |
# Set up the streamer for partial message output
|
| 29 |
streamer = TextIteratorStreamer(tokenizer, timeout=10., skip_prompt=True, skip_special_tokens=True)
|
|
|
|
| 32 |
generate_kwargs = dict(
|
| 33 |
model_inputs,
|
| 34 |
streamer=streamer,
|
| 35 |
+
max_new_tokens=1024
|
| 36 |
)
|
| 37 |
|
| 38 |
# Run generation in a separate thread
|
|
|
|
| 47 |
yield partial_message
|
| 48 |
|
| 49 |
# Create the chat interface using Gradio
|
| 50 |
+
gr.ChatInterface(fn=predict, title="Monlam LLM", description="").launch(share=True)
|