translation

Paused

TenzinGayche committed on Oct 1, 2024

Commit

4fa525d

verified ·

1 Parent(s): 23fd6d2

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -19,15 +19,11 @@ class StopOnTokens(StoppingCriteria):
 # Define prediction function for the chat interface
 def predict(message, history):
-    # Prepare the conversation in the required format
-    history_transformer_format = history + [[message, ""]]
-    stop = StopOnTokens()
-    # Concatenate previous messages and the user's input
-    messages = "".join([f"\n### user : {item[0]} \n### bot : {item[1]}" for item in history_transformer_format])
     # Tokenize the input
-    model_inputs = tokenizer([messages], return_tensors="pt").to("cuda")
     # Set up the streamer for partial message output
     streamer = TextIteratorStreamer(tokenizer, timeout=10., skip_prompt=True, skip_special_tokens=True)
@@ -36,7 +32,7 @@ def predict(message, history):
     generate_kwargs = dict(
         model_inputs,
         streamer=streamer,
-        max_new_tokens=1024,
     )
     # Run generation in a separate thread
@@ -51,4 +47,4 @@ def predict(message, history):
             yield partial_message
 # Create the chat interface using Gradio
-gr.ChatInterface(fn=predict, title="Monlam LLM (beta)", description="").launch(share=True)

 # Define prediction function for the chat interface
 def predict(message, history):
+    # Format the input according to your specified structure
+    formatted_input = f"### user : {message}  ### input: ### answer:"
     # Tokenize the input
+    model_inputs = tokenizer([formatted_input], return_tensors="pt").to("cuda")
     # Set up the streamer for partial message output
     streamer = TextIteratorStreamer(tokenizer, timeout=10., skip_prompt=True, skip_special_tokens=True)
     generate_kwargs = dict(
         model_inputs,
         streamer=streamer,
+        max_new_tokens=1024
     )
     # Run generation in a separate thread
             yield partial_message
 # Create the chat interface using Gradio
+gr.ChatInterface(fn=predict, title="Monlam LLM", description="").launch(share=True)