RedPajama-ChatInterface

Build error

App Files Files Community

ysharma HF Staff committed on Jul 14, 2023

Commit

5638bd8

1 Parent(s): 72ad694

added chat interface

Browse files

Files changed (1) hide show

app.py +22 -16

app.py CHANGED Viewed

@@ -9,8 +9,8 @@ from threading import Thread
 # init
 tok = AutoTokenizer.from_pretrained("togethercomputer/RedPajama-INCITE-Chat-3B-v1")
-m = AutoModelForCausalLM.from_pretrained("togethercomputer/RedPajama-INCITE-Chat-3B-v1", torch_dtype=torch.float16)
-m = m.to('cuda:0')
 class StopOnTokens(StoppingCriteria):
     def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
@@ -28,30 +28,35 @@ def user(message, history):
     return "", history + [[message, ""]]
-def chat(history, top_p, top_k, temperature):
     # Initialize a StopOnTokens object
     stop = StopOnTokens()
     # Construct the input message string for the model by concatenating the current system message and conversation history
     messages = "".join(["".join(["\n<human>:"+item[0], "\n<bot>:"+item[1]])  #curr_system_message +
                 for item in history])
     # Tokenize the messages string
     model_inputs = tok([messages], return_tensors="pt").to("cuda")
-    streamer = TextIteratorStreamer(
-        tok, timeout=10., skip_prompt=False, skip_special_tokens=True)
     generate_kwargs = dict(
         model_inputs,
         streamer=streamer,
         max_new_tokens=1024,
         do_sample=True,
-        top_p=top_p, #0.95,
-        top_k=top_k, #1000,
-        temperature=temperature, #1.0,
         num_beams=1,
         stopping_criteria=StoppingCriteriaList([stop])
-    )
     t = Thread(target=m.generate, kwargs=generate_kwargs)
     t.start()
@@ -61,11 +66,11 @@ def chat(history, top_p, top_k, temperature):
         #print(new_text)
         if new_text != '<':
             partial_text += new_text
-            history[-1][1] = partial_text.split('<bot>:')[-1]
             # Yield an empty string to clean up the message textbox and the updated conversation history
-            yield history
-    return partial_text
 title = """<h1 align="center">🔥RedPajama-INCITE-Chat-3B-v1</h1><br><h2 align="center">🏃‍♂️💨Streaming with Transformers & Gradio💪</h2>"""
 description = """<br><br><h3 align="center">This is a RedPajama Chat model fine-tuned using data from Dolly 2.0 and Open Assistant over the RedPajama-INCITE-Base-3B-v1 base model.</h3>"""
@@ -74,6 +79,7 @@ theme = gr.themes.Soft(
     neutral_hue="red",
 )
 with gr.Blocks(theme=theme) as demo:
     gr.HTML(title)
@@ -113,5 +119,5 @@ with gr.Blocks(theme=theme) as demo:
       )
     gr.HTML(description)
-demo.queue(max_size=32, concurrency_count=2)
-demo.launch(debug=True)

 # init
 tok = AutoTokenizer.from_pretrained("togethercomputer/RedPajama-INCITE-Chat-3B-v1")
+m = AutoModelForCausalLM.from_pretrained("togethercomputer/RedPajama-INCITE-Chat-3B-v1", load_in_8bit=True) #torch_dtype=torch.float16)
+#m = m.to('cuda:0')
 class StopOnTokens(StoppingCriteria):
     def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
     return "", history + [[message, ""]]
+def chat(message, history):
+    print(f"chatbot : {history}")
+    #history = history + [[message, ""]]
+    #print(f"chatbot : {history}")
     # Initialize a StopOnTokens object
     stop = StopOnTokens()
     # Construct the input message string for the model by concatenating the current system message and conversation history
     messages = "".join(["".join(["\n<human>:"+item[0], "\n<bot>:"+item[1]])  #curr_system_message +
                 for item in history])
     # Tokenize the messages string
     model_inputs = tok([messages], return_tensors="pt").to("cuda")
+    streamer = TextIteratorStreamer(tok, timeout=10., skip_prompt=False, skip_special_tokens=True)
     generate_kwargs = dict(
         model_inputs,
         streamer=streamer,
         max_new_tokens=1024,
         do_sample=True,
+        top_p=0.95,
+        top_k=1000,
+        temperature=1.0,
         num_beams=1,
         stopping_criteria=StoppingCriteriaList([stop])
+        )
     t = Thread(target=m.generate, kwargs=generate_kwargs)
     t.start()
         #print(new_text)
         if new_text != '<':
             partial_text += new_text
+            #history[-1][1] = partial_text.split('<bot>:')[-1]
             # Yield an empty string to clean up the message textbox and the updated conversation history
+            yield partial_text
+    #return partial_text
 title = """<h1 align="center">🔥RedPajama-INCITE-Chat-3B-v1</h1><br><h2 align="center">🏃‍♂️💨Streaming with Transformers & Gradio💪</h2>"""
 description = """<br><br><h3 align="center">This is a RedPajama Chat model fine-tuned using data from Dolly 2.0 and Open Assistant over the RedPajama-INCITE-Base-3B-v1 base model.</h3>"""
     neutral_hue="red",
 )
+gr.ChatInterface(chat, delete_last_btn="❌Delete").queue().launch(debug=True)
 with gr.Blocks(theme=theme) as demo:
     gr.HTML(title)
       )
     gr.HTML(description)
+#demo.queue(max_size=32, concurrency_count=2)
+#demo.launch(debug=True)