RedPajama-ChatInterface

Build error

App Files Files Community

ysharma HF Staff committed on Jul 14, 2023

Commit

6fd0047

1 Parent(s): 350aa40

cleanup

Browse files

Files changed (1) hide show

app.py +17 -44

app.py CHANGED Viewed

@@ -1,47 +1,31 @@
 import gradio as gr
-import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, StoppingCriteria, StoppingCriteriaList, TextIteratorStreamer
-import time
-import numpy as np
-from torch.nn import functional as F
-import os
 from threading import Thread
-# init
-tok = AutoTokenizer.from_pretrained("togethercomputer/RedPajama-INCITE-Chat-3B-v1")
-m = AutoModelForCausalLM.from_pretrained("togethercomputer/RedPajama-INCITE-Chat-3B-v1", torch_dtype=torch.float16)  #load_in_8bit=True)
-m = m.to('cuda:0')
 class StopOnTokens(StoppingCriteria):
     def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
-        #stop_ids = [[29, 13961, 31], [29, 12042, 31], 1, 0]
         stop_ids = [29, 0]
         for stop_id in stop_ids:
-            #print(f"^^input ids - {input_ids}")
             if input_ids[0][-1] == stop_id:
                 return True
         return False
-def chat(message, history):
-    print(f"chatbot : {history}")
-    print(f"message : {message}")
-    history = history + [[message, ""]]
-    print(f"chatbot : {history}")
-    # Initialize a StopOnTokens object
     stop = StopOnTokens()
-    # Construct the input message string for the model by concatenating the current system message and conversation history
     messages = "".join(["".join(["\n<human>:"+item[0], "\n<bot>:"+item[1]])  #curr_system_message +
-                for item in history])
-    print(f"messages : {messages}")
-    # Tokenize the messages string
     model_inputs = tok([messages], return_tensors="pt").to("cuda")
     streamer = TextIteratorStreamer(tok, timeout=10., skip_prompt=True, skip_special_tokens=True)
     generate_kwargs = dict(
         model_inputs,
         streamer=streamer,
@@ -53,26 +37,15 @@ def chat(message, history):
         num_beams=1,
         stopping_criteria=StoppingCriteriaList([stop])
         )
-    t = Thread(target=m.generate, kwargs=generate_kwargs)
     t.start()
-    # Initialize an empty string to store the generated text
-    partial_text = ""
-    for new_text in streamer:
-        print(new_text)
-        if new_text != '<':
-            partial_text += new_text
-            #history[-1][1] = partial_text.split('<bot>:')[-1]
-            # Yield an empty string to clean up the message textbox and the updated conversation history
-            yield partial_text
-    #return partial_text
-title = """<h1 align="center">🔥RedPajama-INCITE-Chat-3B-v1</h1><br><h2 align="center">🏃‍♂️💨Streaming with Transformers & Gradio💪</h2>"""
-description = """<br><br><h3 align="center">This is a RedPajama Chat model fine-tuned using data from Dolly 2.0 and Open Assistant over the RedPajama-INCITE-Base-3B-v1 base model.</h3>"""
-theme = gr.themes.Soft(
-    primary_hue=gr.themes.Color("#ededed", "#fee2e2", "#fecaca", "#fca5a5", "#f87171", "#ef4444", "#dc2626", "#b91c1c", "#991b1b", "#7f1d1d", "#6c1e1e"),
-    neutral_hue="red",
-)
-gr.ChatInterface(chat, delete_last_btn="❌Delete").queue().launch(debug=True)

 import gradio as gr
+from transformers import AutoModelForCausalLM, AutoTokenizer, StoppingCriteria, StoppingCriteriaList, TextIteratorStreamer
 from threading import Thread
# Load the tokenizer and model once at module import so every request reuses them.
# Fix: torch must be imported before this point. torch.float16 below and the
# torch.* annotations on StopOnTokens are evaluated at module load time and
# raise NameError when the name `torch` is not bound.
import torch

MODEL_ID = "togethercomputer/RedPajama-INCITE-Chat-3B-v1"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
# Weights are requested in float16 and then moved to the 'cuda:0' device.
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype=torch.float16)
model = model.to('cuda:0')
 class StopOnTokens(StoppingCriteria):
     """Stopping criterion that fires when the newest token is one of the stop ids."""

     def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
         """Return True if the last id of the first sequence equals 29 or 0.

         Only ``input_ids[0]`` is inspected; ``scores`` and ``kwargs`` are unused.
         """
         # NOTE(review): presumably 29 is the '<' that opens a "<human>" tag and
         # 0 an end-of-text id -- confirm against the tokenizer vocabulary.
         newest_id = input_ids[0][-1]
         return any(newest_id == candidate for candidate in (29, 0))
def predict(message, history):
    """Stream the bot's reply to ``message`` given the earlier chat ``history``.

    Args:
        message: The new user message; it is concatenated into the prompt as a string.
        history: Earlier turns. Each item is indexed as ``item[0]`` (human text)
            and ``item[1]`` (bot text).

    Yields:
        The reply text accumulated so far, growing as the streamer emits chunks.
    """
    # The pending turn gets an empty bot slot so the prompt ends with "\n<bot>:".
    history_transformer_format = history + [[message, ""]]
    stop = StopOnTokens()

    # Flatten every turn into one "\n<human>:...\n<bot>:..." prompt string.
    messages = "".join(
        "\n<human>:" + item[0] + "\n<bot>:" + item[1]
        for item in history_transformer_format
    )

    # Fix: the tokenizer is bound to the module-level name `tokenizer`; the
    # previous references to `tok` were undefined and raised NameError.
    model_inputs = tokenizer([messages], return_tensors="pt").to("cuda")
    streamer = TextIteratorStreamer(tokenizer, timeout=10., skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(
        model_inputs,
        streamer=streamer,
        # NOTE(review): the diff hunk header ("@@ -53,26 +37,15 @@") shows that
        # lines are elided between `streamer=` and `num_beams=` in this view;
        # any generation kwargs there are not reproduced -- confirm against the
        # full app.py.
        num_beams=1,
        stopping_criteria=StoppingCriteriaList([stop]),
    )

    # generate() runs on a worker thread so this generator can consume the
    # streamer while text is still being produced.
    t = Thread(target=model.generate, kwargs=generate_kwargs)
    t.start()

    partial_message = ""
    for new_token in streamer:
        # A chunk that is exactly '<' is dropped -- presumably the start of a
        # "<human>" tag; TODO confirm.
        if new_token != '<':
            partial_message += new_token
            yield partial_message
# Build the chat UI around predict(), enable queuing, and start the app.
demo = gr.ChatInterface(predict)
demo.queue().launch()