nikshep01 committed (verified)
Commit d1ecdea · 1 Parent(s): 227fc7a

Update app.py

Files changed (1):
  1. app.py +22 -7
app.py CHANGED

@@ -167,27 +167,38 @@ import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, StoppingCriteria, StoppingCriteriaList, TextIteratorStreamer
 from threading import Thread
 
+# Load the tokenizer and model
 tokenizer = AutoTokenizer.from_pretrained("thrishala/mental_health_chatbot")
+
+# Check if CUDA (GPU) is available, otherwise use CPU
+device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
 model = AutoModelForCausalLM.from_pretrained("thrishala/mental_health_chatbot", torch_dtype=torch.float16)
-model = model.to('cuda:0')
+model = model.to(device)
 
+# Custom stopping criteria to stop generation on specific tokens
 class StopOnTokens(StoppingCriteria):
     def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
-        stop_ids = [29, 0]
+        stop_ids = [29, 0]  # EOS token or any other token you want to stop on
         for stop_id in stop_ids:
             if input_ids[0][-1] == stop_id:
                 return True
         return False
 
 def predict(message, history):
+    # Prepare the message history for the model
     history_transformer_format = list(zip(history[:-1], history[1:])) + [[message, ""]]
     stop = StopOnTokens()
 
-    messages = "".join(["".join(["\n<human>:"+item[0], "\n<bot>:"+item[1]])
-                        for item in history_transformer_format])
+    # Format the conversation for the model
+    messages = "".join(["".join(["\n<human>:"+item[0], "\n<bot>:"+item[1]]) for item in history_transformer_format])
 
-    model_inputs = tokenizer([messages], return_tensors="pt").to("cuda")
+    # Tokenize input and move to the correct device (GPU or CPU)
+    model_inputs = tokenizer([messages], return_tensors="pt").to(device)
+
+    # Create a streamer to handle model outputs
     streamer = TextIteratorStreamer(tokenizer, timeout=10., skip_prompt=True, skip_special_tokens=True)
+
+    # Generation parameters
     generate_kwargs = dict(
         model_inputs,
         streamer=streamer,
@@ -198,15 +209,19 @@ def predict(message, history):
         temperature=1.0,
         num_beams=1,
         stopping_criteria=StoppingCriteriaList([stop])
-    )
+    )
+
+    # Run the generation in a separate thread
     t = Thread(target=model.generate, kwargs=generate_kwargs)
     t.start()
 
+    # Collect the generated tokens
     partial_message = ""
     for new_token in streamer:
-        if new_token != '<':
+        if new_token != '<':  # Avoid issues with special tokens
             partial_message += new_token
             yield partial_message
 
+# Launch the Gradio interface
 gr.ChatInterface(predict).launch()
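A note on the device fallback this commit introduces: the model is still loaded in torch.float16 even when it ends up on CPU, where half-precision inference is slow and some ops lack fp16 kernels. A minimal sketch of one common variant (not part of this commit) that also picks the dtype per device:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Sketch only: match the dtype to the device, since float16 on CPU
# is slow and not supported by every operator.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if device.startswith("cuda") else torch.float32

tokenizer = AutoTokenizer.from_pretrained("thrishala/mental_health_chatbot")
model = AutoModelForCausalLM.from_pretrained(
    "thrishala/mental_health_chatbot", torch_dtype=dtype
).to(device)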
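For context on StopOnTokens: transformers invokes each StoppingCriteria after every newly generated token, and a True return halts generation. The IDs 29 and 0 are specific to this model's tokenizer and should be verified against it; a self-contained sketch of the same idea:

import torch
from transformers import StoppingCriteria, StoppingCriteriaList

class StopOnTokens(StoppingCriteria):
    # Return True (stop generating) once the newest token is a stop token.
    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        stop_ids = [29, 0]  # model-specific; verify against the tokenizer
        return int(input_ids[0][-1]) in stop_ids

# Wired into generation exactly as in the diff:
# model.generate(..., stopping_criteria=StoppingCriteriaList([StopOnTokens()]))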
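The generation section follows the standard TextIteratorStreamer pattern: model.generate() blocks until it finishes, so it runs on a worker thread while the caller iterates the streamer and yields the growing reply to Gradio. A condensed sketch of that pattern, assuming the model, tokenizer, and device from above (stream_reply and max_new_tokens=256 are illustrative names, not from the commit):

from threading import Thread
from transformers import TextIteratorStreamer

def stream_reply(prompt):
    model_inputs = tokenizer([prompt], return_tensors="pt").to(device)
    streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
    # generate() blocks until done, so push it onto a background thread
    Thread(target=model.generate,
           kwargs=dict(model_inputs, streamer=streamer, max_new_tokens=256)).start()
    partial = ""
    for token in streamer:  # tokens arrive as they are produced
        partial += token
        yield partial  # Gradio re-renders the chat bubble on each yield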