TestBart

Sleeping

App Files Community

kdevoe committed on Sep 30, 2024

Commit

b43a17c

verified ·

1 Parent(s): 5167829

Update app.py

Browse files

Files changed (1) hide show

app.py +20 -22

app.py CHANGED Viewed

@@ -1,14 +1,14 @@
 import gradio as gr
-from transformers import GPT2Tokenizer, GPT2LMHeadModel, AutoModelForSeq2SeqLM, AutoTokenizer
 import torch
 from langchain.memory import ConversationBufferMemory
 # Move model to device (GPU if available)
 device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
-# Load the tokenizer and model for DistilGPT-2
-tokenizer = GPT2Tokenizer.from_pretrained("distilgpt2")
-model = GPT2LMHeadModel.from_pretrained("distilgpt2")
 model.to(device)
 # Load summarization model (e.g., T5-small)
@@ -27,12 +27,12 @@ def summarize_history(history):
 # Set up conversational memory using LangChain's ConversationBufferMemory
 memory = ConversationBufferMemory()
-# Define the chatbot function with memory
-def chat_with_distilgpt2(input_text):
     # Retrieve conversation history
     conversation_history = memory.load_memory_variables({})['history']
-    # Summarize if history exceeds certain length
     if len(conversation_history.split()) > 200:
         conversation_history = summarize_history(conversation_history)
@@ -40,22 +40,19 @@ def chat_with_distilgpt2(input_text):
     full_input = f"{conversation_history}\nUser: {input_text}\nAssistant:"
     # Tokenize the input and convert to tensor
-    input_ids = tokenizer.encode(full_input, return_tensors="pt").to(device)
-    # Generate the response using the model with adjusted parameters
     outputs = model.generate(
-        input_ids,
-        max_length=input_ids.shape[1] + 100,  # Limit total length
-        max_new_tokens=100,
-        num_return_sequences=1,
         no_repeat_ngram_size=3,
         repetition_penalty=1.2,
         temperature=0.9,
         top_k=20,
-        top_p=0.8,
-        early_stopping=True,
-        pad_token_id=tokenizer.eos_token_id,
-        eos_token_id=tokenizer.eos_token_id
     )
     # Decode the model output
@@ -68,14 +65,15 @@ def chat_with_distilgpt2(input_text):
 # Set up the Gradio interface
 interface = gr.Interface(
-    fn=chat_with_distilgpt2,
-    inputs=gr.Textbox(label="Chat with DistilGPT-2"),
-    outputs=gr.Textbox(label="DistilGPT-2's Response"),
-    title="DistilGPT-2 Chatbot with Memory",
-    description="This is a simple chatbot powered by the DistilGPT-2 model with conversational memory, using LangChain.",
 )
 # Launch the Gradio app
 interface.launch()

 import gradio as gr
+from transformers import BartTokenizer, BartForConditionalGeneration, AutoModelForSeq2SeqLM, AutoTokenizer
 import torch
 from langchain.memory import ConversationBufferMemory
 # Move model to device (GPU if available)
 device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
+# Load the tokenizer and model for BART Base
+tokenizer = BartTokenizer.from_pretrained("facebook/bart-base")
+model = BartForConditionalGeneration.from_pretrained("facebook/bart-base")
 model.to(device)
 # Load summarization model (e.g., T5-small)
 # Set up conversational memory using LangChain's ConversationBufferMemory
 memory = ConversationBufferMemory()
+# Define the chatbot function with memory using BART Base
+def chat_with_bart(input_text):
     # Retrieve conversation history
     conversation_history = memory.load_memory_variables({})['history']
+    # Summarize if history exceeds a certain length
     if len(conversation_history.split()) > 200:
         conversation_history = summarize_history(conversation_history)
     full_input = f"{conversation_history}\nUser: {input_text}\nAssistant:"
     # Tokenize the input and convert to tensor
+    inputs = tokenizer(full_input, return_tensors="pt", max_length=1024, truncation=True).to(device)
+    # Generate the response using the BART model
     outputs = model.generate(
+        inputs["input_ids"],
+        max_length=1024,
+        num_beams=4,
+        early_stopping=True,
         no_repeat_ngram_size=3,
         repetition_penalty=1.2,
         temperature=0.9,
         top_k=20,
+        top_p=0.8
     )
     # Decode the model output
 # Set up the Gradio interface
 interface = gr.Interface(
+    fn=chat_with_bart,
+    inputs=gr.Textbox(label="Chat with BART Base"),
+    outputs=gr.Textbox(label="BART Base's Response"),
+    title="BART Base Chatbot with Memory",
+    description="This is a simple chatbot powered by the BART Base model with conversational memory, using LangChain.",
 )
 # Launch the Gradio app
 interface.launch()