kdevoe commited on
Commit
ec853a0
·
verified ·
1 Parent(s): 6b31fe2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -4
app.py CHANGED
@@ -1,5 +1,5 @@
1
  import gradio as gr
2
- from transformers import GPT2Tokenizer, GPT2LMHeadModel
3
  import torch
4
  from langchain.memory import ConversationBufferMemory
5
 
@@ -7,6 +7,19 @@ from langchain.memory import ConversationBufferMemory
7
  tokenizer = GPT2Tokenizer.from_pretrained("distilgpt2")
8
  model = GPT2LMHeadModel.from_pretrained("distilgpt2")
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  # Move model to device (GPU if available)
11
  device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
12
  model.to(device)
@@ -16,17 +29,21 @@ memory = ConversationBufferMemory()
16
 
17
  # Define the chatbot function with memory
18
  def chat_with_distilgpt2(input_text):
19
- # Retrieve conversation history and append the current user input
20
  conversation_history = memory.load_memory_variables({})['history']
21
 
22
- # Combine the history with the current user input
 
 
 
 
23
  full_input = f"{conversation_history}\nUser: {input_text}\nAssistant:"
24
 
25
  # Tokenize the input and convert to tensor
26
  input_ids = tokenizer.encode(full_input, return_tensors="pt").to(device)
27
 
28
  # Generate the response using the model
29
- outputs = model.generate(input_ids, max_length=400, num_return_sequences=1, pad_token_id=tokenizer.eos_token_id)
30
 
31
  # Decode the model output
32
  response = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
1
  import gradio as gr
2
+ from transformers import GPT2Tokenizer, GPT2LMHeadModel, AutoModelForSeq2SeqLM, AutoTokenizer
3
  import torch
4
  from langchain.memory import ConversationBufferMemory
5
 
 
7
  tokenizer = GPT2Tokenizer.from_pretrained("distilgpt2")
8
  model = GPT2LMHeadModel.from_pretrained("distilgpt2")
9
 
10
# Load the summarization model (T5-small) used to compress long chat history.
# BUG FIX: in the original ordering, `device` was only defined further down the
# file, so the `.to(device)` call below raised a NameError at import time.
# Define the device here; the later, identical module-level definition is
# harmless (it evaluates to the same device).
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
summarizer_tokenizer = AutoTokenizer.from_pretrained("t5-small")
summarizer_model = AutoModelForSeq2SeqLM.from_pretrained("t5-small").to(device)
14
def summarize_history(history, max_length=50, min_length=25):
    """Condense a long conversation history into a short text summary.

    Args:
        history: Accumulated conversation text to compress.
        max_length: Upper bound, in tokens, for the generated summary
            (default 50, matching the original hard-coded value).
        min_length: Lower bound, in tokens, for the generated summary
            (default 25, matching the original hard-coded value).

    Returns:
        The decoded summary string produced by the T5 summarization model.
    """
    # T5 is a text-to-text model trained with task prefixes; the
    # "summarize: " prefix selects its summarization behavior.
    # truncation=True guards against histories longer than the model's
    # maximum input length (512 tokens for t5-small), which previously
    # could make generation fail or degrade on very long conversations.
    input_ids = summarizer_tokenizer.encode(
        "summarize: " + history,
        return_tensors="pt",
        truncation=True,
    ).to(device)
    summary_ids = summarizer_model.generate(
        input_ids,
        max_length=max_length,
        min_length=min_length,
        length_penalty=5.0,
        num_beams=2,
    )
    return summarizer_tokenizer.decode(summary_ids[0], skip_special_tokens=True)
22
+
# Pick the compute device — GPU when one is available, CPU otherwise —
# and place the chat model on it.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
 
29
 
30
  # Define the chatbot function with memory
31
  def chat_with_distilgpt2(input_text):
32
+ # Retrieve conversation history
33
  conversation_history = memory.load_memory_variables({})['history']
34
 
35
+ # Summarize if history exceeds certain length
36
+ if len(conversation_history.split()) > 200:
37
+ conversation_history = summarize_history(conversation_history)
38
+
39
+ # Combine the (possibly summarized) history with the current user input
40
  full_input = f"{conversation_history}\nUser: {input_text}\nAssistant:"
41
 
42
  # Tokenize the input and convert to tensor
43
  input_ids = tokenizer.encode(full_input, return_tensors="pt").to(device)
44
 
45
  # Generate the response using the model
46
+ outputs = model.generate(input_ids, max_length=150, num_return_sequences=1, pad_token_id=tokenizer.eos_token_id)
47
 
48
  # Decode the model output
49
  response = tokenizer.decode(outputs[0], skip_special_tokens=True)