aruna0-0 committed on
Commit c54cc80 · verified · 1 Parent(s): 3f13e6e

Update app.py

Files changed (1)
  1. app.py +47 -37
app.py CHANGED
@@ -1,36 +1,48 @@
-import torch
-from transformers import GPT2Tokenizer, GPT2LMHeadModel
+
+import os
 import gradio as gr
+from transformers import AutoModelForCausalLM, AutoTokenizer
+import torch
 
-# Load the custom model and tokenizer
-model_path = 'redael/model_udc'
-tokenizer = GPT2Tokenizer.from_pretrained(model_path)
-model = GPT2LMHeadModel.from_pretrained(model_path)
+# Load your model and tokenizer from Hugging Face
+model_name = 'redael/model_udc'
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(model_name)
 
-# Check if CUDA is available and use GPU if possible, enable FP16 precision
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model.to(device)
-if device.type == 'cuda':
-    model = model.half()  # Use FP16 precision
 
-def generate_response(prompt, model, tokenizer, max_length=100, num_beams=1, temperature=0.7, top_p=0.9, repetition_penalty=2.0):
-    # Prepare the prompt
-    prompt = f"User: {prompt}\nAssistant:"
+# Function to generate response
+def generate_response(message, history, system_message, max_tokens, temperature, top_p):
+    # Prepare the conversation history
+    messages = [{"role": "system", "content": system_message}]
+
+    for user_msg, bot_msg in history:
+        if user_msg:
+            messages.append({"role": "user", "content": user_msg})
+        if bot_msg:
+            messages.append({"role": "assistant", "content": bot_msg})
+
+    messages.append({"role": "user", "content": message})
+
+    # Tokenize and prepare the input
+    prompt = "\n".join([f"{msg['role'].capitalize()}: {msg['content']}" for msg in messages])
     inputs = tokenizer(prompt, return_tensors='pt', padding=True, truncation=True, max_length=512).to(device)
+
+    # Generate the response
     outputs = model.generate(
         inputs['input_ids'],
-        max_length=max_length,
+        max_length=max_tokens,
         num_return_sequences=1,
         pad_token_id=tokenizer.eos_token_id,
-        num_beams=num_beams,
         temperature=temperature,
         top_p=top_p,
-        repetition_penalty=repetition_penalty,
-        early_stopping=True
+        early_stopping=True,
+        do_sample=True  # Enable sampling
     )
     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
-    # Post-processing to clean up the response
+    # Clean up the response
    response = response.split("Assistant:")[-1].strip()
    response_lines = response.split('\n')
    clean_response = []
@@ -38,29 +50,27 @@ def generate_response(prompt, model, tokenizer, max_length=100, num_beams=1, temperature=0.7, top_p=0.9, repetition_penalty=2.0):
         if "User:" not in line and "Assistant:" not in line:
             clean_response.append(line)
     response = ' '.join(clean_response)
-    return response.strip()
-
-def respond(message, history):
-    # Prepare the prompt from the history and the new message
-    system_message = "You are a friendly chatbot."
-    conversation = system_message + "\n"
-    for user_message, assistant_response in history:
-        conversation += f"User: {user_message}\nAssistant: {assistant_response}\n"
-    conversation += f"User: {message}\nAssistant:"
-
-    # Fixed values for generation parameters
-    max_tokens = 100  # Adjusted max tokens
-    temperature = 0.7
-    top_p = 0.9
-
-    response = generate_response(conversation, model, tokenizer, max_length=max_tokens, temperature=temperature, top_p=top_p)
 
-    return response
+    return [(message, response)]
 
-# Gradio Chat Interface
+# Create the Gradio chat interface
 demo = gr.ChatInterface(
-    respond
+    fn=generate_response,
+    additional_inputs=[
+        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
+        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
+        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+        gr.Slider(
+            minimum=0.1,
+            maximum=1.0,
+            value=0.95,
+            step=0.05,
+            label="Top-p (nucleus sampling)",
+        ),
+    ],
+    title="Chatbot",
+    description="Ask anything to the chatbot."
 )
 
-if _name_ == "_main_":
+if __name__ == "__main__":
     demo.launch()
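
For reference, gr.ChatInterface calls the handler with the user message and the chat history first, followed by the current values of the additional_inputs components in order, so the updated generate_response can also be exercised directly. A minimal sketch (not part of the commit), assuming the model and tokenizer above are already loaded:

# Illustration only: invoke the handler the same way gr.ChatInterface does --
# message and history first, then the additional_inputs values in order.
result = generate_response(
    "Hello!",                       # message typed by the user
    [],                             # history: list of (user, assistant) pairs
    "You are a friendly Chatbot.",  # System message textbox
    512,                            # Max new tokens slider
    0.7,                            # Temperature slider
    0.95,                           # Top-p slider
)
print(result)  # the committed version returns [(message, response)]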