kdevoe committed on
Commit
00de14a
·
verified ·
1 Parent(s): ff4945d

Adding med fine-tuned model to replace small

Browse files
Files changed (1) hide show
  1. app.py +16 -9
app.py CHANGED
@@ -1,24 +1,30 @@
1
  import gradio as gr
2
  from transformers import AutoModelForCausalLM, AutoTokenizer
 
3
 
4
  # Load the shared tokenizer (using a tokenizer from DialoGPT models)
5
  tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-medium")
6
 
7
- # Define the model names
8
  model_names = {
9
- "DialoGPT-small": "microsoft/DialoGPT-small",
10
  "DialoGPT-medium": "microsoft/DialoGPT-medium"
11
  }
12
 
 
 
13
  # Pre-load the models
14
  loaded_models = {
15
- model_name: AutoModelForCausalLM.from_pretrained(model_path)
16
- for model_name, model_path in model_names.items()
17
  }
 
 
 
 
18
 
19
  def respond(
20
  message,
21
- history: list[tuple[str, str]],
22
  model_choice,
23
  max_tokens,
24
  temperature,
@@ -29,12 +35,12 @@ def respond(
29
 
30
  # Prepare the input by concatenating the history into a dialogue format
31
  input_text = ""
32
- for user_msg, bot_msg in history:
33
- input_text += f"User: {user_msg}\nAssistant: {bot_msg}\n"
34
  input_text += f"User: {message}\nAssistant:"
35
 
36
  # Tokenize the input text using the shared tokenizer
37
- inputs = tokenizer(input_text, return_tensors="pt", truncation=True)
38
 
39
  # Generate the response using the selected DialoGPT model
40
  output_tokens = model.generate(
@@ -51,9 +57,10 @@ def respond(
51
 
52
  # Define the Gradio interface
53
  demo = gr.ChatInterface(
 
54
  respond,
55
  additional_inputs=[
56
- gr.Dropdown(choices=["DialoGPT-small", "DialoGPT-medium"], value="DialoGPT-small", label="Model"),
57
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
58
  gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
59
  gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
 
1
  import gradio as gr
2
  from transformers import AutoModelForCausalLM, AutoTokenizer
3
+ import torch
4
 
5
  # Load the shared tokenizer (using a tokenizer from DialoGPT models)
6
  tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-medium")
7
 
8
+ # Define the model names, including the locally saved fine-tuned model
9
  model_names = {
10
+ "DialoGPT-med-FT": "DialoGPT-med-FT.bin",
11
  "DialoGPT-medium": "microsoft/DialoGPT-medium"
12
  }
13
 
14
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
15
+
16
  # Pre-load the models
17
  loaded_models = {
18
+ "DialoGPT-med-FT": AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-medium")
 
19
  }
20
+ loaded_models["DialoGPT-med-FT"].load_state_dict(torch.load(model_names["DialoGPT-med-FT"]))
21
+ loaded_models["DialoGPT-med-FT"].to(device)
22
+
23
+ loaded_models["DialoGPT-medium"] = AutoModelForCausalLM.from_pretrained(model_names["DialoGPT-medium"]).to(device)
24
 
25
  def respond(
26
  message,
27
+ history: list[dict],
28
  model_choice,
29
  max_tokens,
30
  temperature,
 
35
 
36
  # Prepare the input by concatenating the history into a dialogue format
37
  input_text = ""
38
+ for message_pair in history:
39
+ input_text += f"{message_pair['role']}: {message_pair['content']}\n"
40
  input_text += f"User: {message}\nAssistant:"
41
 
42
  # Tokenize the input text using the shared tokenizer
43
+ inputs = tokenizer(input_text, return_tensors="pt", truncation=True).to(model.device)
44
 
45
  # Generate the response using the selected DialoGPT model
46
  output_tokens = model.generate(
 
57
 
58
  # Define the Gradio interface
59
  demo = gr.ChatInterface(
60
  respond,
61
+ type='messages',
62
  additional_inputs=[
63
+ gr.Dropdown(choices=["DialoGPT-med-FT", "DialoGPT-medium"], value="DialoGPT-med-FT", label="Model"),
64
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
65
  gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
66
  gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),