cochi1706 committed on
Commit
f1fc130
·
1 Parent(s): 1e36468

Refactor model loading and input handling in chatbot application. Updated model and tokenizer initialization, improved device management for inputs, and removed unused sliders from the Gradio interface.

Browse files
Files changed (1) hide show
  1. app.py +25 -38
app.py CHANGED
@@ -1,33 +1,33 @@
1
  import gradio as gr
2
  import torch
3
  from transformers import AutoModelForCausalLM, AutoTokenizer
4
- from peft import PeftModel
5
 
6
 
7
- # Load model and tokenizer
8
  print("Đang tải model...")
9
- base_model_name = "Qwen/Qwen3-0.6B"
10
- adapter_repo = "cochi1706/coding-assistant"
11
 
12
- # Load base model
13
- base_model = AutoModelForCausalLM.from_pretrained(
14
- base_model_name,
 
 
 
 
 
 
15
  torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
16
  device_map="auto" if torch.cuda.is_available() else None,
 
17
  )
18
 
19
- # Load PEFT adapter
20
- model = PeftModel.from_pretrained(base_model, adapter_repo)
21
-
22
- # Load tokenizer
23
- tokenizer = AutoTokenizer.from_pretrained(adapter_repo)
24
-
25
  # Set padding token nếu chưa có
26
  if tokenizer.pad_token is None:
27
  tokenizer.pad_token = tokenizer.eos_token
28
 
29
  model.eval()
30
- print("Model đã sẵn sàng!")
31
 
32
 
33
  def respond(
@@ -55,8 +55,16 @@ def respond(
55
 
56
  # Tokenize
57
  inputs = tokenizer(prompt, return_tensors="pt")
58
- if torch.cuda.is_available():
59
- inputs = {k: v.to(model.device) for k, v in inputs.items()}
 
 
 
 
 
 
 
 
60
 
61
  # Generate với streaming token-by-token
62
  input_length = inputs["input_ids"].shape[1]
@@ -115,28 +123,7 @@ chatbot = gr.ChatInterface(
115
  value="You are a helpful coding assistant. Provide clear, concise, and accurate code solutions and explanations.",
116
  label="System message",
117
  lines=3,
118
- ),
119
- gr.Slider(
120
- minimum=1,
121
- maximum=2048,
122
- value=512,
123
- step=1,
124
- label="Max new tokens",
125
- ),
126
- gr.Slider(
127
- minimum=0.1,
128
- maximum=2.0,
129
- value=0.7,
130
- step=0.1,
131
- label="Temperature",
132
- ),
133
- gr.Slider(
134
- minimum=0.1,
135
- maximum=1.0,
136
- value=0.95,
137
- step=0.05,
138
- label="Top-p (nucleus sampling)",
139
- ),
140
  ],
141
  )
142
 
 
1
  import gradio as gr
2
  import torch
3
  from transformers import AutoModelForCausalLM, AutoTokenizer
 
4
 
5
 
6
+ # Load tokenizer and model
7
  print("Đang tải model...")
8
+ model_name = "cochi1706/decoder"
9
+ subfolder = "qwen3-finetuned"
10
 
11
+ # Xác định device
12
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
13
+
14
+ # Load tokenizer
15
+ tokenizer = AutoTokenizer.from_pretrained(model_name, subfolder=subfolder)
16
+
17
+ # Load model
18
+ model = AutoModelForCausalLM.from_pretrained(
19
+ model_name,
20
  torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
21
  device_map="auto" if torch.cuda.is_available() else None,
22
+ subfolder=subfolder,
23
  )
24
 
 
 
 
 
 
 
25
  # Set padding token nếu chưa có
26
  if tokenizer.pad_token is None:
27
  tokenizer.pad_token = tokenizer.eos_token
28
 
29
  model.eval()
30
+ print(f"Model đã sẵn sàng! Device: {device}")
31
 
32
 
33
  def respond(
 
55
 
56
  # Tokenize
57
  inputs = tokenizer(prompt, return_tensors="pt")
58
+
59
+ # Di chuyển inputs đến device của model
60
+ # Nếu model đã có device_map, lấy device từ model parameters
61
+ if hasattr(model, 'hf_device_map') and model.hf_device_map:
62
+ # Model đã được phân bổ trên nhiều device, sử dụng device của layer đầu tiên
63
+ first_param_device = next(model.parameters()).device
64
+ inputs = {k: v.to(first_param_device) for k, v in inputs.items()}
65
+ else:
66
+ # Model trên một device duy nhất
67
+ inputs = {k: v.to(device) for k, v in inputs.items()}
68
 
69
  # Generate với streaming token-by-token
70
  input_length = inputs["input_ids"].shape[1]
 
123
  value="You are a helpful coding assistant. Provide clear, concise, and accurate code solutions and explanations.",
124
  label="System message",
125
  lines=3,
126
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
  ],
128
  )
129