cochi1706 commited on
Commit
0f06294
·
1 Parent(s): f1fc130

Enhance model loading logic in chatbot application to support direct loading and PEFT adapter fallback. Updated model and tokenizer initialization for improved error handling and device management.

Browse files
Files changed (1) hide show
  1. app.py +26 -13
app.py CHANGED
@@ -1,27 +1,40 @@
1
  import gradio as gr
2
  import torch
3
  from transformers import AutoModelForCausalLM, AutoTokenizer
 
4
 
5
 
6
  # Load tokenizer và model
7
  print("Đang tải model...")
8
- model_name = "cochi1706/decoder"
9
- subfolder = "qwen3-finetuned"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
  # Xác định device
12
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
13
 
14
- # Load tokenizer
15
- tokenizer = AutoTokenizer.from_pretrained(model_name, subfolder=subfolder)
16
-
17
- # Load model
18
- model = AutoModelForCausalLM.from_pretrained(
19
- model_name,
20
- torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
21
- device_map="auto" if torch.cuda.is_available() else None,
22
- subfolder=subfolder,
23
- )
24
-
25
  # Set padding token nếu chưa có
26
  if tokenizer.pad_token is None:
27
  tokenizer.pad_token = tokenizer.eos_token
 
1
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel


# Load tokenizer and model.
print("Đang tải model...")

# Decide device/dtype once up front so both load paths agree.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
torch_dtype = torch.float16 if use_cuda else torch.float32
device_map = "auto" if use_cuda else None

# First, try loading the fine-tuned model directly from the hub repo.
# NOTE: a Hugging Face repo id is strictly "namespace/name"; the extra path
# segment must go through the `subfolder` kwarg. The previous string
# "cochi1706/decoder/qwen3-finetuned" was an invalid repo id, so this branch
# always raised and the code silently fell through to the PEFT fallback.
try:
    model_name = "cochi1706/decoder"
    subfolder = "qwen3-finetuned"
    print(f"Đang thử load model từ: {model_name}/{subfolder}")
    tokenizer = AutoTokenizer.from_pretrained(model_name, subfolder=subfolder)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        subfolder=subfolder,
        torch_dtype=torch_dtype,
        device_map=device_map,
    )
except Exception as e:
    # Fallback: treat the checkpoint as a PEFT adapter on top of its base model.
    print(f"Không thể load trực tiếp: {e}")
    print("Đang thử load như PEFT adapter...")
    base_model_name = "Qwen/Qwen3-0.6B"
    adapter_repo = "cochi1706/coding-assistant"

    base_model = AutoModelForCausalLM.from_pretrained(
        base_model_name,
        torch_dtype=torch_dtype,
        device_map=device_map,
    )
    model = PeftModel.from_pretrained(base_model, adapter_repo)
    tokenizer = AutoTokenizer.from_pretrained(adapter_repo)

# Inference-only app: disable dropout etc. (with device_map=None the model is
# already on CPU; with device_map="auto" accelerate has placed it on the GPU).
model.eval()

# Ensure a padding token exists (Qwen-style tokenizers may ship without one).
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token