Enhance model loading logic in the chatbot application to support direct loading with a PEFT adapter fallback. Update model and tokenizer initialization for improved error handling and device management.
app.py CHANGED
@@ -1,27 +1,40 @@
 import gradio as gr
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
+from peft import PeftModel
 
 
 # Load tokenizer and model
 print("Loading model...")
-
-
+
+# Try loading directly from the full path first
+try:
+    model_name = "cochi1706/decoder/qwen3-finetuned"
+    print(f"Trying to load model from: {model_name}")
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    model = AutoModelForCausalLM.from_pretrained(
+        model_name,
+        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+        device_map="auto" if torch.cuda.is_available() else None,
+    )
+except Exception as e:
+    print(f"Could not load directly: {e}")
+    print("Trying to load as a PEFT adapter...")
+    # If that fails, try loading as a PEFT adapter
+    base_model_name = "Qwen/Qwen3-0.6B"
+    adapter_repo = "cochi1706/coding-assistant"
+
+    base_model = AutoModelForCausalLM.from_pretrained(
+        base_model_name,
+        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+        device_map="auto" if torch.cuda.is_available() else None,
+    )
+    model = PeftModel.from_pretrained(base_model, adapter_repo)
+    tokenizer = AutoTokenizer.from_pretrained(adapter_repo)
 
 # Determine device
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
-# Load tokenizer
-tokenizer = AutoTokenizer.from_pretrained(model_name, subfolder=subfolder)
-
-# Load model
-model = AutoModelForCausalLM.from_pretrained(
-    model_name,
-    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
-    device_map="auto" if torch.cuda.is_available() else None,
-    subfolder=subfolder,
-)
-
 # Set padding token if not already set
 if tokenizer.pad_token is None:
     tokenizer.pad_token = tokenizer.eos_token
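
A quick way to verify that either loading path produced a usable model is a short generation smoke test. The following is a minimal sketch, not part of this commit: it assumes the model, tokenizer, and device variables defined in app.py above, and the prompt and max_new_tokens value are illustrative placeholders.

# Generation smoke test (sketch; assumes `model`, `tokenizer`, and
# `device` from app.py above -- not part of this commit).
model.eval()
if not torch.cuda.is_available():
    # device_map is None on the CPU path, so place the model explicitly.
    model = model.to(device)

prompt = "Write a Python function that reverses a string."  # placeholder
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

with torch.no_grad():
    output_ids = model.generate(
        **inputs,
        max_new_tokens=64,  # placeholder generation budget
        pad_token_id=tokenizer.pad_token_id,
    )
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))

On the CUDA path, device_map="auto" already places the weights, so only the CPU branch needs the explicit .to(device) call.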