# 簡單的 python 測試程式
# #4
# by heaven116 - opened
# Run Gemma3 with HF transformers (processor + conditional-generation path).
# Experiment notes:
# - loading with mlx-lm failed: ValueError: Expected shape (262208, 3840)
#   but received shape (318080, 3840)
# - restoring the model from gguf is not only slow, it is memory-hungry
# - restoring from gguf and saving the config (sic: "cofig.sys" in the
#   original note — presumably the model config file) to replace the original
#   model files lets mlx-lm load the model; needs about 45 GB of RAM
from transformers import AutoProcessor, Gemma3ForConditionalGeneration

# Download the model to a local folder first, then point at that directory.
model_path = "/Volumes/MacOS/models/Gemma-3-TAIDE-12b-Chat"

# Load the processor and the model. The processor handles both chat
# templating and decoding, so no separate AutoConfig/AutoTokenizer load is
# needed here (they were previously loaded but never used).
processor = AutoProcessor.from_pretrained(model_path, use_fast=False)
model = Gemma3ForConditionalGeneration.from_pretrained(model_path)

# Prompt: system persona plus two user questions.
messages = [
    {
        "role": "system",
        "content": [
            {"type": "text", "text": "你是一個來自台灣的AI助理,你的名字是 TAIDE,樂於以台灣人的立場幫助使用者,會用正體中文回答問題。"}
        ],
    },
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "台灣在什麼地方?"},
            {"type": "text", "text": "台灣有什麼特色?"},
        ],
    },
]

# Interchangeable example messages:
#
# Example 1 (vision input):
# {
#     "role": "user", "content": [
#         {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg"},
#         {"type": "text", "text": "Where is the cat standing?"},
#     ]
# },
#
# Example 2 (text only):
# {
#     "role": "user", "content": [
#         {"type": "text", "text": "台灣在什麼地方?"},
#         {"type": "text", "text": "台灣有什麼特色?"},
#     ]
# },

# Build model inputs by applying the chat template.
inputs = processor.apply_chat_template(
    messages,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
    add_generation_prompt=True,
)

# Generate and print the decoded output.
output = model.generate(**inputs, max_new_tokens=1024, cache_implementation="static")
print(processor.decode(output[0], skip_special_tokens=True))
# Run Gemma3 with HF transformers (text-only AutoModelForCausalLM path).
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer

# Download the model to a local folder first, then point at that directory.
model_path = "/Volumes/MacOS/models/Gemma-3-TAIDE-12b-Chat"

# Load config, model, and tokenizer.
config = AutoConfig.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path, config=config)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Stop generation at end-of-turn rather than emitting trailing tokens.
# NOTE(review): "<|eot_id|>" is a Llama-3 marker; a Gemma tokenizer normally
# does not define it, in which case convert_tokens_to_ids returns None (or
# the unk id) — filter invalid ids so generate() only gets real token ids.
eot_id = tokenizer.convert_tokens_to_ids("<|eot_id|>")
terminators = [
    tid for tid in (tokenizer.eos_token_id, eot_id)
    if tid is not None and tid != tokenizer.unk_token_id
]

# Prompt: system persona plus two user questions.
messages = [
    {
        "role": "system",
        "content": [
            {"type": "text", "text": "你是一個來自台灣的AI助理,你的名字是 TAIDE,樂於以台灣人的立場幫助使用者,會用正體中文回答問題。"}
        ],
    },
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "台灣在什麼地方?"},
            {"type": "text", "text": "台灣有什麼特色?"},
        ],
    },
]

# Interchangeable example messages:
#
# Example 1 (vision input):
# {
#     "role": "user", "content": [
#         {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg"},
#         {"type": "text", "text": "Where is the cat standing?"},
#     ]
# },
#
# Example 2 (text only):
# {
#     "role": "user", "content": [
#         {"type": "text", "text": "台灣在什麼地方?"},
#         {"type": "text", "text": "台灣有什麼特色?"},
#     ]
# },

# Build model inputs by applying the chat template.
inputs = tokenizer.apply_chat_template(
    messages,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
    add_generation_prompt=True,
)

# Generate and print the decoded output. The terminators are now actually
# passed to generate() — previously `eos_token_id=terminators` was a dead
# assignment to a module-level variable and had no effect on generation.
output = model.generate(
    **inputs,
    max_new_tokens=1024,
    cache_implementation="static",
    eos_token_id=terminators,
)
print(tokenizer.decode(output[0], skip_special_tokens=True))