# 簡單的 python 測試程式
# #4
# by heaven116 - opened
# Run Gemma3 with HF transformers (processor + conditional-generation path).
# Experiment notes:
# - loading with mlx-lm failed: ValueError: Expected shape (262208, 3840)
#   but received shape (318080, 3840)
# - restoring the model from gguf is not only slow, it is memory-hungry
# - restoring from gguf and saving the config (sic: "cofig.sys" in the
#   original note — presumably the model config file) to replace the original
#   model files lets mlx-lm load the model; needs about 45 GB of RAM
from transformers import AutoProcessor, Gemma3ForConditionalGeneration

# Download the model to a local folder first, then point at that directory.
model_path = "/Volumes/MacOS/models/Gemma-3-TAIDE-12b-Chat"

# Load the processor and the model. The processor handles both chat
# templating and decoding, so no separate AutoConfig/AutoTokenizer load is
# needed here (they were previously loaded but never used).
processor = AutoProcessor.from_pretrained(model_path, use_fast=False)
model = Gemma3ForConditionalGeneration.from_pretrained(model_path)

# Prompt: system persona plus two user questions.
messages = [
    {
        "role": "system",
        "content": [
            {"type": "text", "text": "你是一個來自台灣的AI助理,你的名字是 TAIDE,樂於以台灣人的立場幫助使用者,會用正體中文回答問題。"}
        ],
    },
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "台灣在什麼地方?"},
            {"type": "text", "text": "台灣有什麼特色?"},
        ],
    },
]

# Interchangeable example messages:
#
# Example 1 (vision input):
# {
#     "role": "user", "content": [
#         {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg"},
#         {"type": "text", "text": "Where is the cat standing?"},
#     ]
# },
#
# Example 2 (text only):
# {
#     "role": "user", "content": [
#         {"type": "text", "text": "台灣在什麼地方?"},
#         {"type": "text", "text": "台灣有什麼特色?"},
#     ]
# },

# Build model inputs by applying the chat template.
inputs = processor.apply_chat_template(
    messages,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
    add_generation_prompt=True,
)

# Generate and print the decoded output.
output = model.generate(**inputs, max_new_tokens=1024, cache_implementation="static")
print(processor.decode(output[0], skip_special_tokens=True))
# Run Gemma3 with HF transformers (text-only AutoModelForCausalLM path).
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer

# Download the model to a local folder first, then point at that directory.
model_path = "/Volumes/MacOS/models/Gemma-3-TAIDE-12b-Chat"

# Load config, model, and tokenizer.
config = AutoConfig.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path, config=config)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Stop generation at end-of-turn rather than emitting trailing tokens.
# NOTE(review): "<|eot_id|>" is a Llama-3 marker; a Gemma tokenizer normally
# does not define it, in which case convert_tokens_to_ids returns None (or
# the unk id) — filter invalid ids so generate() only gets real token ids.
eot_id = tokenizer.convert_tokens_to_ids("<|eot_id|>")
terminators = [
    tid for tid in (tokenizer.eos_token_id, eot_id)
    if tid is not None and tid != tokenizer.unk_token_id
]

# Prompt: system persona plus two user questions.
messages = [
    {
        "role": "system",
        "content": [
            {"type": "text", "text": "你是一個來自台灣的AI助理,你的名字是 TAIDE,樂於以台灣人的立場幫助使用者,會用正體中文回答問題。"}
        ],
    },
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "台灣在什麼地方?"},
            {"type": "text", "text": "台灣有什麼特色?"},
        ],
    },
]

# Interchangeable example messages:
#
# Example 1 (vision input):
# {
#     "role": "user", "content": [
#         {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg"},
#         {"type": "text", "text": "Where is the cat standing?"},
#     ]
# },
#
# Example 2 (text only):
# {
#     "role": "user", "content": [
#         {"type": "text", "text": "台灣在什麼地方?"},
#         {"type": "text", "text": "台灣有什麼特色?"},
#     ]
# },

# Build model inputs by applying the chat template.
inputs = tokenizer.apply_chat_template(
    messages,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
    add_generation_prompt=True,
)

# Generate and print the decoded output. The terminators are now actually
# passed to generate() — previously `eos_token_id=terminators` was a dead
# assignment to a module-level variable and had no effect on generation.
output = model.generate(
    **inputs,
    max_new_tokens=1024,
    cache_implementation="static",
    eos_token_id=terminators,
)
print(tokenizer.decode(output[0], skip_special_tokens=True))