import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

MODEL_PATH = "/workspace/output/glm4_7_30b/hf_temp_07i/"
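# BitsAndBytesConfig is imported above but only needed for quantized loading.
# A minimal sketch, assuming bitsandbytes is installed and a CUDA GPU is
# available -- pass it as quantization_config= to from_pretrained below
# instead of torch_dtype:
# bnb_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_compute_dtype=torch.bfloat16,
#     bnb_4bit_quant_type="nf4",
# )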
|
messages = [{"role": "user", "content": "who is rick astley?"}]

# torch_dtype and device_map are model kwargs, not tokenizer kwargs,
# so they are not passed here.
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_PATH,
    trust_remote_code=True,
)
# Build tokenized chat inputs (input_ids + attention_mask) as a BatchEncoding.
# enable_thinking=False is forwarded to the chat template to disable the
# model's "thinking" block (GLM-4-style templates).
inputs = tokenizer.apply_chat_template(
    messages,
    tokenize=True,
    add_generation_prompt=True,
    return_dict=True,
    enable_thinking=False,
    return_tensors="pt",
)
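# Optional sanity check: render the prompt as a plain string instead of token
# ids (a sketch; with tokenize=False, apply_chat_template returns a str):
# prompt_text = tokenizer.apply_chat_template(
#     messages, tokenize=False, add_generation_prompt=True, enable_thinking=False
# )
# print(prompt_text)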
|
|
# Quick sanity checks on the tokenizer and its chat template.
print(type(tokenizer))
print("chat_template is None?", tokenizer.chat_template is None)
print("chat_template head:\n", (tokenizer.chat_template or "")[:400])

print(inputs)
|
|
|
|
print('---------------------------')
# Round-trip the tokenized prompt back to text; input_ids is a 2D batch
# tensor, so decode the first (and only) row.
print(tokenizer.decode(inputs['input_ids'][0]))
# Load the model weights; device_map="auto" requires accelerate to be
# installed and places the model across the available devices.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True,
)
inputs = inputs.to(model.device)
generated_ids = model.generate(**inputs, max_new_tokens=256, use_cache=True, do_sample=True)
# Decode only the newly generated tokens (slice off the prompt); pass
# skip_special_tokens=True here to drop markers such as the EOS token.
output_text = tokenizer.decode(generated_ids[0][inputs.input_ids.shape[1]:])
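# Optional: stream tokens to stdout as they are generated. A minimal sketch
# using transformers' TextStreamer (skip_prompt=True hides the echoed prompt):
# from transformers import TextStreamer
# streamer = TextStreamer(tokenizer, skip_prompt=True)
# model.generate(**inputs, max_new_tokens=256, do_sample=True, streamer=streamer)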
|
|
print('--------------------------------------------------------------------------------------')
print(output_text)