# Source: huggingface.co — Image-to-Image / vincie
# VINCIE-3B / llm14b / check_embeddings.py
# Uploaded by leigangqu via huggingface_hub (commit 5096732, verified)
import os
import pickle

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.generation import GenerationConfig
# Path to the distilled Qwen-14B checkpoint whose text embeddings are being checked.
path = "/mnt/bn/zhitian-lq/code/dalle2/runs/qwen-14b-base-double-64gpus-100k_distill_logits_fixed_seeds_lr1e-4_bs2048_pool_mean_cn/step-90000/llm"
# Note: The default behavior now has injection attack prevention off.
# tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True)
max_length = 77  # fixed context length (CLIP-style 77-token window)
tokenizer = AutoTokenizer.from_pretrained(
    path, model_max_length=max_length,
    use_fast=False, trust_remote_code=True,
    padding_side="right", truncation_side="right",
    add_eod_token=True
)
# Qwen ships no dedicated pad token; reuse the end-of-text marker as padding.
tokenizer.add_special_tokens({'pad_token': '<|endoftext|>'})
# tokenizer.pad_token = tokenizer.eot_token
tokens = tokenizer(["周慧敏穿着一件黑色的衣服,她的头发是黑色的,她的脸上带着微笑,她的手指放在嘴边。"], return_tensors="pt", padding="max_length", truncation=True)
tokens = tokens.to("cuda")
token_ids = tokens["input_ids"]
attention_mask = tokens["attention_mask"]
num_tokens = attention_mask.sum(dim=1)  # per-sequence count of real (non-pad) tokens
print(num_tokens)
# Slot right after the last real token of each sequence, clamped so a
# full-length sequence overwrites its final position instead of indexing
# out of bounds.  Hoisted into a local: the original recomputed the clamp
# three times.
range_ids = torch.arange(len(token_ids), device=token_ids.device, dtype=torch.long)
eod_positions = num_tokens.clamp(max=token_ids.size(1) - 1)
print(range_ids, eod_positions)
# Write the pad/eod token at that slot and extend the attention mask over it,
# so the eod token participates in the forward pass.
token_ids[range_ids, eod_positions] = tokenizer.pad_token_id
attention_mask[range_ids, eod_positions] = 1
print(token_ids)
print(attention_mask.sum(dim=-1))
print(token_ids.shape)
# use bf16
# model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen-14B", device_map="auto", trust_remote_code=True, bf16=True).eval()
# use fp16
# model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen-14B", device_map="auto", trust_remote_code=True, fp16=True).eval()
# use cpu only
# model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen-14B", device_map="cpu", trust_remote_code=True).eval()
# use auto mode, automatically select precision based on the device.
model = AutoModelForCausalLM.from_pretrained(path, device_map="auto", trust_remote_code=True, bf16=True).eval()
# Reference embeddings dumped by the training pipeline, for manual comparison
# in the interactive session below.
# NOTE(review): pickle.load executes arbitrary code on load — acceptable only
# because this dump comes from our own pipeline; never point it at untrusted data.
# Context manager fixes the leaked file handle of the original open().
with open("emb_643ecbffa680d_8cccdd2acbd9404f1f909f489c2f2c3d_h800_tusou_1b6_v3_cn_caption_qwen_recaption_text_qwen_last_layer_caption_emb", "rb") as f:
    obj = pickle.load(f)
# Drop into an interactive shell so `obj` and the freshly computed
# embeddings can be inspected side by side.
from IPython import embed
embed()
# Last hidden layer of the LLM — the text embedding consumed downstream.
embeddings = model(input_ids=token_ids, attention_mask=attention_mask, output_hidden_states=True).hidden_states[-1]
print(embeddings.sum(-1))
exit(0)
# Sanity-check free-form generation (note: unreachable while the exit(0)
# earlier in the script is in place).
# With transformers>=4.32.0 the generation config is picked up automatically;
# older versions would need:
# model.generation_config = GenerationConfig.from_pretrained("Qwen/Qwen-14B", trust_remote_code=True)
prompt = '蒙古国的首都是乌兰巴托(Ulaanbaatar)\n冰岛的首都是雷克雅未克(Reykjavik)\n埃塞俄比亚的首都是'
inputs = tokenizer(prompt, return_tensors='pt').to(model.device)
generated = model.generate(**inputs)
print(tokenizer.decode(generated.cpu()[0], skip_special_tokens=True))
# 蒙古国的首都是乌兰巴托(Ulaanbaatar)\n冰岛的首都是雷克雅未克(Reykjavik)\n埃塞俄比亚的首都是亚的斯亚贝巴(Addis Ababa)...