import os
import pickle

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.generation import GenerationConfig
|
|
path = "/mnt/bn/zhitian-lq/code/dalle2/runs/qwen-14b-base-double-64gpus-100k_distill_logits_fixed_seeds_lr1e-4_bs2048_pool_mean_cn/step-90000/llm"
|
|
# Fixed context length (CLIP-style 77-token text window); pad/truncate on the right.
max_length = 77
tokenizer = AutoTokenizer.from_pretrained(
    path, model_max_length=max_length,
    use_fast=False, trust_remote_code=True,
    padding_side="right", truncation_side="right",
    add_eod_token=True,
)
# The Qwen tokenizer ships no pad token; register <|endoftext|> so max-length
# padding works.
tokenizer.add_special_tokens({'pad_token': '<|endoftext|>'})

# Tokenize a test caption ("Vivian Chow wears a black outfit; her hair is black,
# her face carries a smile, and her fingers rest by her mouth.").
tokens = tokenizer(
    ["周慧敏穿着一件黑色的衣服,她的头发是黑色的,她的脸上带着微笑,她的手指放在嘴边。"],
    return_tensors="pt", padding="max_length", truncation=True,
)
tokens = tokens.to("cuda")
token_ids = tokens["input_ids"]
attention_mask = tokens["attention_mask"]

# Count the real (non-pad) tokens per sequence.
num_tokens = attention_mask.sum(dim=1)
print(num_tokens)

# Write an explicit <|endoftext|> right after the last real token and mark it as
# attended; clamp so a full-length sequence stays in bounds (overwriting its
# final token instead of indexing past the end).
range_ids = torch.arange(len(token_ids), device=token_ids.device, dtype=torch.long)
eod_pos = num_tokens.clamp(max=token_ids.size(1) - 1)
print(range_ids, eod_pos)
token_ids[range_ids, eod_pos] = tokenizer.pad_token_id
attention_mask[range_ids, eod_pos] = 1
print(token_ids)
print(attention_mask.sum(dim=-1))
print(token_ids.shape)
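# Optional check (a sketch, not in the original script): decode the first
# sequence through the appended end-of-text position to confirm the write
# landed where expected.
print(tokenizer.decode(token_ids[0][: int(num_tokens[0]) + 1]))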


# Load the distilled Qwen-14B checkpoint sharded across available GPUs in
# bfloat16 (bf16=True is a kwarg handled by the Qwen remote-code model, not
# by stock transformers).
model = AutoModelForCausalLM.from_pretrained(path, device_map="auto", trust_remote_code=True, bf16=True).eval()
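
# The GenerationConfig import at the top is otherwise unused; if the checkpoint
# directory ships a generation_config.json (an assumption), the usual Qwen
# pattern to attach it explicitly would be:
# model.generation_config = GenerationConfig.from_pretrained(path, trust_remote_code=True)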
# Reference text embeddings dumped by an earlier job, kept around for comparison.
obj = pickle.load(open("emb_643ecbffa680d_8cccdd2acbd9404f1f909f489c2f2c3d_h800_tusou_1b6_v3_cn_caption_qwen_recaption_text_qwen_last_layer_caption_emb", "rb"))

# Drop into an interactive shell to inspect `obj` before computing embeddings.
from IPython import embed
embed()

# Last-layer hidden states, shape (batch, max_length, hidden_size).
embeddings = model(input_ids=token_ids, attention_mask=attention_mask, output_hidden_states=True).hidden_states[-1]
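
# Sketch, not part of the original flow: the run name contains "pool_mean", so
# a masked mean over real tokens is presumably the sentence-level readout; the
# name `pooled` is illustrative only.
mask = attention_mask.unsqueeze(-1).to(embeddings.dtype)
pooled = (embeddings * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1)
print(pooled.shape)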
|
|
print(embeddings.sum(-1))

# Stop here; the generation sanity check below only runs if exit(0) is removed.
exit(0)
|
|
|
|
# Few-shot capital-city prompt: "The capital of Mongolia is Ulaanbaatar /
# The capital of Iceland is Reykjavik / The capital of Ethiopia is".
inputs = tokenizer('蒙古国的首都是乌兰巴托(Ulaanbaatar)\n冰岛的首都是雷克雅未克(Reykjavik)\n埃塞俄比亚的首都是', return_tensors='pt')
inputs = inputs.to(model.device)
pred = model.generate(**inputs)
print(tokenizer.decode(pred.cpu()[0], skip_special_tokens=True))
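# Expected continuation: 亚的斯亚贝巴 (Addis Ababa), assuming the base model's
# few-shot behavior survived distillation.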
|
|
|
|