"""Sample chat completions from a PEFT fine-tuned causal language model.

The script loads the adapter checkpoint named by ``peft_model_id`` together
with its tokenizer, wraps both in a ``text-generation`` pipeline, and prints
the model's answer to a couple of example questions.
"""
import torch
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer, pipeline

peft_model_id = "philschmid/gemma-7b-dolly-chatml"

# Load the tokenizer and the adapter-augmented model in half precision;
# device_map="auto" delegates device placement to the library.
tokenizer = AutoTokenizer.from_pretrained(peft_model_id)
model = AutoPeftModelForCausalLM.from_pretrained(
    peft_model_id,
    device_map="auto",
    torch_dtype=torch.float16,
)
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

# Generation is stopped at the "<|im_end|>" marker rather than at the
# tokenizer's default EOS token.
# NOTE(review): only the first id of the tokenized marker is used, which
# presumes "<|im_end|>" encodes to a single token for this tokenizer -- confirm.
eos_token = tokenizer("<|im_end|>", add_special_tokens=False)["input_ids"][0]
print(f"eos_token: {eos_token}")


def generate_response(generator, question, eos_token_id):
    """Return the model's reply to a single-turn user *question*.

    Args:
        generator: A ``text-generation`` pipeline whose tokenizer provides a
            chat template.
        question: The user message to send.
        eos_token_id: Token id at which generation should stop.

    Returns:
        The generated text with the rendered prompt stripped from the front.
    """
    messages = [{"role": "user", "content": question}]
    prompt = generator.tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    outputs = generator(
        prompt,
        max_new_tokens=1024,
        do_sample=True,
        temperature=0.7,
        top_k=50,
        top_p=0.95,
        eos_token_id=eos_token_id,
    )
    # The pipeline output starts with the prompt itself; slice it off so only
    # the newly generated continuation is returned.
    return outputs[0]["generated_text"][len(prompt):]


questions = [
    "What is the capital of Germany? Explain why thats the case and if it was different in the past?",
    "In a town, 60% of the population are adults. Among the adults, 30% have a pet dog and 40% have a pet cat. What percentage of the total population has a pet dog?",
]

for question in questions:
    response = generate_response(pipe, question, eos_token)
    print(f"prompt:\n {question}")
    print(f"response:\n {response}")