---
license: mit
language:
- en
---
|
|
A MoE (Mixture of Experts) model built on top of microsoft/phi-2, g-ronimo/phi-2-OpenHermes-2.5, and mlx-community/phi-2-dpo-7k, with randomly initialized gate weights.
|
|
|
|
|
|
|
|
## Example
|
|
```python
|
|
# Example: load the model, build a phi-2 style "Instruct/Output" prompt,
# sample a completion, and print the decoded text.
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import torch

# Use the GPU when CUDA is available; otherwise fall back to the CPU.
DEV = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model_name_or_path = "mzbac/phi2-2x3"

# trust_remote_code=True allows transformers to run the modeling code that
# ships with the model repository; weights are loaded in bfloat16.
model = AutoModelForCausalLM.from_pretrained(
    model_name_or_path,
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,
)
model.to(DEV)
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)

prompt = "Instruct: how backpropagation works.\nOutput:"

print("\n\n*** Generate:")

# Encode the prompt to a tensor of token ids and move it to the same device
# as the model.
inputs = tokenizer.encode(prompt, return_tensors="pt").to(DEV)

# Sampling settings: do_sample=True enables sampling, temperature=0.3 is the
# sampling temperature, and at most 500 new tokens are generated.
generate_kwargs = dict(
    input_ids=inputs,
    temperature=0.3,
    max_new_tokens=500,
    do_sample=True,
)

outputs = model.generate(**generate_kwargs)
# outputs[0] is the first (and only) generated sequence in the batch.
print(tokenizer.decode(outputs[0]))
|
|
```