import json
from threading import Thread

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

from .configuration_phi import PhiConfig
from .modeling_phi import PhiForCausalLM

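# Example 1 (disabled): one-shot, non-streaming generation with the
# simplified Phi-2 model, kept as a reference snippet.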
""" |
|
|
if __name__ == "__main__": |
|
|
device = "cuda" |
|
|
|
|
|
model_config = PhiConfig(**json.load(open("simplified_phi2/config.json"))) |
|
|
model = PhiForCausalLM(model_config).to(device) |
|
|
phi_model = AutoModelForCausalLM.from_pretrained("microsoft/phi-2", trust_remote_code=True) |
|
|
model.load_state_dict(phi_model.state_dict()) |
|
|
|
|
|
tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2", trust_remote_code=True) |
|
|
|
|
|
text = "Write an essay on sea monkeys: " |
|
|
tokens = tokenizer(text, return_tensors="pt", return_attention_mask=False).to(device) |
|
|
outputs = model.generate(**tokens, max_length=200) |
|
|
text = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0] |
|
|
print(text) |
|
|
""" |
|
|
|
|
|
|
|
|
|
|
|
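# Example 2 (disabled): stream tokens from the hosted model through the
# Hugging Face Inference API rather than running it locally.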
""" |
|
|
if __name__ == "__main__": |
|
|
client = InferenceClient(model="microsoft/phi-2") |
|
|
text = "How do you make cheese?" |
|
|
for token in client.text_generation(text, max_new_tokens=500, stream=True): |
|
|
print(token, end="") |
|
|
""" |
|
|
|
|
|
|
|
|
|
|
|
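# Active example: stream tokens from the local simplified model.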
if __name__ == "__main__":
    tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2", trust_remote_code=True)
    token_streamer = TextIteratorStreamer(tokenizer)

    # Build the simplified model and copy the pretrained weights into it.
    device = "cuda"
    model_config = PhiConfig(**json.load(open("simplified_phi2/config.json")))
    model = PhiForCausalLM(model_config).to(device)
    phi_model = AutoModelForCausalLM.from_pretrained("microsoft/phi-2", trust_remote_code=True)
    model.load_state_dict(phi_model.state_dict())

    # model.generate() blocks until generation finishes, so run it in a
    # background thread; TextIteratorStreamer hands decoded tokens to the
    # main thread as they are produced.
    thread = Thread(
        target=model.generate,
        kwargs=dict(
            # BatchEncoding is a mapping, so dict() merges the tokenized
            # prompt (input_ids) with the generation kwargs below.
            tokenizer(
                "Here is an essay on sea monkeys: ",
                return_tensors="pt",
                return_attention_mask=False,
            ).to(device),
            streamer=token_streamer,
            max_new_tokens=500,
            eos_token_id=tokenizer.eos_token_id,
        ),
    )
    thread.start()

    # Consume tokens as they arrive; the iterator ends when generation
    # stops (EOS token or max_new_tokens reached).
    my_output = ""
    for new_token in token_streamer:
        my_output += new_token
        print(new_token, end="", flush=True)
    print()