| import json |
| from threading import Thread |
| from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer |
| import torch |
|
|
| from .configuration_phi import PhiConfig |
| from .modeling_phi import PhiForCausalLM |
|
|
|
|

"""
if __name__ == "__main__":
    device = "cuda"

    model_config = PhiConfig(**json.load(open("simplified_phi2/config.json")))
    model = PhiForCausalLM(model_config).to(device)
    phi_model = AutoModelForCausalLM.from_pretrained("microsoft/phi-2", trust_remote_code=True)
    model.load_state_dict(phi_model.state_dict())

    tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2", trust_remote_code=True)

    text = "Write an essay on sea monkeys: "
    tokens = tokenizer(text, return_tensors="pt", return_attention_mask=False).to(device)
    outputs = model.generate(**tokens, max_length=200)
    text = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
    print(text)
"""
|
|
|
|

"""
from huggingface_hub import InferenceClient

if __name__ == "__main__":
    client = InferenceClient(model="microsoft/phi-2")
    text = "How do you make cheese?"
    for token in client.text_generation(text, max_new_tokens=500, stream=True):
        print(token, end="")
"""
|
|
|
|
| |
if __name__ == "__main__":
    # Streaming demo: generation runs on a worker thread while the main thread
    # prints each piece of decoded text as soon as the streamer yields it.
    tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2", trust_remote_code=True)
    token_streamer = TextIteratorStreamer(tokenizer)

    device = "cuda"

    # Build the local Phi implementation from its JSON config. The context
    # manager closes the file handle, which the bare open() call used to leak.
    with open("simplified_phi2/config.json", encoding="utf-8") as config_file:
        model_config = PhiConfig(**json.load(config_file))
    model = PhiForCausalLM(model_config).to(device)

    # Copy the weights of the reference checkpoint into the local model.
    phi_model = AutoModelForCausalLM.from_pretrained("microsoft/phi-2", trust_remote_code=True)
    model.load_state_dict(phi_model.state_dict())

    # Encode the prompt and move the resulting tensors to the model's device.
    prompt_inputs = tokenizer(
        "Here is an essay on sea monkeys: ",
        return_tensors="pt",
        return_attention_mask=False,
    ).to(device)

    # The encoded prompt is expanded into keyword arguments for generate(),
    # alongside the streamer and the generation limits.
    thread = Thread(
        target=model.generate,
        kwargs=dict(
            prompt_inputs,
            streamer=token_streamer,
            max_new_tokens=500,
            eos_token_id=tokenizer.eos_token_id,
        ),
    )
    thread.start()

    # Echo each streamed piece immediately (flush so it appears right away)
    # and keep the pieces so the full text is available afterwards.
    streamed_pieces = []
    for new_token in token_streamer:
        streamed_pieces.append(new_token)
        print(new_token, end="", flush=True)
    print()
    my_output = "".join(streamed_pieces)

    # Wait explicitly for the generation thread instead of relying on
    # interpreter shutdown to reap it.
    thread.join()
|
|