# NOTE: the three lines below are Hugging Face Hub web-page residue that was
# accidentally captured with the file; kept as comments so the script parses.
# applied-ai-018's picture
# Add files using upload-large-folder tool
# 5949b83 verified
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import habana_frameworks.torch.core as htcore
# Set up the environment to use the Habana device
# This step is hypothetical and assumes that the Habana PyTorch extension is installed
# and that 'habana' is a recognized device type in PyTorch.
# Load the tokenizer from a local directory.
tokenizer = AutoTokenizer.from_pretrained("/mnt/weka/peacock/evaluation/ConvertedTokenizer")

# Llama-family tokenizers often ship without a pad token; tokenizing with
# padding=True below would raise in that case, so fall back to EOS.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

#model_id="/mnt/weka/peacock/evaluation/models-hf/global_step240000_hf"
model_id="/mnt/weka/peacock/training/llama3b/checkpoint/llamav2-3b/128/sangraha-hindi-TI15500-TP4PP2DP16-MBS8/hf/global_step11000" # continued training on Sangraha Hindi
# model_id="/mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/hf/global_step240000" # pre-trained model

# Load the causal-LM checkpoint, cast to bfloat16, and move it to the HPU
# in eval mode (no dropout) for inference.
model = AutoModelForCausalLM.from_pretrained(model_id)
model = model.bfloat16()
model = model.eval().to('hpu')

# Batch of romanized-Hindi prompts to complete.
#input_text = "What is AI?"
# input_text=["What is AI?", "How old are", "I am", "I like to"]
input_text=["ahmedabad me baarish ","mumbai se delhi jane ke liye ","chhote baccho ke liye upahar "]

# Tokenize the batch with padding so the tensor is rectangular; keep the
# attention mask so generate() can ignore the pad positions.
encoded = tokenizer(input_text, return_tensors='pt', padding=True, truncation=True)
attention_mask = encoded['attention_mask'].to('hpu')
input_ids = encoded['input_ids'].to('hpu')
pad_token_id = tokenizer.pad_token_id

# Allow up to 100 tokens beyond the (padded) prompt length.
max_length = input_ids.shape[1] + 100

# Run inference without tracking gradients.
with torch.no_grad():
    outputs = model.generate(
        input_ids,
        max_length=max_length,
        attention_mask=attention_mask,
        pad_token_id=pad_token_id,
    )

# Decode every generated sequence back to text.
#output_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
output_texts = [tokenizer.decode(output, skip_special_tokens=True) for output in outputs]
for i, output_text in enumerate(output_texts):
    # NOTE(review): stripping the prompt by character count assumes decode()
    # reproduces the prompt verbatim at the start of the output — fragile if
    # tokenization normalizes whitespace; verify against actual outputs.
    print(f"Output for input {i}: {input_text[i]} {output_text[len(input_text[i]):].strip()}")