"""Batched text generation on Habana HPU with a converted Llama checkpoint.

Loads a locally converted tokenizer and an HF-format training checkpoint,
moves the model to the HPU in bfloat16, generates continuations for a small
batch of Hindi (romanized) prompts, and prints prompt + continuation pairs.
"""

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import habana_frameworks.torch.core as htcore  # noqa: F401 -- registers the HPU backend

# --- Tokenizer -------------------------------------------------------------
tokenizer = AutoTokenizer.from_pretrained(
    "/mnt/weka/peacock/evaluation/ConvertedTokenizer"
)

# Llama-family tokenizers usually ship without a pad token; batched
# tokenization with padding=True needs one, so fall back to EOS.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Decoder-only models must be LEFT-padded for batched generate(): with the
# default right padding the model is asked to continue from pad tokens.
tokenizer.padding_side = "left"

# --- Model -----------------------------------------------------------------
model_id = (
    "/mnt/weka/peacock/training/llama3b/checkpoint/llamav2-3b/128/"
    "sangraha-hindi-TI15500-TP4PP2DP16-MBS8/hf/global_step11000"
)

model = AutoModelForCausalLM.from_pretrained(model_id)
model = model.bfloat16()  # bf16 is the native compute dtype on Gaudi/HPU
model = model.eval().to("hpu")

# --- Batched generation ----------------------------------------------------
input_text = [
    "ahmedabad me baarish ",
    "mumbai se delhi jane ke liye ",
    "chhote baccho ke liye upahar ",
]
encoded = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True)
input_ids = encoded["input_ids"].to("hpu")
attention_mask = encoded["attention_mask"].to("hpu")

with torch.no_grad():
    outputs = model.generate(
        input_ids,
        attention_mask=attention_mask,
        # Same budget as the old max_length = prompt_len + 100, stated directly.
        max_new_tokens=100,
        pad_token_id=tokenizer.pad_token_id,
    )

# Strip the prompt by TOKEN position rather than by character length:
# decoding the full sequence and slicing off len(prompt) characters breaks
# whenever the tokenizer does not round-trip the prompt text exactly.
generated = outputs[:, input_ids.shape[1]:]
output_texts = [tokenizer.decode(seq, skip_special_tokens=True) for seq in generated]

for i, output_text in enumerate(output_texts):
    print(f"Output for input {i}: {input_text[i]} {output_text.strip()}")