"""Batched text generation on Habana HPU with a converted Llama checkpoint.

Loads a locally converted tokenizer and an HF-format training checkpoint,
moves the model to the HPU in bfloat16, generates continuations for a small
batch of Hindi (romanized) prompts, and prints prompt + continuation pairs.
"""

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import habana_frameworks.torch.core as htcore  # noqa: F401 -- registers the HPU backend

# --- Tokenizer -------------------------------------------------------------
tokenizer = AutoTokenizer.from_pretrained(
    "/mnt/weka/peacock/evaluation/ConvertedTokenizer"
)

# Llama-family tokenizers usually ship without a pad token; batched
# tokenization with padding=True needs one, so fall back to EOS.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Decoder-only models must be LEFT-padded for batched generate(): with the
# default right padding the model is asked to continue from pad tokens.
tokenizer.padding_side = "left"

# --- Model -----------------------------------------------------------------
model_id = (
    "/mnt/weka/peacock/training/llama3b/checkpoint/llamav2-3b/128/"
    "sangraha-hindi-TI15500-TP4PP2DP16-MBS8/hf/global_step11000"
)

model = AutoModelForCausalLM.from_pretrained(model_id)
model = model.bfloat16()  # bf16 is the native compute dtype on Gaudi/HPU
model = model.eval().to("hpu")

# --- Batched generation ----------------------------------------------------
input_text = [
    "ahmedabad me baarish ",
    "mumbai se delhi jane ke liye ",
    "chhote baccho ke liye upahar ",
]
encoded = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True)
input_ids = encoded["input_ids"].to("hpu")
attention_mask = encoded["attention_mask"].to("hpu")

with torch.no_grad():
    outputs = model.generate(
        input_ids,
        attention_mask=attention_mask,
        # Same budget as the old max_length = prompt_len + 100, stated directly.
        max_new_tokens=100,
        pad_token_id=tokenizer.pad_token_id,
    )

# Strip the prompt by TOKEN position rather than by character length:
# decoding the full sequence and slicing off len(prompt) characters breaks
# whenever the tokenizer does not round-trip the prompt text exactly.
generated = outputs[:, input_ids.shape[1]:]
output_texts = [tokenizer.decode(seq, skip_special_tokens=True) for seq in generated]

for i, output_text in enumerate(output_texts):
    print(f"Output for input {i}: {input_text[i]} {output_text.strip()}")