ambivalent02
/

eval_qwen25_lora

Model card Files Files and versions

eval_qwen25_lora / eval_truthful_vi.py

ambivalent02's picture

Upload folder using huggingface_hub

e0765d0 verified 10 months ago

history blame contribute delete

3.4 kB

	import pandas as pd
	import torch
	from transformers import AutoModelForCausalLM, AutoTokenizer
	import jsonlines
	import sys
	from tqdm.auto import tqdm
	# --- Configuration ---
	# Usage: <script> <model_name_or_path> <output_jsonl>
	# Validate the command line up front so that a missing argument produces a
	# usage message instead of a bare IndexError traceback, and so that nothing
	# is created on disk when the invocation is wrong.
	if len(sys.argv) < 3:
	    print(f"Usage: {sys.argv[0]} <model_name_or_path> <output_jsonl>", file=sys.stderr)
	    sys.exit(1)

	MODEL_NAME = sys.argv[1]  # Identifier handed to from_pretrained() for tokenizer and model.
	INPUT_FILENAME = "./Vietnamese truthful QA results.xlsx"  # Spreadsheet holding the questions.
	OUTPUT_FILENAME = sys.argv[2]  # JSON Lines file that receives one record per question.
	MAX_NEW_TOKENS = 512  # The maximum number of new tokens to generate for each answer.

	# Opened in "w" mode: an existing file at OUTPUT_FILENAME is truncated.
	writer = jsonlines.open(OUTPUT_FILENAME, "w")
	# 1. Load data from an XLSX file
	# Every failure below is reported on stderr and ends the process with a
	# non-zero status; a bare exit() would report success (status 0) to the
	# calling shell even though nothing was evaluated.
	try:
	    df = pd.read_excel(INPUT_FILENAME)
	except FileNotFoundError:
	    print(f"Error: The file '{INPUT_FILENAME}' was not found.", file=sys.stderr)
	    # The path is relative, so it is resolved against the working directory.
	    print("Please make sure your XLSX file is in the current working directory.", file=sys.stderr)
	    sys.exit(1)
	except Exception as e:
	    # Top-level boundary: any other read/parse problem is fatal for the run.
	    print(f"An error occurred while reading the Excel file: {e}", file=sys.stderr)
	    sys.exit(1)

	# 2. Select Relevant Columns and validate
	required_columns = ["Question", "Ground truth"]
	missing_columns = [name for name in required_columns if name not in df.columns]
	if missing_columns:
	    print("Error: Required columns 'Question' and/or 'Ground truth' not found.", file=sys.stderr)
	    print(f"Available columns are: {list(df.columns)}", file=sys.stderr)
	    sys.exit(1)

	# Work on an independent copy that contains only the two columns we need.
	df_processed = df[required_columns].copy()

	# 3. Load Model and Tokenizer
	print(f"Loading model '{MODEL_NAME}' and tokenizer...")
	# Set up device (use GPU if available, otherwise CPU)
	device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
	print(f"Using device: {device}")

	# FlashAttention-2 kernels require a GPU, so requesting them unconditionally
	# makes the CPU fallback above fail while loading the model. Use PyTorch's
	# scaled-dot-product attention when no CUDA device is available.
	attn_implementation = "flash_attention_2" if device.type == "cuda" else "sdpa"

	# Load the tokenizer and model identified by MODEL_NAME
	tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
	model = AutoModelForCausalLM.from_pretrained(
	    MODEL_NAME,
	    torch_dtype=torch.bfloat16,
	    attn_implementation=attn_implementation,
	)
	model.to(device)  # Move the model to the selected device

	# Some tokenizers ship without a pad token; reuse EOS so that generate()
	# always receives a valid pad_token_id.
	if tokenizer.pad_token is None:
	    tokenizer.pad_token = tokenizer.eos_token
	    model.config.pad_token_id = model.config.eos_token_id

	print("Model and tokenizer loaded successfully.")

	# 4. Generate Answers using the Model
	# For every question: build a chat prompt, greedily decode up to
	# MAX_NEW_TOKENS tokens, and append one {"question", "answer", "gold"}
	# record to the JSON Lines output.
	answers = []
	total_questions = len(df_processed)
	print(f"Generating answers for {total_questions} questions...")

	# Walk questions and ground truths in lockstep so each answer is paired with
	# the gold value of the same row, independent of the DataFrame's index labels.
	qa_pairs = zip(df_processed["Question"], df_processed["Ground truth"])

	try:
	    # total= lets tqdm render a real progress bar/ETA; a bare generator has no length.
	    for i, (question, gold) in enumerate(tqdm(qa_pairs, total=total_questions)):
	        messages = [
	            {"role": "system", "content": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."},
	            {"role": "user", "content": question},
	        ]
	        # Render the conversation to text, ending with the assistant turn header.
	        prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
	        model_inputs = tokenizer([prompt], return_tensors='pt').to(model.device)

	        # do_sample=False makes the output deterministic (no randomness)
	        output_sequences = model.generate(
	            **model_inputs,
	            max_new_tokens=MAX_NEW_TOKENS,
	            do_sample=False,
	            pad_token_id=tokenizer.pad_token_id,
	        )

	        # The output includes the original prompt, so slice it off before decoding.
	        prompt_length = model_inputs['input_ids'].shape[1]
	        generated_ids = output_sequences[0][prompt_length:]
	        answer = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

	        answers.append(answer)
	        print(f"Processed question {i + 1}/{total_questions}\nAnswer: {answer}\nGold: {gold}")
	        writer.write({
	            "question": question,
	            "answer": answer,
	            "gold": gold,
	        })
	finally:
	    # Always flush and close the output, even if generation fails part-way,
	    # so the records written so far are not lost.
	    writer.close()