Spaces:

sangyan5
/

Neural-Assessment-Generator

Build error

App Files Files Community

Neural-Assessment-Generator / Question_Eval4.py

sangyan5

Upload 11 files

7312afb verified about 1 month ago

raw

history blame contribute delete

3.8 kB

	import torch
	from transformers import GPT2LMHeadModel, GPT2TokenizerFast, pipeline
	import textstat
	import math

	class QGEvaluator:
	    """Score generated questions for fluency and answerability.

	    Fluency is measured as the GPT-2 perplexity of the question text.
	    Answerability is measured by running an extractive question-answering
	    pipeline over the context and comparing the span it extracts with the
	    answer the question was generated from.
	    """

	    # Perplexity strictly below this value counts as fluent (heuristic cut-off).
	    PPL_THRESHOLD = 100
	    # A QA score strictly above this value counts as answerable even when the
	    # predicted span does not match the original answer.
	    CONFIDENCE_THRESHOLD = 0.5

	    def __init__(self):
	        """Load the GPT-2 language model and the question-answering pipeline."""
	        print("Loading Evaluation Models...")
	        self.device = "cpu"

	        # 1. Fluency Model
	        self.ppl_model = GPT2LMHeadModel.from_pretrained("gpt2").to(self.device)
	        self.ppl_tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")

	        # 2. Answerability Model
	        self.qa_pipeline = pipeline(
	            "question-answering",
	            model="deepset/roberta-base-squad2",
	        )

	    def calculate_perplexity(self, text):
	        """Metric 1: Fluency (lower is better).

	        Args:
	            text: The text to score.

	        Returns:
	            ``exp`` of the language-model loss for ``text``, or
	            ``float("inf")`` when ``text`` is blank or tokenizes to fewer
	            than two tokens.
	        """
	        if not text or not text.strip():
	            return float("inf")

	        # Truncate to the model's context window so over-long input is
	        # scored on its leading tokens instead of failing in the forward pass.
	        encodings = self.ppl_tokenizer(
	            text,
	            return_tensors="pt",
	            truncation=True,
	            max_length=self.ppl_model.config.n_positions,
	        )
	        input_ids = encodings.input_ids.to(self.device)

	        # The loss averages next-token predictions, so it is only defined
	        # when there are at least two tokens.
	        if input_ids.size(1) < 2:
	            return float("inf")

	        with torch.no_grad():
	            outputs = self.ppl_model(input_ids, labels=input_ids)
	        return math.exp(outputs.loss.item())

	    @staticmethod
	    def _answers_match(original_answer, predicted_answer):
	        """Return True when one answer contains the other, ignoring case.

	        Surrounding whitespace is ignored. A blank answer never matches,
	        because the empty string is a substring of every string and would
	        otherwise always count as a hit.
	        """
	        expected = (original_answer or "").strip().lower()
	        predicted = (predicted_answer or "").strip().lower()
	        if not expected or not predicted:
	            return False
	        return expected in predicted or predicted in expected

	    def evaluate_question(self, question, context, original_answer):
	        """Evaluate one generated question against its context and answer.

	        A question is "Good" when it is fluent AND the QA pipeline either
	        recovers the original answer or is confident in its own answer.

	        Args:
	            question: The generated question text.
	            context: The passage the question should be answerable from.
	            original_answer: The answer the question was generated for.

	        Returns:
	            A dict with the keys "Question", "Original Answer",
	            "Predicted Answer", "Match" ("Yes" or "No") and "Verdict"
	            ("✅ Good" or "❌ Bad").
	        """
	        predicted_answer = ""
	        confidence = 0.0
	        is_fluent = False

	        # A blank question or context cannot be scored, so neither model is
	        # invoked and the defaults above produce a "Bad" verdict.
	        # NOTE(review): the QA pipeline is expected to reject empty inputs;
	        # confirm against the installed transformers version.
	        has_input = bool(
	            question and question.strip() and context and context.strip()
	        )
	        if has_input:
	            # --- 1. Fluency Check ---
	            ppl = self.calculate_perplexity(question)
	            is_fluent = ppl < self.PPL_THRESHOLD

	            # --- 2. Answerability Check ---
	            qa_result = self.qa_pipeline(question=question, context=context)
	            predicted_answer = qa_result['answer']
	            confidence = qa_result['score']

	        is_correct_answer = self._answers_match(original_answer, predicted_answer)
	        is_answerable = is_correct_answer or confidence > self.CONFIDENCE_THRESHOLD
	        verdict = "✅ Good" if is_fluent and is_answerable else "❌ Bad"

	        return {
	            "Question": question,
	            "Original Answer": original_answer,
	            "Predicted Answer": predicted_answer,  # What the QA model extracted
	            "Match": "Yes" if is_correct_answer else "No",
	            "Verdict": verdict,
	        }

	# --- Usage Example ---
	if __name__ == "__main__":
	    qg_evaluator = QGEvaluator()

	    # Each sample pairs a generated question with the passage it came from
	    # and the answer it was generated for.
	    samples = [
	        dict(
	            context=(
	                "A political party is a political organization that typically "
	                "seeks to attain and maintain political power within government, "
	                "usually by participating in political campaigns, educational "
	                "outreach, or protest actions."
	            ),
	            original_answer="A political party",
	            question=(
	                "What is a political organization that seeks to attain and "
	                "maintain political power within government?"
	            ),
	        ),
	        dict(
	            context=(
	                "A political system is a framework which defines acceptable "
	                "political methods within a society."
	            ),
	            original_answer="A political system",
	            question="What defines acceptable political methods within a society?",
	        ),
	        dict(
	            context=(
	                " The branch of social science that studies politics and "
	                "government is referred to as political science."
	            ),
	            original_answer="political science",
	            question=(
	                "What is the branch of social science that studies politics "
	                "and government called?"
	            ),
	        ),
	    ]

	    print("\n--- Evaluation Report ---")
	    for sample in samples:
	        report = qg_evaluator.evaluate_question(
	            question=sample["question"],
	            context=sample["context"],
	            original_answer=sample["original_answer"],
	        )
	        print(report)