Spaces:

Omnamdev02
/

AutoExamGen

Sleeping

App Files Files Community

AutoExamGen / option_generator.py

Omnamdev02

Add files via upload

300f197 unverified 3 months ago

raw

history blame contribute delete

11.8 kB

	import random
	import nltk
	from nltk.corpus import wordnet
	from nltk.tokenize import word_tokenize
	from nltk.tag import pos_tag

	class OptionGenerator:
	    """Generate multiple-choice options for a question/context pair.

	    The correct answer is either supplied by the caller or guessed from the
	    context with plain string heuristics. Distractors are taken from WordNet
	    lemmas, then from caller-supplied keywords and finally from a fixed list
	    of generic answers.
	    """

	    # NLTK packages requested, in this order, when an instance is created.
	    _NLTK_RESOURCES = (
	        'punkt',
	        'averaged_perceptron_tagger',
	        'wordnet',
	        'stopwords',
	        'universal_tagset',
	        'tagsets',
	    )

	    # (question prefix, verb searched for in the context) pairs.
	    _QUESTION_PATTERNS = (
	        ('what is', 'is'),
	        ('what are', 'are'),
	        ('what was', 'was'),
	        ('what were', 'were'),
	        ('who is', 'is'),
	        ('who are', 'are'),
	        ('who was', 'was'),
	        ('who were', 'were'),
	        ('where is', 'is'),
	        ('where are', 'are'),
	        ('when is', 'is'),
	        ('when was', 'was'),
	    )

	    # Used only when WordNet and the global keywords give too few distractors.
	    _GENERIC_DISTRACTORS = (
	        'True', 'False', 'Yes', 'No', 'Maybe', 'Always', 'Never',
	        'Sometimes', 'Often', 'Rarely', 'All of the above', 'None of the above',
	    )

	    # Punctuation trimmed from the edges of context/question tokens.
	    _TRIM_CHARS = ',.!?;:'

	    def __init__(self):
	        """Initialize the option generator with NLTK resources.

	        Raises:
	            Exception: whatever NLTK raised while loading its data; the error
	                is printed and then re-raised unchanged.
	        """
	        try:
	            # Download required NLTK data with explicit resource names
	            for resource in self._NLTK_RESOURCES:
	                nltk.download(resource, quiet=True)

	            self.stop_words = set(nltk.corpus.stopwords.words('english'))
	            self.word_net_lemmatizer = nltk.WordNetLemmatizer()

	            # POS tag mapping for WordNet
	            self.pos_mapping = {
	                'NN': 'n', 'NNS': 'n', 'NNP': 'n', 'NNPS': 'n',
	                'VB': 'v', 'VBD': 'v', 'VBG': 'v', 'VBN': 'v', 'VBP': 'v', 'VBZ': 'v',
	                'JJ': 'a', 'JJR': 'a', 'JJS': 'a',
	                'RB': 'r', 'RBR': 'r', 'RBS': 'r'
	            }

	        except Exception as e:
	            print(f"Error initializing OptionGenerator: {str(e)}")
	            raise

	    @staticmethod
	    def _lemma_names(lookup, wordnet_pos=None):
	        """Yield lower-cased lemma names (underscores as spaces) for *lookup*."""
	        for synset in wordnet.synsets(lookup, pos=wordnet_pos):
	            for lemma in synset.lemmas():
	                yield lemma.name().replace('_', ' ').lower()

	    @staticmethod
	    def _find_whole_word(text, word):
	        """Return the index of the first whole-word match of *word* in *text*.

	        The comparison is case-insensitive and *word* is expected in lower
	        case. A match is rejected when it is directly preceded or followed by
	        an alphanumeric character, so 'is' is not found inside 'Paris'.
	        Returns -1 when there is no such match.
	        """
	        width = len(word)
	        for start in range(len(text) - width + 1):
	            end = start + width
	            if text[start:end].lower() != word:
	                continue
	            if start > 0 and text[start - 1].isalnum():
	                continue
	            if end < len(text) and text[end].isalnum():
	                continue
	            return start
	        return -1

	    @staticmethod
	    def _placeholder_result(num_options=4):
	        """Return the 'Option A', 'Option B', ... fallback result.

	        Falls back to four entries when *num_options* is not a positive int.
	        """
	        count = num_options if isinstance(num_options, int) and num_options > 0 else 4
	        options = [
	            f"Option {chr(65 + i)}" if i < 26 else f"Option {i + 1}"
	            for i in range(count)
	        ]
	        return {
	            'options': options,
	            'correct_index': 0,
	            'correct_answer': options[0]
	        }

	    def _get_synonyms(self, word, pos=None):
	        """Get single-word synonyms for a word using WordNet.

	        Args:
	            word (str): Word to look up.
	            pos (str, optional): Tag looked up in ``self.pos_mapping``.

	        Returns:
	            list: At most 10 lower-cased synonyms in the order WordNet yielded
	            them, never containing *word* itself (compared case-insensitively).
	            Empty when the word is shorter than 3 characters, is a stop word,
	            or the lookup fails.
	        """
	        # Skip if word is missing, too short or a stop word
	        if not word or len(word) < 3 or word.lower() in self.stop_words:
	            return []

	        target = word.lower()
	        found = {}  # dict as an insertion-ordered set: stable output order

	        def collect(lookup, wordnet_pos=None):
	            for name in self._lemma_names(lookup, wordnet_pos):
	                if name != target and len(name.split()) == 1:
	                    found.setdefault(name, None)

	        try:
	            wordnet_pos = self.pos_mapping.get(pos, None) if pos else None

	            # Try with the provided POS tag first
	            if wordnet_pos:
	                collect(word, wordnet_pos)

	            # If no synonyms found, try without POS tag
	            if not found:
	                collect(word)

	            # If still no synonyms, try with lemmatization
	            if not found and pos and pos.startswith('VB'):
	                base_form = self.word_net_lemmatizer.lemmatize(word, pos='v')
	                if base_form != word:
	                    collect(base_form, 'v')

	        except Exception as e:
	            # Best effort: a failed lookup yields whatever was gathered so far.
	            print(f"Error getting synonyms for '{word}': {str(e)}")

	        return list(found)[:10]  # Return at most 10 synonyms

	    def _get_distractors(self, word, pos=None, num=3):
	        """Generate distractors for a given word.

	        Single-word synonyms are used first; if fewer than *num* were found,
	        any other WordNet lemma names (multi-word ones included) are added.

	        Returns:
	            list: At most *num* distinct lower-cased strings, none equal to
	            *word* ignoring case.
	        """
	        num = max(num, 0)
	        distractors = {}  # insertion-ordered set

	        try:
	            # Get synonyms first
	            for synonym in self._get_synonyms(word, pos)[:num]:
	                distractors.setdefault(synonym, None)

	            # If not enough synonyms, add similar words
	            if len(distractors) < num:
	                target = word.lower()
	                wordnet_pos = self.pos_mapping.get(pos, None) if pos else None
	                for name in self._lemma_names(word, wordnet_pos):
	                    if name != target:
	                        distractors.setdefault(name, None)
	                    if len(distractors) >= num:
	                        break
	        except Exception as e:
	            # Best effort: callers fall back to other distractor sources.
	            print(f"Error generating distractors for '{word}': {str(e)}")

	        return list(distractors)[:num]

	    def extract_answer_from_context(self, question, context):
	        """
	        Extract the most likely answer from the context based on the question.
	        This version uses simple string matching instead of POS tagging.

	        For questions starting with one of the known prefixes ("what is",
	        "who was", ...), the text following the first whole-word occurrence
	        of the verb in the context is used, cut at the first comma or period.
	        That phrase is ignored when it is empty or already part of the
	        question. Otherwise the first suitable context word is returned:
	        a capitalized word, then a word longer than 3 characters (both absent
	        from the question and the stop words), then any non-stop word longer
	        than 2 characters, then simply the first word.

	        Args:
	            question (str): Generated question
	            context (str): Source sentence/context

	        Returns:
	            str: Extracted answer, or "Unknown" when the context has no words
	        """
	        try:
	            words = context.split()
	            if not words:
	                return "Unknown"

	            q_lower = question.lower()

	            # Try to find a direct answer using common patterns
	            for q_pattern, verb in self._QUESTION_PATTERNS:
	                if not q_lower.startswith(q_pattern):
	                    continue
	                verb_pos = self._find_whole_word(context, verb)
	                if verb_pos != -1:
	                    # Get the text after the verb, up to the first ',' or '.'
	                    answer_part = context[verb_pos + len(verb):].strip(' ,.?!')
	                    phrase = answer_part.split(',')[0].split('.')[0].strip()
	                    # A phrase that merely restates the question is not an answer
	                    if phrase and phrase.lower() not in q_lower:
	                        return phrase
	                break

	            question_terms = {token.strip(self._TRIM_CHARS) for token in q_lower.split()}
	            cleaned = [token.strip(self._TRIM_CHARS) for token in words]
	            cleaned = [token for token in cleaned if token]

	            def is_new(token):
	                lowered = token.lower()
	                return lowered not in question_terms and lowered not in self.stop_words

	            # Fallback: first capitalized word not in the question
	            for token in cleaned:
	                if len(token) > 2 and token[0].isupper() and is_new(token):
	                    return token

	            # Last resort: first longer word not in the question
	            for token in cleaned:
	                if len(token) > 3 and is_new(token):
	                    return token

	            # If all else fails, return the first word that's not a stop word
	            for token in cleaned:
	                if len(token) > 2 and token.lower() not in self.stop_words:
	                    return token

	            # Final fallback
	            return words[0]

	        except Exception as e:
	            print(f"Error extracting answer: {str(e)}")
	            # Return the first word as fallback; never raise from the handler
	            fallback_words = context.split() if isinstance(context, str) else []
	            return fallback_words[0] if fallback_words else "Unknown"

	    def create_mcq_options(self, question, context, num_options=4, correct_answer=None, global_keywords=None):
	        """
	        Create multiple choice options for a given question and context.

	        Args:
	            question (str): The question text
	            context (str): The context from which the question was generated
	            num_options (int): Number of options to generate (including correct answer)
	            correct_answer (str, optional): The correct answer if known
	            global_keywords (list, optional): List of keywords from the entire document to use as distractors

	        Returns:
	            dict: 'options' (shuffled list that always contains the correct
	            answer), 'correct_index' (its position) and 'correct_answer'.
	            Placeholder options ('Option A', ...) are returned when no answer
	            can be determined or an error occurs. Fewer than *num_options*
	            entries are returned when all distractor sources run out.
	        """
	        try:
	            # Negative slice bounds would drop distractors from the wrong end
	            num_distractors = max(num_options - 1, 0)

	            # Extract the correct answer from context if not provided
	            if not correct_answer:
	                correct_answer = self.extract_answer_from_context(question, context)

	            # If we couldn't extract a good answer, use a fallback
	            if not correct_answer or correct_answer == "Unknown":
	                return self._placeholder_result(num_options)

	            answer_key = correct_answer.lower()
	            seen = {answer_key}
	            distractors = []

	            def add(candidate):
	                # Case-insensitive de-duplication that keeps insertion order
	                key = candidate.lower()
	                if key not in seen:
	                    seen.add(key)
	                    distractors.append(candidate)

	            # Generate more WordNet distractors than needed to allow filtering
	            for candidate in self._get_distractors(correct_answer, num=min(10, num_options * 2)):
	                add(candidate)

	            # If we don't have enough distractors, try using global keywords
	            if len(distractors) < num_distractors and global_keywords:
	                potential_distractors = [k for k in global_keywords if k.lower() != answer_key]
	                # Shuffle a copy so the caller's list is left untouched
	                random.shuffle(potential_distractors)

	                for kw in potential_distractors:
	                    add(kw)
	                    if len(distractors) >= num_options + 2:  # Get a few extra
	                        break

	            # If we still don't have enough distractors, add some generic ones
	            for generic in self._GENERIC_DISTRACTORS:
	                if len(distractors) >= num_distractors:
	                    break
	                add(generic)

	            # Select the final set of options
	            options = [correct_answer] + distractors[:num_distractors]
	            random.shuffle(options)

	            return {
	                'options': options,
	                'correct_index': options.index(correct_answer),
	                'correct_answer': correct_answer
	            }

	        except Exception as e:
	            print(f"Error generating options: {str(e)}")
	            # Fallback options
	            return self._placeholder_result(num_options)

	# Demo: build and print the options for one sample question when this
	# file is executed directly.
	if __name__ == "__main__":
	    generator = OptionGenerator()

	    sample_question = "What is the capital of France?"
	    sample_context = "Paris is the capital of France, known for its art, fashion, and culture."

	    print(f"Question: {sample_question}")
	    print(f"Context: {sample_context}")

	    result = generator.create_mcq_options(sample_question, sample_context)
	    winning_position = result['correct_index']

	    print("\nOptions:")
	    for position, text in enumerate(result['options']):
	        # Options are labelled A, B, C, ...; the correct one gets a tick.
	        letter = chr(ord("A") + position)
	        tick = " " if position != winning_position else "✓"
	        print(f"{tick} {letter}. {text}")