Spaces:

nkubana0
/

IncludEd-AI

Sleeping

App Files Files Community

IncludEd-AI / services /question_generator.py

nkubana0

initial: IncludEd AI service

162cb6f 3 months ago

raw

history blame contribute delete

14.3 kB

	import re
	import random
	from typing import List, Dict

	class FreeQuestionGenerator:
	    """Template-based multiple-choice question generator for literary passages.

	    Questions are built from fixed templates. When spaCy and its
	    ``en_core_web_sm`` model can be loaded, named entities, verbs and long
	    words found in the passage are slotted into some of the templates;
	    otherwise only the generic templates are used.

	    Every question is a dict with the keys ``question``, ``options``
	    (four strings), ``correctAnswer`` (index into ``options``),
	    ``explanation`` and ``difficulty``.
	    """

	    # Boilerplate removed from the passage before any analysis.
	    _ARTIFACT_PATTERNS = (
	        re.compile(r'Folger Shakespeare Library', re.IGNORECASE),
	        re.compile(r'Get even more from the Folger', re.IGNORECASE),
	        re.compile(r'Page \d+'),
	        re.compile(r'FTLN \d+'),
	        re.compile(r'https?://\S+'),
	    )
	    _WHITESPACE = re.compile(r'\s+')

	    # Ordered tone rules: the first rule whose keyword starts a word wins.
	    # The leading \b stops keywords from matching inside unrelated words
	    # ("rage" in "courage", "love" in "gloves") while still accepting
	    # inflected forms such as "loved" or "darkness".
	    _TONE_RULES = (
	        (re.compile(r'\b(?:death|murder|tragic|sorrow|dark)'), "serious and somber"),
	        (re.compile(r'\b(?:love|joy|beauty|delight)'), "romantic and hopeful"),
	        (re.compile(r'\b(?:anger|fight|conflict|rage)'), "tense and dramatic"),
	        (re.compile(r'\b(?:wonder|mystery|strange)'), "mysterious and intriguing"),
	    )
	    _DEFAULT_TONE = "thoughtful and reflective"

	    # Wrong answers for the "who is mentioned" question.
	    _GENERIC_DISTRACTORS = (
	        "The narrator",
	        "A minor character",
	        "An unnamed person",
	        "Someone else",
	        "The protagonist",
	        "The antagonist",
	    )

	    def __init__(self):
	        """Load spaCy if possible and set up the theme/device word lists."""
	        # spaCy is optional: a missing package (ImportError) or a missing
	        # model (OSError) simply switches the generator to template-only mode.
	        # ``Exception`` rather than a bare ``except`` so Ctrl-C and
	        # interpreter shutdown are not swallowed.
	        try:
	            import spacy
	            self.nlp = spacy.load("en_core_web_sm")
	            print("✅ spaCy loaded for question generation")
	            self.has_spacy = True
	        except Exception:
	            print("⚠️ spaCy not available, using basic question generation")
	            self.nlp = None
	            self.has_spacy = False

	        # Common literature themes for questions
	        self.themes = [
	            "love and relationships",
	            "conflict and resolution",
	            "character development",
	            "moral choices",
	            "power and ambition",
	            "fate vs free will",
	            "appearance vs reality",
	            "betrayal and loyalty",
	        ]

	        # Literary devices
	        self.devices = [
	            "metaphor",
	            "symbolism",
	            "foreshadowing",
	            "irony",
	            "imagery",
	            "dialogue",
	        ]

	    @staticmethod
	    def _build_question(question: str, correct: str, distractors,
	                        explanation: str, difficulty: str) -> Dict:
	        """Assemble one question dict with its options in random order.

	        Shuffling keeps the correct answer from always sitting in the first
	        slot; ``correctAnswer`` is computed after the shuffle.
	        """
	        options = [correct, *distractors]
	        random.shuffle(options)
	        return {
	            "question": question,
	            "options": options,
	            "correctAnswer": options.index(correct),
	            "explanation": explanation,
	            "difficulty": difficulty,
	        }

	    def generate(self, content: str, count: int = 10) -> List[Dict]:
	        """Generate ``count`` questions for ``content`` using NLP and templates.

	        Works offline. Content-derived and generic questions are shuffled
	        together; if there are fewer than ``count`` of them, fallback
	        questions are appended until the list is long enough.

	        Args:
	            content: The passage text.
	            count: Number of questions wanted. Zero or negative yields ``[]``.

	        Returns:
	            A list of exactly ``count`` question dicts (for positive ``count``).
	        """
	        if count <= 0:
	            return []

	        questions: List[Dict] = []
	        content = self._clean_content(content)

	        if self.has_spacy and len(content) > 100:
	            try:
	                # Only the first 2000 characters are parsed to keep this fast.
	                doc = self.nlp(content[:2000])
	                questions.extend(self._generate_character_questions(doc))
	                questions.extend(self._generate_plot_questions(doc))
	                questions.extend(self._generate_vocabulary_questions(doc))
	            except Exception as e:
	                # Best effort: fall through to the template questions below.
	                print(f"⚠️ spaCy processing error: {e}")

	        questions.extend(self._generate_general_questions(content))
	        questions.extend(self._generate_theme_questions())
	        questions.extend(self._generate_inference_questions(content))

	        random.shuffle(questions)

	        # Pad with fallback questions. Each call returns distinct questions,
	        # so repeats only appear once every fallback has been used.
	        while len(questions) < count:
	            missing = count - len(questions)
	            questions.extend(self._generate_fallback_questions(content, missing))

	        return questions[:count]

	    def _clean_content(self, text: str) -> str:
	        """Strip known PDF/boilerplate artifacts and collapse whitespace.

	        Returns an empty string for ``None`` or empty input.
	        """
	        if not text:
	            return ""
	        for pattern in self._ARTIFACT_PATTERNS:
	            text = pattern.sub('', text)
	        return self._WHITESPACE.sub(' ', text).strip()

	    def _generate_character_questions(self, doc) -> List[Dict]:
	        """Generate questions about characters from ``doc.ents``.

	        Args:
	            doc: Object whose ``ents`` items expose ``text`` and ``label_``
	                (a spaCy ``Doc`` in normal use).

	        Returns:
	            Zero, one or two question dicts, depending on how many distinct
	            ``PERSON`` entities were found.
	        """
	        questions: List[Dict] = []

	        # De-duplicate while keeping first-appearance order (a set would make
	        # the order depend on string hashing).
	        persons = list(dict.fromkeys(
	            ent.text for ent in doc.ents if ent.label_ == 'PERSON'
	        ))

	        if persons:
	            correct = random.choice(persons)

	            # Other people named in the passage would also be true answers to
	            # this question, so only generic phrases are used as distractors.
	            pool = [g for g in self._GENERIC_DISTRACTORS if g not in persons]
	            distractors = random.sample(pool, min(3, len(pool)))

	            questions.append(self._build_question(
	                "Who is a main character mentioned in this passage?",
	                correct,
	                distractors,
	                f"{correct} is mentioned as a character in the text.",
	                "easy",
	            ))

	        if len(persons) >= 2:
	            char1, char2 = random.sample(persons, 2)
	            questions.append(self._build_question(
	                f"What is the relationship between {char1} and {char2}?",
	                "They interact in the story",
	                [
	                    "They never meet",
	                    "They are the same person",
	                    "Only one appears in the text",
	                ],
	                f"Both {char1} and {char2} are mentioned in the passage.",
	                "medium",
	            ))

	        return questions

	    def _generate_plot_questions(self, doc) -> List[Dict]:
	        """Generate a question about an action taken from the passage's verbs.

	        Args:
	            doc: Iterable of tokens exposing ``text``, ``lemma_`` and ``pos_``.

	        Returns:
	            A one-element list, or ``[]`` when no suitable verb exists.
	        """
	        # First five distinct verb lemmas, in order of appearance.
	        verbs = list(dict.fromkeys(
	            token.lemma_ for token in doc
	            if token.pos_ == 'VERB' and len(token.text) > 3
	        ))[:5]

	        if not verbs:
	            return []

	        main_verb = random.choice(verbs)
	        return [self._build_question(
	            "What action occurs in this passage?",
	            f"Characters {main_verb}",
	            [
	                "Nothing happens",
	                "Only dialogue occurs",
	                "The setting is described",
	            ],
	            f"The text describes characters who {main_verb}.",
	            "easy",
	        )]

	    def _generate_vocabulary_questions(self, doc) -> List[Dict]:
	        """Generate a vocabulary question about a long content word.

	        Args:
	            doc: Iterable of tokens exposing ``text``, ``pos_`` and ``is_stop``.

	        Returns:
	            A one-element list, or ``[]`` when no candidate word exists.
	        """
	        # Candidates: non-stop nouns/verbs/adjectives longer than 7 characters.
	        interesting_words = [
	            token.text for token in doc
	            if len(token.text) > 7
	            and token.pos_ in ['NOUN', 'VERB', 'ADJ']
	            and not token.is_stop
	        ]

	        if not interesting_words:
	            return []

	        word = random.choice(interesting_words[:10])  # From first 10
	        return [self._build_question(
	            f"What does '{word}' most likely mean in this context?",
	            "A word related to the story's events",
	            [
	                "A type of animal",
	                "A mathematical term",
	                "A scientific concept",
	            ],
	            f"Based on context, '{word}' relates to the story's events.",
	            "medium",
	        )]

	    def _generate_general_questions(self, content: str) -> List[Dict]:
	        """Generate general literature questions.

	        Always returns the genre question; adds a dialogue question when the
	        text contains more than five double or single quote characters.
	        """
	        # NOTE: apostrophes in contractions also count towards the
	        # single-quote total, so this is only a rough dialogue heuristic.
	        has_dialogue = content.count('"') > 5 or content.count("'") > 5

	        questions = [self._build_question(
	            "What type of literature is this?",
	            "Drama or prose fiction",
	            [
	                "Scientific article",
	                "News report",
	                "Technical manual",
	            ],
	            "This is a work of dramatic or prose literature.",
	            "easy",
	        )]

	        if has_dialogue:
	            questions.append(self._build_question(
	                "What literary element is most prominent?",
	                "Dialogue and character interaction",
	                [
	                    "Scientific data",
	                    "Historical facts",
	                    "Geographic descriptions",
	                ],
	                "The passage features significant dialogue between characters.",
	                "easy",
	            ))

	        return questions

	    def _generate_theme_questions(self) -> List[Dict]:
	        """Generate one theme question from ``self.themes``.

	        One theme is picked at random as the answer and three others serve
	        as distractors.
	        """
	        theme = random.choice(self.themes)
	        other_themes = random.sample([t for t in self.themes if t != theme], 3)

	        return [self._build_question(
	            "What is a major theme in this passage?",
	            theme,
	            other_themes,
	            f"The passage explores the theme of {theme}.",
	            "medium",
	        )]

	    def _generate_inference_questions(self, content: str) -> List[Dict]:
	        """Generate a tone question and a generic character-inference question."""
	        tone = self._detect_tone(content)

	        return [
	            self._build_question(
	                "What is the mood or tone of this passage?",
	                tone,
	                [
	                    "Completely neutral",
	                    "Purely humorous",
	                    "Strictly factual",
	                ],
	                f"The language and word choice create a {tone} tone.",
	                "medium",
	            ),
	            self._build_question(
	                "What can you infer about the characters?",
	                "They have complex relationships and emotions",
	                [
	                    "They have no feelings",
	                    "They are all strangers",
	                    "They never interact",
	                ],
	                "Literary passages typically explore complex human relationships.",
	                "medium",
	            ),
	        ]

	    def _detect_tone(self, text: str) -> str:
	        """Return a tone label chosen by simple keyword detection.

	        Rules are checked in order and the first match wins; text with no
	        keyword gets ``"thoughtful and reflective"``. Keywords must begin a
	        word, so "courage" does not count as "rage".
	        """
	        text_lower = text.lower()
	        for pattern, label in self._TONE_RULES:
	            if pattern.search(text_lower):
	                return label
	        return self._DEFAULT_TONE

	    def _generate_fallback_questions(self, content: str, count: int) -> List[Dict]:
	        """Return up to ``count`` distinct generic questions.

	        These do not depend on ``content`` and are always available.

	        Args:
	            content: Unused; kept for signature compatibility.
	            count: Maximum number of questions. Values above the number of
	                templates are capped; zero or negative yields ``[]``.
	        """
	        # (question, correct option, wrong options, explanation, difficulty)
	        templates = [
	            (
	                "What is this passage primarily about?",
	                "Character development and relationships",
	                [
	                    "Pure description of objects",
	                    "Mathematical formulas",
	                    "Scientific experiments",
	                ],
	                "Literary passages focus on characters and their development.",
	                "easy",
	            ),
	            (
	                "What makes this a work of literature?",
	                "It tells a story with characters",
	                [
	                    "It contains only facts",
	                    "It has mathematical equations",
	                    "It gives technical instructions",
	                ],
	                "Literature tells stories and explores human experiences.",
	                "easy",
	            ),
	            (
	                "What is the purpose of this text?",
	                "To entertain and convey human experience",
	                [
	                    "To teach mathematics",
	                    "To explain chemistry",
	                    "To give directions",
	                ],
	                "Literature aims to entertain and explore human experiences.",
	                "medium",
	            ),
	            (
	                "How should you read this passage?",
	                "Looking for character emotions and story",
	                [
	                    "Looking only for facts and data",
	                    "Looking for scientific formulas",
	                    "Looking for technical instructions",
	                ],
	                "Literature is best understood by focusing on characters and narrative.",
	                "easy",
	            ),
	            (
	                "What skills does reading this develop?",
	                "Understanding human nature and empathy",
	                [
	                    "Mathematical calculation",
	                    "Scientific analysis",
	                    "Computer programming",
	                ],
	                "Reading literature develops empathy and understanding of human nature.",
	                "medium",
	            ),
	        ]

	        # Clamp so a negative or oversized request cannot make sample() raise.
	        wanted = max(0, min(count, len(templates)))
	        return [
	            self._build_question(*template)
	            for template in random.sample(templates, wanted)
	        ]

	    def generate_fallback(self, content: str, count: int) -> List[Dict]:
	        """Public fallback method for external calls."""
	        return self._generate_fallback_questions(content, count)