# TriviaVerse_38 / components / mcq_generator.py
# RaghavenderReddy's picture
# Upload 13 files
# 33a2aaf verified
import random
import re
from typing import Any, Dict, List, Optional, Tuple
class MCQGenerator:
    """Generate multiple-choice questions (MCQs) from free-text content.

    Every question is a dict with the keys ``question``, ``options``,
    ``correct_answer``, ``explanation``, ``difficulty`` and ``type``.
    ``correct_answer`` is the index into ``options`` of the right answer.

    The ``translator`` argument taken by several methods must expose
    ``translate_text(text, language)``; it is only called when ``language``
    is not ``'en'``.
    """

    # Patterns are compiled once at class level because they are applied to
    # every candidate sentence.
    _SENTENCE_END_RE = re.compile(r'[.!?]+')
    _YEAR_RE = re.compile(r'\b(\d{4})\b')
    _NUMBER_RE = re.compile(r'\b(\d+)\b')
    _PROPER_NOUN_PAIR_RE = re.compile(r'\b[A-Z][a-z]+\s+[A-Z][a-z]+\b')
    # Two or more consecutive capitalised words, e.g. "New Delhi".
    _PROPER_NOUN_RUN_RE = re.compile(r'\b([A-Z][a-z]+(?:\s+[A-Z][a-z]+)+)\b')
    # One or more consecutive capitalised words (fallback for the above).
    _CAPITALIZED_RE = re.compile(r'\b([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)\b')
    # A sentence is "factual" if any of these match. Years and "in <year>"
    # need no pattern of their own: any 4-digit year also matches _NUMBER_RE.
    _FACT_PATTERNS = (
        _NUMBER_RE,
        _PROPER_NOUN_PAIR_RE,
        re.compile(r'\bwas\s+born\b'),
        re.compile(r'\bwas\s+founded\b'),
    )

    _BLANK = "______"
    _MAX_SENTENCES = 50
    _NUM_DISTRACTORS = 3

    def __init__(self):
        # Question stems grouped by difficulty. Not referenced by the methods
        # of this class; kept as a public attribute for external users.
        self.question_templates = {
            'easy': [
                "What is {entity}?",
                "When did {event} happen?",
                "Where is {place} located?",
                "Who was {person}?"
            ],
            'medium': [
                "What was the significance of {event}?",
                "How did {process} work?",
                "What caused {event}?",
                "What were the effects of {action}?"
            ],
            'hard': [
                "Analyze the relationship between {concept1} and {concept2}.",
                "What would have happened if {event} had not occurred?",
                "Compare and contrast {item1} and {item2}.",
                "Evaluate the impact of {factor} on {outcome}."
            ]
        }

    def generate_questions(self, content: str, difficulty: str, num_questions: int,
                           language: str, translator) -> List[Dict[str, Any]]:
        """Generate up to ``num_questions`` MCQs from ``content``.

        Args:
            content: Source text. When empty or shorter than 100 characters
                (after stripping), canned fallback questions are returned.
            difficulty: Difficulty label stored on each question and used to
                pick the fallback pool.
            num_questions: Maximum number of questions to return.
            language: Target language code; ``'en'`` skips translation.
            translator: Object exposing ``translate_text(text, language)``.

        Returns:
            A list of question dicts, at most ``num_questions`` long.
        """
        if not content or len(content.strip()) < 100:
            return self._generate_fallback_questions(difficulty, num_questions, language, translator)

        sentences = self._extract_sentences(content)
        key_facts = self._extract_key_facts(sentences)

        questions: List[Dict[str, Any]] = []

        # Fact-based questions first. A fact that yields no question is
        # skipped and the next fact is tried, so it does not cost a slot.
        for fact in key_facts:
            if len(questions) >= num_questions:
                break
            question = self._create_question_from_fact(fact, difficulty, language, translator)
            if question:
                questions.append(question)

        # Top up with generic template questions when facts run out.
        while len(questions) < num_questions:
            question = self._generate_template_question(content, difficulty, language, translator)
            if question is None:
                break  # no usable sentences; further attempts would also fail
            questions.append(question)

        return questions[:max(num_questions, 0)]

    def _extract_sentences(self, content: str) -> List[str]:
        """Split ``content`` on ``.``, ``!`` and ``?`` into sentences.

        Fragments of 20 characters or fewer (after stripping) are dropped and
        only the first 50 remaining sentences are returned.
        """
        fragments = (piece.strip() for piece in self._SENTENCE_END_RE.split(content))
        sentences = [piece for piece in fragments if len(piece) > 20]
        return sentences[:self._MAX_SENTENCES]

    def _extract_key_facts(self, sentences: List[str]) -> List[Dict[str, str]]:
        """Return ``{'sentence', 'type'}`` dicts for the factual sentences."""
        return [
            {'sentence': sentence, 'type': self._classify_fact_type(sentence)}
            for sentence in sentences
            if self._contains_factual_info(sentence)
        ]

    def _contains_factual_info(self, sentence: str) -> bool:
        """Return True if the sentence has a number, a two-word proper noun,
        or the phrases "was born" / "was founded"."""
        return any(pattern.search(sentence) for pattern in self._FACT_PATTERNS)

    def _classify_fact_type(self, sentence: str) -> str:
        """Classify a sentence as ``'date'``, ``'number'``, ``'person_place'``
        or ``'general'``; the first matching category wins, in that order."""
        if self._YEAR_RE.search(sentence):
            return 'date'
        if self._NUMBER_RE.search(sentence):
            return 'number'
        if self._PROPER_NOUN_PAIR_RE.search(sentence):
            return 'person_place'
        return 'general'

    def _create_question_from_fact(self, fact: Dict[str, str], difficulty: str,
                                   language: str, translator) -> Optional[Dict[str, Any]]:
        """Build a question dict from one fact.

        Args:
            fact: Dict with ``'sentence'`` and ``'type'`` keys.
            difficulty: Difficulty label copied onto the question.
            language: Target language code; ``'en'`` skips translation.
            translator: Object exposing ``translate_text(text, language)``.

        Returns:
            The question dict, or ``None`` when no question/answer pair could
            be derived from the sentence.
        """
        sentence = fact['sentence']
        fact_type = fact['type']

        question_text, correct_answer = self._generate_question_and_answer(sentence, fact_type, difficulty)
        if not question_text or not correct_answer:
            return None

        options = self._generate_options(correct_answer, fact_type)
        # The options arrive shuffled, so locate the answer now, before
        # translation changes the option strings.
        correct_index = options.index(correct_answer)

        if language != 'en':
            question_text = translator.translate_text(question_text, language)
            options = [translator.translate_text(opt, language) for opt in options]

        return {
            'question': question_text,
            'options': options,
            'correct_answer': correct_index,
            'explanation': f"Based on the text: {sentence[:100]}...",
            'difficulty': difficulty,
            'type': fact_type
        }

    def _generate_question_and_answer(self, sentence: str, fact_type: str,
                                      difficulty: str) -> Tuple[Optional[str], Optional[str]]:
        """Derive a ``(question, answer)`` pair from a sentence.

        Typed facts use a fixed question with the extracted year, number or
        name as the answer. If nothing can be extracted, or the type is
        anything else, a fill-in-the-blank question is built from the sentence.
        ``difficulty`` is accepted for interface compatibility and is not used.

        Returns:
            ``(question, answer)``, or ``(None, None)`` when the sentence has
            five words or fewer and no typed extraction succeeded.
        """
        if fact_type == 'date':
            match = self._YEAR_RE.search(sentence)
            if match:
                return "In which year did the event mentioned in this context occur?", match.group(1)
        elif fact_type == 'number':
            match = self._NUMBER_RE.search(sentence)
            if match:
                return "What is the number mentioned in this context?", match.group(1)
        elif fact_type == 'person_place':
            # Prefer a multi-word name; a single capitalised word is often
            # just the first word of the sentence (e.g. "The").
            match = (self._PROPER_NOUN_RUN_RE.search(sentence)
                     or self._CAPITALIZED_RE.search(sentence))
            if match:
                return "Who or what is mentioned prominently in this context?", match.group(1)

        words = sentence.split()
        if len(words) > 5:
            # Blank one word by position, never the first or last two words.
            # Replacing by text could hit an earlier word containing the same
            # characters.
            blank_at = random.randrange(2, len(words) - 2)
            answer = words[blank_at]
            blanked = words[:blank_at] + [self._BLANK] + words[blank_at + 1:]
            return f"Fill in the blank: {' '.join(blanked)}", answer

        return None, None

    def _generate_options(self, correct_answer: str, fact_type: str) -> List[str]:
        """Return the correct answer plus up to three distinct distractors,
        in random order.

        Callers that need the position of the right answer should look it up
        with ``options.index(correct_answer)``.
        """
        if fact_type == 'date':
            try:
                year = int(correct_answer)
                candidates = [str(year - 1), str(year + 1), str(year - 5), str(year + 5)]
            except ValueError:
                candidates = ["1947", "1950", "1960"]
        elif fact_type == 'number':
            try:
                num = int(correct_answer)
                values = [num + 1, num - 1, num * 2, num + 2, num + 3, num + 5]
                if num >= 0:
                    # Keep distractors non-negative for non-negative answers.
                    values = [value for value in values if value >= 0]
                # num * 2 equals the answer itself when num is 0.
                candidates = [str(value) for value in values if value != num]
            except ValueError:
                candidates = ["10", "20", "100"]
        else:
            candidates = ["Option B", "Option C", "Option D"]

        options = [correct_answer]
        for candidate in candidates:
            if len(options) > self._NUM_DISTRACTORS:
                break
            if candidate not in options:  # skip duplicates such as 1+1 == 1*2
                options.append(candidate)

        random.shuffle(options)
        return options

    def _generate_template_question(self, content: str, difficulty: str,
                                    language: str, translator) -> Optional[Dict[str, Any]]:
        """Build a generic placeholder question about ``content``.

        Returns ``None`` when ``content`` has no usable sentences. The correct
        option is always at index 0.
        """
        if not self._extract_sentences(content):
            return None

        question = "Based on the content, which of the following is true?"
        options = [
            "Statement related to the main topic",
            "Unrelated statement A",
            "Unrelated statement B",
            "Unrelated statement C"
        ]

        if language != 'en':
            question = translator.translate_text(question, language)
            options = [translator.translate_text(opt, language) for opt in options]

        return {
            'question': question,
            'options': options,
            'correct_answer': 0,
            'explanation': "Based on the content about the topic.",
            'difficulty': difficulty,
            'type': 'general'
        }

    def _generate_fallback_questions(self, difficulty: str, num_questions: int,
                                     language: str, translator) -> List[Dict[str, Any]]:
        """Return canned questions for when the content is insufficient.

        Args:
            difficulty: Selects the pool; unknown values use the ``'easy'`` pool.
            num_questions: Maximum number of questions; capped at the pool
                size, and values below zero yield an empty list.
            language: Target language code; ``'en'`` skips translation.
            translator: Object exposing ``translate_text(text, language)``.

        Returns:
            Randomly sampled question dicts. Each carries the same keys as
            content-derived questions; the correct option is at index 0.
        """
        fallback_questions = {
            'easy': [
                {
                    'question': 'What is the capital of India?',
                    'options': ['New Delhi', 'Mumbai', 'Kolkata', 'Chennai'],
                    'correct_answer': 0,
                    'explanation': 'New Delhi is the capital of India.'
                },
                {
                    'question': 'Which river is considered sacred in Hinduism?',
                    'options': ['Ganges', 'Yamuna', 'Narmada', 'Godavari'],
                    'correct_answer': 0,
                    'explanation': 'The Ganges river is considered most sacred in Hinduism.'
                }
            ],
            'medium': [
                {
                    'question': 'In which year did India gain independence?',
                    'options': ['1947', '1946', '1948', '1950'],
                    'correct_answer': 0,
                    'explanation': 'India gained independence on August 15, 1947.'
                },
                {
                    'question': 'Who was the first Prime Minister of India?',
                    'options': ['Jawaharlal Nehru', 'Mahatma Gandhi', 'Sardar Patel', 'Subhas Chandra Bose'],
                    'correct_answer': 0,
                    'explanation': 'Jawaharlal Nehru was the first Prime Minister of India.'
                }
            ],
            'hard': [
                {
                    'question': 'Which constitutional amendment is known as the "Mini Constitution"?',
                    'options': ['42nd Amendment', '44th Amendment', '73rd Amendment', '74th Amendment'],
                    'correct_answer': 0,
                    'explanation': 'The 42nd Amendment is called the "Mini Constitution" due to its extensive changes.'
                }
            ]
        }

        pool = fallback_questions.get(difficulty, fallback_questions['easy'])
        # Clamp the count: random.sample raises ValueError for a negative size.
        count = max(0, min(num_questions, len(pool)))
        selected_questions = [
            dict(question, difficulty=difficulty, type='general')
            for question in random.sample(pool, count)
        ]

        if language != 'en':
            for q in selected_questions:
                q['question'] = translator.translate_text(q['question'], language)
                q['options'] = [translator.translate_text(opt, language) for opt in q['options']]
                q['explanation'] = translator.translate_text(q['explanation'], language)

        return selected_questions