import random
import re
from typing import Any, Dict, List, Optional, Tuple


class MCQGenerator:
    """Generate multiple-choice questions (MCQs) from free-form text.

    Each question is a dict with the keys ``question``, ``options``,
    ``correct_answer`` (index into ``options``), ``explanation``,
    ``difficulty`` and ``type``.
    """

    # Patterns shared by fact detection, classification and answer extraction.
    _YEAR_RE = re.compile(r'\b\d{4}\b')
    _NUMBER_RE = re.compile(r'\b\d+\b')
    _PROPER_NOUN_PAIR_RE = re.compile(r'\b[A-Z][a-z]+\s+[A-Z][a-z]+\b')
    _FACT_PATTERNS = (
        _YEAR_RE,                               # Years
        _NUMBER_RE,                             # Numbers
        _PROPER_NOUN_PAIR_RE,                   # Proper nouns
        re.compile(r'\bin\s+\d{4}\b'),          # "in 1947"
        re.compile(r'\bwas\s+born\b'),          # Birth information
        re.compile(r'\bwas\s+founded\b'),       # Foundation information
    )
    # Two or more consecutive capitalised words, e.g. "New Delhi".
    _MULTI_WORD_NAME_RE = re.compile(r'\b([A-Z][a-z]+(?:\s+[A-Z][a-z]+)+)\b')
    # One or more consecutive capitalised words (fallback).
    _ANY_NAME_RE = re.compile(r'\b([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)\b')
    # Punctuation kept in the question text when a token is blanked out.
    _BLANK_STRIP_CHARS = ',;:"\'()[]'
    _BLANK = "______"

    def __init__(self):
        # Question stems grouped by difficulty; placeholders are in braces.
        self.question_templates = {
            'easy': [
                "What is {entity}?",
                "When did {event} happen?",
                "Where is {place} located?",
                "Who was {person}?"
            ],
            'medium': [
                "What was the significance of {event}?",
                "How did {process} work?",
                "What caused {event}?",
                "What were the effects of {action}?"
            ],
            'hard': [
                "Analyze the relationship between {concept1} and {concept2}.",
                "What would have happened if {event} had not occurred?",
                "Compare and contrast {item1} and {item2}.",
                "Evaluate the impact of {factor} on {outcome}."
            ]
        }

    def generate_questions(self, content: str, difficulty: str, num_questions: int,
                           language: str, translator) -> List[Dict[str, Any]]:
        """Generate up to ``num_questions`` MCQs from ``content``.

        Content that is empty or shorter than 100 characters (after stripping)
        yields canned fallback questions instead. Otherwise questions are built
        from factual sentences first and topped up with template questions.

        ``translator`` must provide ``translate_text(text, language)``; it is
        only called when ``language`` is not ``'en'``.
        """
        if not content or len(content.strip()) < 100:
            return self._generate_fallback_questions(difficulty, num_questions, language, translator)

        if num_questions <= 0:
            return []

        # Extract key information from content
        sentences = self._extract_sentences(content)
        key_facts = self._extract_key_facts(sentences)

        questions: List[Dict[str, Any]] = []

        # Walk the facts until enough questions exist; a fact that cannot be
        # turned into a question no longer uses up one of the requested slots.
        for fact in key_facts:
            if len(questions) >= num_questions:
                break
            question = self._create_question_from_fact(fact, difficulty, language, translator)
            if question:
                questions.append(question)

        # Generate additional questions using templates
        for _ in range(num_questions - len(questions)):
            question = self._generate_template_question(content, difficulty, language, translator)
            if question is None:
                # No usable sentences: further attempts would fail the same way.
                break
            questions.append(question)

        return questions

    def _extract_sentences(self, content: str) -> List[str]:
        """Split ``content`` on ``.``, ``!`` and ``?`` and return at most 50 pieces.

        Pieces of 20 characters or fewer (after stripping) are discarded.
        """
        stripped = (piece.strip() for piece in re.split(r'[.!?]+', content))
        sentences = [piece for piece in stripped if len(piece) > 20]
        return sentences[:50]  # Limit to first 50 sentences

    def _extract_key_facts(self, sentences: List[str]) -> List[Dict[str, str]]:
        """Return ``{'sentence', 'type'}`` dicts for sentences that look factual."""
        return [
            {'sentence': sentence, 'type': self._classify_fact_type(sentence)}
            for sentence in sentences
            if self._contains_factual_info(sentence)
        ]

    def _contains_factual_info(self, sentence: str) -> bool:
        """Return True if the sentence has a number, a capitalised word pair,
        or the phrases "was born" / "was founded"."""
        return any(pattern.search(sentence) for pattern in self._FACT_PATTERNS)

    def _classify_fact_type(self, sentence: str) -> str:
        """Classify a sentence as ``'date'``, ``'number'``, ``'person_place'`` or ``'general'``.

        Checks run in that order, so a four-digit number wins over other digits,
        and any digits win over a capitalised word pair.
        """
        if self._YEAR_RE.search(sentence):
            return 'date'
        if self._NUMBER_RE.search(sentence):
            return 'number'
        if self._PROPER_NOUN_PAIR_RE.search(sentence):
            return 'person_place'
        return 'general'

    @staticmethod
    def _translate_question(question: Dict[str, Any], language: str, translator) -> Dict[str, Any]:
        """Translate question text, options and explanation in place; return the dict."""
        question['question'] = translator.translate_text(question['question'], language)
        question['options'] = [translator.translate_text(opt, language) for opt in question['options']]
        question['explanation'] = translator.translate_text(question['explanation'], language)
        return question

    def _create_question_from_fact(self, fact: Dict[str, str], difficulty: str,
                                   language: str, translator) -> Optional[Dict[str, Any]]:
        """Build one question dict from a fact, or return None if no question can be formed."""
        sentence = fact['sentence']
        fact_type = fact['type']

        # Extract the main subject and predicate
        question_text, correct_answer = self._generate_question_and_answer(sentence, fact_type, difficulty)

        if not question_text or not correct_answer:
            return None

        # Generate distractors (wrong options); the returned list is shuffled.
        options = self._generate_options(correct_answer, fact_type)

        # Locate the answer before translating: translation keeps the order of
        # the options but changes their text.
        correct_index = options.index(correct_answer)

        question = {
            'question': question_text,
            'options': options,
            'correct_answer': correct_index,
            'explanation': f"Based on the text: {sentence[:100]}...",
            'difficulty': difficulty,
            'type': fact_type
        }

        # Translate if needed
        if language != 'en':
            self._translate_question(question, language, translator)

        return question

    def _generate_question_and_answer(self, sentence: str, fact_type: str,
                                      difficulty: str) -> Tuple[Optional[str], Optional[str]]:
        """Return ``(question_text, answer)`` for a sentence, or ``(None, None)``.

        Typed facts (date / number / person_place) get a fixed question whose
        answer is extracted from the sentence. Anything else, or a typed fact
        whose extraction fails, becomes a fill-in-the-blank question when the
        sentence has more than five words. ``difficulty`` is currently unused.
        """
        if fact_type == 'date':
            # Extract year/date
            date_match = re.search(r'\b(\d{4})\b', sentence)
            if date_match:
                return "In which year did the event mentioned in this context occur?", date_match.group(1)

        elif fact_type == 'number':
            # Extract number
            num_match = re.search(r'\b(\d+)\b', sentence)
            if num_match:
                return "What is the number mentioned in this context?", num_match.group(1)

        elif fact_type == 'person_place':
            # Prefer a multi-word proper noun: a lone capitalised word is most
            # often just the first word of the sentence (e.g. "The").
            name_match = (self._MULTI_WORD_NAME_RE.search(sentence)
                          or self._ANY_NAME_RE.search(sentence))
            if name_match:
                return "Who or what is mentioned prominently in this context?", name_match.group(1)

        # General question
        words = sentence.split()
        if len(words) > 5:
            # Create a fill-in-the-blank question. Avoid the first and last two
            # words, and blank the chosen token by position so an earlier word
            # containing the same letters is never touched.
            blank_at = random.randrange(2, len(words) - 2)
            token = words[blank_at]
            blank_word = token.strip(self._BLANK_STRIP_CHARS) or token
            words[blank_at] = token.replace(blank_word, self._BLANK, 1)
            return f"Fill in the blank: {' '.join(words)}", blank_word

        return None, None

    def _generate_options(self, correct_answer: str, fact_type: str) -> List[str]:
        """Return the correct answer plus up to three distinct distractors, shuffled.

        Callers locate the answer with ``options.index(correct_answer)``.
        """
        if fact_type == 'date':
            # Generate nearby years
            try:
                year = int(correct_answer)
            except ValueError:
                candidates = ["1947", "1950", "1960"]
            else:
                nearby = (year - 1, year + 1, year - 5, year + 5, year + 10)
                candidates = [str(value) for value in nearby if value >= 0]

        elif fact_type == 'number':
            # Generate nearby numbers; the extra candidates replace collisions
            # such as 0 * 2 == 0 or 1 * 2 == 1 + 1, and negatives are dropped.
            try:
                num = int(correct_answer)
            except ValueError:
                candidates = ["10", "20", "100"]
            else:
                nearby = (num + 1, num - 1, num * 2, num + 2, num + 3, num + 5)
                candidates = [str(value) for value in nearby if value >= 0]

        else:
            # Generate generic distractors
            candidates = ["Option B", "Option C", "Option D"]

        options = [correct_answer]
        for candidate in candidates:
            if len(options) == 4:
                break
            if candidate not in options:
                options.append(candidate)

        random.shuffle(options)
        return options

    def _generate_template_question(self, content: str, difficulty: str,
                                    language: str, translator) -> Optional[Dict[str, Any]]:
        """Return a generic placeholder question, or None if ``content`` has no usable sentence."""
        if not self._extract_sentences(content):
            return None

        # Simple template-based question; the first option is the correct one.
        question = {
            'question': "Based on the content, which of the following is true?",
            'options': [
                "Statement related to the main topic",
                "Unrelated statement A",
                "Unrelated statement B",
                "Unrelated statement C"
            ],
            'correct_answer': 0,
            'explanation': "Based on the content about the topic.",
            'difficulty': difficulty,
            'type': 'general'
        }

        if language != 'en':
            self._translate_question(question, language, translator)

        return question

    def _generate_fallback_questions(self, difficulty: str, num_questions: int,
                                     language: str, translator) -> List[Dict[str, Any]]:
        """Return canned questions for when the content is insufficient.

        An unknown ``difficulty`` uses the ``'easy'`` bank. At most as many
        questions as the bank holds are returned; a non-positive
        ``num_questions`` yields an empty list.
        """
        fallback_questions = {
            'easy': [
                {
                    'question': 'What is the capital of India?',
                    'options': ['New Delhi', 'Mumbai', 'Kolkata', 'Chennai'],
                    'correct_answer': 0,
                    'explanation': 'New Delhi is the capital of India.'
                },
                {
                    'question': 'Which river is considered sacred in Hinduism?',
                    'options': ['Ganges', 'Yamuna', 'Narmada', 'Godavari'],
                    'correct_answer': 0,
                    'explanation': 'The Ganges river is considered most sacred in Hinduism.'
                }
            ],
            'medium': [
                {
                    'question': 'In which year did India gain independence?',
                    'options': ['1947', '1946', '1948', '1950'],
                    'correct_answer': 0,
                    'explanation': 'India gained independence on August 15, 1947.'
                },
                {
                    'question': 'Who was the first Prime Minister of India?',
                    'options': ['Jawaharlal Nehru', 'Mahatma Gandhi', 'Sardar Patel', 'Subhas Chandra Bose'],
                    'correct_answer': 0,
                    'explanation': 'Jawaharlal Nehru was the first Prime Minister of India.'
                }
            ],
            'hard': [
                {
                    'question': 'Which constitutional amendment is known as the "Mini Constitution"?',
                    'options': ['42nd Amendment', '44th Amendment', '73rd Amendment', '74th Amendment'],
                    'correct_answer': 0,
                    'explanation': 'The 42nd Amendment is called the "Mini Constitution" due to its extensive changes.'
                }
            ]
        }

        pool = fallback_questions.get(difficulty, fallback_questions['easy'])
        # Clamp at zero: random.sample raises ValueError for a negative size.
        sample_size = max(0, min(num_questions, len(pool)))
        selected_questions = random.sample(pool, sample_size)

        for question in selected_questions:
            # Same keys as questions produced by the other generators.
            question['difficulty'] = difficulty
            question['type'] = 'general'
            # Translate if needed
            if language != 'en':
                self._translate_question(question, language, translator)

        return selected_questions