Spaces:
Build error
Build error
import random
import re
from typing import Any, Dict, List, Optional, Tuple
class MCQGenerator:
    """Generate multiple-choice questions (MCQs) from free-form text.

    Sentences that contain a number, a multi-word proper noun or a
    "was born" / "was founded" phrase are treated as facts and turned into
    questions.  When the text does not yield enough facts, generic
    template questions pad the result; when the text is too short to use
    at all, a small built-in bank of fallback questions is returned.

    Every question is a dict with the keys ``question``, ``options``,
    ``correct_answer`` (index into ``options``), ``explanation``,
    ``difficulty`` and ``type``.
    """

    # Patterns are compiled once at class level instead of on every call.
    _SENTENCE_SPLIT_RE = re.compile(r'[.!?]+')
    _YEAR_RE = re.compile(r'\b(\d{4})\b')
    _NUMBER_RE = re.compile(r'\b(\d+)\b')
    # Two or more consecutive capitalised words.  Requiring at least two
    # keeps a sentence-initial "The" from being picked as the answer.
    _PROPER_NOUN_RE = re.compile(r'\b([A-Z][a-z]+(?:\s+[A-Z][a-z]+)+)\b')
    _FACT_PHRASE_RE = re.compile(r'\bwas\s+(?:born|founded)\b')
    # Leading/trailing non-word characters attached to a token ("word,").
    _EDGE_PUNCT_RE = re.compile(r'^\W+|\W+$')

    # Built-in questions used when the supplied content is unusable.
    # Treated as read-only: callers always receive fresh copies.
    _FALLBACK_QUESTIONS = {
        'easy': [
            {
                'question': 'What is the capital of India?',
                'options': ['New Delhi', 'Mumbai', 'Kolkata', 'Chennai'],
                'correct_answer': 0,
                'explanation': 'New Delhi is the capital of India.',
            },
            {
                'question': 'Which river is considered sacred in Hinduism?',
                'options': ['Ganges', 'Yamuna', 'Narmada', 'Godavari'],
                'correct_answer': 0,
                'explanation': 'The Ganges river is considered most sacred in Hinduism.',
            },
        ],
        'medium': [
            {
                'question': 'In which year did India gain independence?',
                'options': ['1947', '1946', '1948', '1950'],
                'correct_answer': 0,
                'explanation': 'India gained independence on August 15, 1947.',
            },
            {
                'question': 'Who was the first Prime Minister of India?',
                'options': ['Jawaharlal Nehru', 'Mahatma Gandhi', 'Sardar Patel', 'Subhas Chandra Bose'],
                'correct_answer': 0,
                'explanation': 'Jawaharlal Nehru was the first Prime Minister of India.',
            },
        ],
        'hard': [
            {
                'question': 'Which constitutional amendment is known as the "Mini Constitution"?',
                'options': ['42nd Amendment', '44th Amendment', '73rd Amendment', '74th Amendment'],
                'correct_answer': 0,
                'explanation': 'The 42nd Amendment is called the "Mini Constitution" due to its extensive changes.',
            },
        ],
    }

    def __init__(self):
        # Question stems grouped by difficulty.  None of the generation
        # methods in this class reads them; the attribute is kept because
        # it is part of the public surface of the object.
        self.question_templates = {
            'easy': [
                "What is {entity}?",
                "When did {event} happen?",
                "Where is {place} located?",
                "Who was {person}?",
            ],
            'medium': [
                "What was the significance of {event}?",
                "How did {process} work?",
                "What caused {event}?",
                "What were the effects of {action}?",
            ],
            'hard': [
                "Analyze the relationship between {concept1} and {concept2}.",
                "What would have happened if {event} had not occurred?",
                "Compare and contrast {item1} and {item2}.",
                "Evaluate the impact of {factor} on {outcome}.",
            ],
        }

    def generate_questions(self, content: str, difficulty: str, num_questions: int,
                           language: str, translator) -> List[Dict[str, Any]]:
        """Generate up to ``num_questions`` MCQs from ``content``.

        Args:
            content: Source text to build questions from.
            difficulty: Label copied into each question; also selects the
                fallback bucket ('easy', 'medium' or 'hard').
            num_questions: Number of questions wanted.  Zero or a negative
                value yields an empty list.
            language: Target language code; ``'en'`` disables translation.
            translator: Object exposing ``translate_text(text, language)``.
                May be ``None``, in which case text is left untranslated.

        Returns:
            A list of question dicts.  Fallback questions are returned when
            the content is shorter than 100 characters or contains no
            usable sentence; that list may be shorter than requested.
        """
        if num_questions <= 0:
            return []
        if not content or len(content.strip()) < 100:
            return self._generate_fallback_questions(difficulty, num_questions, language, translator)

        key_facts = self._extract_key_facts(self._extract_sentences(content))

        questions: List[Dict[str, Any]] = []
        # Walk every fact rather than only the first ``num_questions`` so a
        # fact that cannot be turned into a question does not cost a slot.
        for fact in key_facts:
            if len(questions) >= num_questions:
                break
            question = self._create_question_from_fact(fact, difficulty, language, translator)
            if question:
                questions.append(question)

        # Pad with generic template questions when facts run out.
        while len(questions) < num_questions:
            question = self._generate_template_question(content, difficulty, language, translator)
            if question is None:
                break
            questions.append(question)

        if not questions:
            # Long enough but without a single usable sentence: treat it
            # like insufficient content instead of returning nothing.
            return self._generate_fallback_questions(difficulty, num_questions, language, translator)
        return questions

    def _extract_sentences(self, content: str) -> List[str]:
        """Split ``content`` on ``.``, ``!`` and ``?`` into trimmed sentences.

        Fragments of 20 characters or fewer are dropped and at most the
        first 50 remaining sentences are returned.
        """
        fragments = (part.strip() for part in self._SENTENCE_SPLIT_RE.split(content))
        return [fragment for fragment in fragments if len(fragment) > 20][:50]

    def _extract_key_facts(self, sentences: List[str]) -> List[Dict[str, str]]:
        """Return ``{'sentence', 'type'}`` dicts for the fact-bearing sentences."""
        return [
            {'sentence': sentence, 'type': self._classify_fact_type(sentence)}
            for sentence in sentences
            if self._contains_factual_info(sentence)
        ]

    def _contains_factual_info(self, sentence: str) -> bool:
        """Tell whether ``sentence`` has a number, a proper noun or a fact phrase."""
        # Year-specific patterns are unnecessary here: any year is also
        # matched by the plain number pattern.
        patterns = (self._NUMBER_RE, self._PROPER_NOUN_RE, self._FACT_PHRASE_RE)
        return any(pattern.search(sentence) for pattern in patterns)

    def _classify_fact_type(self, sentence: str) -> str:
        """Classify ``sentence`` as 'date', 'number', 'person_place' or 'general'.

        The checks run from most to least specific, so a sentence holding
        a four-digit number is always a 'date'.
        """
        if self._YEAR_RE.search(sentence):
            return 'date'
        if self._NUMBER_RE.search(sentence):
            return 'number'
        if self._PROPER_NOUN_RE.search(sentence):
            return 'person_place'
        return 'general'

    @staticmethod
    def _localize(text: str, language: str, translator) -> str:
        """Translate ``text`` unless the language is English or no translator is given."""
        if language == 'en' or translator is None:
            return text
        return translator.translate_text(text, language)

    def _create_question_from_fact(self, fact: Dict[str, str], difficulty: str,
                                   language: str, translator) -> Optional[Dict[str, Any]]:
        """Build a question dict from a fact produced by ``_extract_key_facts``.

        Returns:
            The question dict, or ``None`` when no question/answer pair
            can be derived from the fact's sentence.
        """
        sentence = fact['sentence']
        fact_type = fact['type']

        question_text, correct_answer = self._generate_question_and_answer(sentence, fact_type, difficulty)
        if not question_text or not correct_answer:
            return None

        options = self._generate_options(correct_answer, fact_type)
        # The options come back shuffled, so the answer position has to be
        # looked up here, before translation changes the option strings.
        correct_index = options.index(correct_answer)

        return {
            'question': self._localize(question_text, language, translator),
            'options': [self._localize(option, language, translator) for option in options],
            'correct_answer': correct_index,
            'explanation': self._localize(f"Based on the text: {sentence[:100]}...", language, translator),
            'difficulty': difficulty,
            'type': fact_type,
        }

    def _generate_question_and_answer(self, sentence: str, fact_type: str,
                                      difficulty: str) -> Tuple[Optional[str], Optional[str]]:
        """Derive a ``(question, answer)`` pair from ``sentence``.

        Typed facts ('date', 'number', 'person_place') use the first match
        of the corresponding pattern as the answer.  Anything else, or a
        typed fact whose pattern does not match, becomes a
        fill-in-the-blank question.  ``difficulty`` is accepted for
        interface compatibility and is not used.

        Returns:
            ``(question, answer)``, or ``(None, None)`` when the sentence
            is too short to blank out a word.
        """
        typed_rules = {
            'date': (self._YEAR_RE,
                     "In which year did the event mentioned in this context occur?"),
            'number': (self._NUMBER_RE,
                       "What is the number mentioned in this context?"),
            'person_place': (self._PROPER_NOUN_RE,
                             "Who or what is mentioned prominently in this context?"),
        }
        rule = typed_rules.get(fact_type)
        if rule is not None:
            pattern, question = rule
            match = pattern.search(sentence)
            if match:
                return question, match.group(1)

        words = sentence.split()
        if len(words) > 5:
            # Skip the first and last two words, and strip attached
            # punctuation so the expected answer is the bare word.
            candidates = []
            for index in range(2, len(words) - 2):
                core = self._EDGE_PUNCT_RE.sub('', words[index])
                if core:
                    candidates.append((index, core))
            if candidates:
                index, answer = random.choice(candidates)
                # Replace by position: a plain str.replace on the sentence
                # could hit an earlier word that merely contains the answer.
                words[index] = words[index].replace(answer, "______", 1)
                return f"Fill in the blank: {' '.join(words)}", answer

        return None, None

    def _generate_options(self, correct_answer: str, fact_type: str) -> List[str]:
        """Return the correct answer plus three distinct distractors, shuffled.

        Args:
            correct_answer: The right option; always present in the result.
            fact_type: 'date' and 'number' produce nearby numeric
                distractors; any other value produces generic placeholders.

        Returns:
            Four unique option strings in random order.
        """
        try:
            value = int(correct_answer) if fact_type in ('date', 'number') else None
        except ValueError:
            value = None

        if value is not None:
            if fact_type == 'date':
                numeric = [value - 1, value + 1, value - 5, value + 5]
            else:
                # The trailing candidates are spares for small numbers,
                # where ``value * 2`` repeats the answer (0) or another
                # distractor (1).
                numeric = [value + 1, value - 1, value * 2, value + 2, value + 3]
            seen = {value}
            candidates = []
            for candidate in numeric:
                if candidate in seen or (value >= 0 and candidate < 0):
                    continue
                seen.add(candidate)
                candidates.append(str(candidate))
        elif fact_type == 'date':
            candidates = ["1947", "1950", "1960"]
        elif fact_type == 'number':
            candidates = ["10", "20", "100"]
        else:
            # "Option A" is a spare, used only if the answer itself
            # happens to equal one of the placeholders.
            candidates = ["Option B", "Option C", "Option D", "Option A"]

        options = [correct_answer]
        for candidate in candidates:
            if len(options) == 4:
                break
            if candidate not in options:
                options.append(candidate)

        random.shuffle(options)
        return options

    def _generate_template_question(self, content: str, difficulty: str,
                                    language: str, translator) -> Optional[Dict[str, Any]]:
        """Build a generic placeholder question about ``content``.

        Returns:
            A question dict whose first option is the correct one, or
            ``None`` when ``content`` holds no usable sentence.
        """
        if not self._extract_sentences(content):
            return None

        options = [
            "Statement related to the main topic",
            "Unrelated statement A",
            "Unrelated statement B",
            "Unrelated statement C",
        ]
        return {
            'question': self._localize("Based on the content, which of the following is true?",
                                       language, translator),
            'options': [self._localize(option, language, translator) for option in options],
            'correct_answer': 0,
            'explanation': self._localize("Based on the content about the topic.", language, translator),
            'difficulty': difficulty,
            'type': 'general',
        }

    def _generate_fallback_questions(self, difficulty: str, num_questions: int,
                                     language: str, translator) -> List[Dict[str, Any]]:
        """Pick built-in questions for when the content is insufficient.

        Args:
            difficulty: Bucket to draw from; unknown values use 'easy'.
            num_questions: Maximum number of questions; values below zero
                are treated as zero.
            language: Target language code; ``'en'`` disables translation.
            translator: Object exposing ``translate_text(text, language)``,
                or ``None`` to skip translation.

        Returns:
            A random selection of at most ``num_questions`` question
            dicts, limited by the size of the chosen bucket.
        """
        level = difficulty if difficulty in self._FALLBACK_QUESTIONS else 'easy'
        bank = self._FALLBACK_QUESTIONS[level]
        count = min(max(num_questions, 0), len(bank))

        # Build new dicts so translation never touches the shared bank.
        return [
            {
                'question': self._localize(entry['question'], language, translator),
                'options': [self._localize(option, language, translator) for option in entry['options']],
                'correct_answer': entry['correct_answer'],
                'explanation': self._localize(entry['explanation'], language, translator),
                'difficulty': level,
                'type': 'general',
            }
            for entry in random.sample(bank, count)
        ]