File size: 9,252 Bytes

fec9168

"""
Question generation utilities for MCQ and open-text formats.
"""

import random
from typing import Dict, List, Optional, Tuple

from .logger import setup_logger

logger = setup_logger(__name__)


class QuestionGenerator:
    """Generates questions in MCQ and open-text formats."""
    
    def __init__(
        self,
        num_options: int = 4,
        option_labels: Optional[List[str]] = None,
        distractor_strategy: str = "balanced"
    ):
        """
        Initialize question generator.
        
        Args:
            num_options: Number of MCQ options
            option_labels: Labels for options (e.g., ['A', 'B', 'C', 'D'])
            distractor_strategy: Strategy for generating distractor options
                - "present_only": only use sounds present in audio
                - "mixed": mix of present and absent sounds
                - "balanced": balanced distribution
        """
        self.num_options = num_options
        self.option_labels = option_labels or ["A", "B", "C", "D"]
        self.distractor_strategy = distractor_strategy
        
        if len(self.option_labels) != num_options:
            raise ValueError(f"Number of option labels must match num_options ({num_options})")
    
    def generate_count_mcq(
        self,
        question_template: str,
        correct_count: int,
        all_categories: List[str]
    ) -> Dict:
        """
        Generate an MCQ for counting task.
        
        Args:
            question_template: Question text template
            correct_count: Correct number of unique sounds
            all_categories: List of all available categories
            
        Returns:
            Dictionary with question, options, and correct answer
        """
        # Generate options (including the correct answer)
        options = self._generate_count_options(correct_count)
        
        # Shuffle options
        random.shuffle(options)
        
        # Find correct answer label
        correct_label = self.option_labels[options.index(correct_count)]
        
        # Create option mapping
        option_map = {label: value for label, value in zip(self.option_labels, options)}
        
        return {
            "question": question_template,
            "options": option_map,
            "correct_answer": correct_label,
            "correct_value": correct_count
        }
    
    def generate_count_open_text(
        self,
        question_template: str,
        correct_count: int
    ) -> Dict:
        """
        Generate an open-text question for counting task.
        
        Args:
            question_template: Question text template
            correct_count: Correct number of unique sounds
            
        Returns:
            Dictionary with question and correct answer
        """
        return {
            "question": question_template,
            "correct_answer": str(correct_count)
        }
    
    def generate_category_mcq(
        self,
        question_template: str,
        correct_category: str,
        present_categories: List[str],
        all_categories: List[str]
    ) -> Dict:
        """
        Generate an MCQ where answer is a sound category.
        
        Args:
            question_template: Question text template
            correct_category: Correct category
            present_categories: Categories present in the audio
            all_categories: All available categories
            
        Returns:
            Dictionary with question, options, and correct answer
        """
        # Generate distractor options
        distractors = self._generate_category_distractors(
            correct_category,
            present_categories,
            all_categories,
            self.num_options - 1
        )
        
        # Combine with correct answer
        options = [correct_category] + distractors
        random.shuffle(options)
        
        # Find correct answer label
        correct_label = self.option_labels[options.index(correct_category)]
        
        # Create option mapping
        option_map = {label: value for label, value in zip(self.option_labels, options)}
        
        return {
            "question": question_template,
            "options": option_map,
            "correct_answer": correct_label,
            "correct_value": correct_category
        }
    
    def generate_category_open_text(
        self,
        question_template: str,
        correct_category: str
    ) -> Dict:
        """
        Generate an open-text question where answer is a sound category.
        
        Args:
            question_template: Question text template
            correct_category: Correct category
            
        Returns:
            Dictionary with question and correct answer
        """
        return {
            "question": question_template,
            "correct_answer": correct_category
        }
    
    def generate_sequence_open_text(
        self,
        question_template: str,
        sequence: List[str]
    ) -> Dict:
        """
        Generate an open-text question for sequence/ordering.
        
        Args:
            question_template: Question text template
            sequence: List of categories in order
            
        Returns:
            Dictionary with question and correct answer
        """
        return {
            "question": question_template,
            "correct_answer": ", ".join(sequence)
        }
    
    def _generate_count_options(self, correct_count: int) -> List[int]:
        """
        Generate count options including the correct count.
        
        Args:
            correct_count: Correct count value
            
        Returns:
            List of count options
        """
        options = [correct_count]
        
        # Generate distractors (minimum count is 1, not 0)
        possible_values = list(range(1, max(correct_count + 3, 12)))
        possible_values = [v for v in possible_values if v != correct_count]
        
        distractors = random.sample(possible_values, min(self.num_options - 1, len(possible_values)))
        options.extend(distractors)
        
        return options[:self.num_options]
    
    def _generate_category_distractors(
        self,
        correct_category: str,
        present_categories: List[str],
        all_categories: List[str],
        num_distractors: int
    ) -> List[str]:
        """
        Generate distractor categories based on strategy.
        
        Args:
            correct_category: Correct category
            present_categories: Categories present in audio
            all_categories: All available categories
            num_distractors: Number of distractors to generate
            
        Returns:
            List of distractor categories
        """
        present_non_answer = [c for c in present_categories if c != correct_category]
        absent_categories = [c for c in all_categories if c not in present_categories]
        
        distractors = []
        
        if self.distractor_strategy == "present_only":
            # Only use categories present in the audio
            if len(present_non_answer) >= num_distractors:
                distractors = random.sample(present_non_answer, num_distractors)
            else:
                distractors = present_non_answer.copy()
                # Fill remaining with random absent categories
                remaining = num_distractors - len(distractors)
                distractors.extend(random.sample(absent_categories, min(remaining, len(absent_categories))))
                
        elif self.distractor_strategy == "mixed":
            # Mix of present and absent (random proportion)
            num_present = random.randint(0, min(len(present_non_answer), num_distractors))
            num_absent = num_distractors - num_present
            
            if num_present > 0:
                distractors.extend(random.sample(present_non_answer, min(num_present, len(present_non_answer))))
            if num_absent > 0:
                distractors.extend(random.sample(absent_categories, min(num_absent, len(absent_categories))))
                
        else:  # balanced
            # Balanced distribution: 0, 1, or 2 present sounds as distractors
            num_present_distractor = random.choice([0, 1, 2])
            num_present_distractor = min(num_present_distractor, len(present_non_answer), num_distractors)
            num_absent_distractor = num_distractors - num_present_distractor
            
            if num_present_distractor > 0:
                distractors.extend(random.sample(present_non_answer, num_present_distractor))
            if num_absent_distractor > 0:
                distractors.extend(random.sample(absent_categories, min(num_absent_distractor, len(absent_categories))))
        
        # Fill remaining slots if needed
        while len(distractors) < num_distractors:
            remaining_options = [c for c in all_categories if c not in distractors and c != correct_category]
            if not remaining_options:
                break
            distractors.append(random.choice(remaining_options))
        
        return distractors[:num_distractors]