|
|
""" |
|
|
Question generation utilities for MCQ and open-text formats. |
|
|
""" |
|
|
|
|
|
import random |
|
|
from typing import Dict, List, Optional, Tuple |
|
|
|
|
|
from .logger import setup_logger |
|
|
|
|
|
logger = setup_logger(__name__) |
|
|
|
|
|
|
|
|
class QuestionGenerator:
    """Generates questions in MCQ and open-text formats.

    MCQ results are dictionaries with the keys ``"question"``, ``"options"``
    (label -> value mapping), ``"correct_answer"`` (the label of the correct
    option) and ``"correct_value"``. Open-text results carry only
    ``"question"`` and ``"correct_answer"``.

    All randomness comes from the module-level ``random`` generator, so
    callers can make output reproducible with ``random.seed``.
    """

    # Largest option count for which default letter labels can be generated.
    _MAX_DEFAULT_LABELS = 26

    def __init__(
        self,
        num_options: int = 4,
        option_labels: Optional[List[str]] = None,
        distractor_strategy: str = "balanced"
    ):
        """
        Initialize question generator.

        Args:
            num_options: Number of MCQ options (must be at least 1)
            option_labels: Labels for options (e.g., ['A', 'B', 'C', 'D']).
                When omitted or empty, the first ``num_options`` uppercase
                letters are used.
            distractor_strategy: Strategy for generating distractor options
                - "present_only": prefer sounds present in audio, topping up
                  with absent ones only when too few are present
                - "mixed": random mix of present and absent sounds
                - "balanced": at most two present sounds, rest absent
                Any other value is treated like "balanced".

        Raises:
            ValueError: If ``num_options`` is below 1, if default labels are
                requested for more than 26 options, or if the number of
                labels does not match ``num_options``.
        """
        if num_options < 1:
            raise ValueError(f"num_options must be at least 1 (got {num_options})")

        if not option_labels:
            # Derive defaults from num_options instead of hard-coding A-D so
            # that e.g. num_options=5 works without explicit labels.
            if num_options > self._MAX_DEFAULT_LABELS:
                raise ValueError(
                    f"option_labels must be provided when num_options exceeds "
                    f"{self._MAX_DEFAULT_LABELS}"
                )
            option_labels = [chr(ord("A") + i) for i in range(num_options)]

        if len(option_labels) != num_options:
            raise ValueError(f"Number of option labels must match num_options ({num_options})")

        self.num_options = num_options
        self.option_labels = option_labels
        self.distractor_strategy = distractor_strategy

    def _build_mcq(self, question: str, options: list, correct_value) -> Dict:
        """Shuffle ``options`` in place and assemble the MCQ result dictionary."""
        random.shuffle(options)
        correct_label = self.option_labels[options.index(correct_value)]
        return {
            "question": question,
            # zip stops at the shorter sequence, so a short option list simply
            # leaves the trailing labels unused.
            "options": dict(zip(self.option_labels, options)),
            "correct_answer": correct_label,
            "correct_value": correct_value,
        }

    def generate_count_mcq(
        self,
        question_template: str,
        correct_count: int,
        all_categories: List[str]
    ) -> Dict:
        """
        Generate an MCQ for counting task.

        Args:
            question_template: Question text template (used verbatim)
            correct_count: Correct number of unique sounds
            all_categories: List of all available categories (not used by
                this method; kept for interface compatibility)

        Returns:
            Dictionary with question, options, and correct answer
        """
        options = self._generate_count_options(correct_count)
        return self._build_mcq(question_template, options, correct_count)

    def generate_count_open_text(
        self,
        question_template: str,
        correct_count: int
    ) -> Dict:
        """
        Generate an open-text question for counting task.

        Args:
            question_template: Question text template (used verbatim)
            correct_count: Correct number of unique sounds

        Returns:
            Dictionary with question and correct answer (count as a string)
        """
        return {
            "question": question_template,
            "correct_answer": str(correct_count),
        }

    def generate_category_mcq(
        self,
        question_template: str,
        correct_category: str,
        present_categories: List[str],
        all_categories: List[str]
    ) -> Dict:
        """
        Generate an MCQ where answer is a sound category.

        Args:
            question_template: Question text template (used verbatim)
            correct_category: Correct category
            present_categories: Categories present in the audio
            all_categories: All available categories

        Returns:
            Dictionary with question, options, and correct answer. Fewer than
            ``num_options`` options are returned when the category lists do
            not contain enough distinct distractors.
        """
        distractors = self._generate_category_distractors(
            correct_category,
            present_categories,
            all_categories,
            self.num_options - 1,
        )
        options = [correct_category] + distractors
        return self._build_mcq(question_template, options, correct_category)

    def generate_category_open_text(
        self,
        question_template: str,
        correct_category: str
    ) -> Dict:
        """
        Generate an open-text question where answer is a sound category.

        Args:
            question_template: Question text template (used verbatim)
            correct_category: Correct category

        Returns:
            Dictionary with question and correct answer
        """
        return {
            "question": question_template,
            "correct_answer": correct_category,
        }

    def generate_sequence_open_text(
        self,
        question_template: str,
        sequence: List[str]
    ) -> Dict:
        """
        Generate an open-text question for sequence/ordering.

        Args:
            question_template: Question text template (used verbatim)
            sequence: List of categories in order

        Returns:
            Dictionary with question and correct answer (the categories
            joined with ", ")
        """
        return {
            "question": question_template,
            "correct_answer": ", ".join(sequence),
        }

    def _generate_count_options(self, correct_count: int) -> List[int]:
        """
        Generate count options including the correct count.

        Args:
            correct_count: Correct count value

        Returns:
            List of count options with the correct count first, followed by
            ``num_options - 1`` distinct distractor counts
        """
        num_distractors = self.num_options - 1

        # Candidates start at 1 and reach at least 11. The upper bound also
        # grows with num_options so there are always enough distinct
        # distractors; for num_options <= 10 the pool matches the historic
        # range(1, max(correct_count + 3, 12)).
        upper_bound = max(correct_count + 3, 12, self.num_options + 2)
        candidates = [v for v in range(1, upper_bound) if v != correct_count]

        distractors = random.sample(candidates, min(num_distractors, len(candidates)))
        return [correct_count] + distractors

    def _generate_category_distractors(
        self,
        correct_category: str,
        present_categories: List[str],
        all_categories: List[str],
        num_distractors: int
    ) -> List[str]:
        """
        Generate distractor categories based on strategy.

        The result never contains ``correct_category`` and never contains
        duplicates, even when the input lists do.

        Args:
            correct_category: Correct category
            present_categories: Categories present in audio
            all_categories: All available categories
            num_distractors: Number of distractors to generate

        Returns:
            List of distractor categories; shorter than ``num_distractors``
            only when the inputs do not provide enough distinct candidates
        """
        present_lookup = set(present_categories)

        # dict.fromkeys removes duplicates while keeping first-seen order, so
        # random.sample can never return the same category twice.
        present_non_answer = [
            c for c in dict.fromkeys(present_categories) if c != correct_category
        ]
        # The correct category must be excluded here as well: when the answer
        # is itself absent from the audio it would otherwise be eligible as a
        # distractor and show up twice among the options.
        absent_categories = [
            c for c in dict.fromkeys(all_categories)
            if c not in present_lookup and c != correct_category
        ]

        distractors: List[str] = []

        if self.distractor_strategy == "present_only":
            if len(present_non_answer) >= num_distractors:
                distractors = random.sample(present_non_answer, num_distractors)
            else:
                # Not enough present categories: take them all, then top up
                # with absent ones.
                distractors = present_non_answer.copy()
                remaining = num_distractors - len(distractors)
                distractors.extend(
                    random.sample(absent_categories, min(remaining, len(absent_categories)))
                )

        elif self.distractor_strategy == "mixed":
            num_present = random.randint(0, min(len(present_non_answer), num_distractors))
            num_absent = num_distractors - num_present

            if num_present > 0:
                distractors.extend(random.sample(present_non_answer, num_present))
            if num_absent > 0:
                distractors.extend(
                    random.sample(absent_categories, min(num_absent, len(absent_categories)))
                )

        else:
            # "balanced" (also the fallback for unrecognised strategy names):
            # zero, one or two present categories, the rest absent.
            num_present = min(
                random.choice([0, 1, 2]), len(present_non_answer), num_distractors
            )
            num_absent = num_distractors - num_present

            if num_present > 0:
                distractors.extend(random.sample(present_non_answer, num_present))
            if num_absent > 0:
                distractors.extend(
                    random.sample(absent_categories, min(num_absent, len(absent_categories)))
                )

        if len(distractors) < num_distractors:
            # The strategy could not supply enough distractors; fill the gap
            # with any not-yet-used category from all_categories.
            already_chosen = set(distractors)
            pool = [
                c for c in dict.fromkeys(all_categories)
                if c != correct_category and c not in already_chosen
            ]
            while pool and len(distractors) < num_distractors:
                pick = random.choice(pool)
                pool.remove(pick)
                distractors.append(pick)

        return distractors[:num_distractors]
|
|
|