File size: 9,252 Bytes
fec9168
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
"""
Question generation utilities for MCQ and open-text formats.
"""

import random
from typing import Dict, List, Optional, Tuple

from .logger import setup_logger

# Module-level logger, obtained by passing this module's name to the
# project's setup_logger helper (imported from .logger above).
logger = setup_logger(__name__)


class QuestionGenerator:
    """Generates questions in MCQ and open-text formats.

    MCQ results are dictionaries with the keys ``question``, ``options``
    (label -> value), ``correct_answer`` (the label of the correct option) and
    ``correct_value``. Open-text results only carry ``question`` and
    ``correct_answer``.

    All randomness comes from the module-level ``random`` generator, so
    callers can make the output reproducible with ``random.seed``.
    """

    # Default labels are single letters "A".."Z", so at most 26 can be derived.
    _MAX_DEFAULT_LABELS = 26

    def __init__(
        self,
        num_options: int = 4,
        option_labels: Optional[List[str]] = None,
        distractor_strategy: str = "balanced"
    ):
        """
        Initialize question generator.

        Args:
            num_options: Number of MCQ options (must be at least 1)
            option_labels: Labels for options (e.g., ['A', 'B', 'C', 'D']).
                When omitted (or empty), the first ``num_options`` uppercase
                letters are used.
            distractor_strategy: Strategy for generating distractor options
                - "present_only": prefer sounds present in audio; absent
                  sounds are only used when there are not enough present ones
                - "mixed": mix of present and absent sounds
                - "balanced": balanced distribution
                Any other value behaves like "balanced".

        Raises:
            ValueError: If ``num_options`` is smaller than 1, if the number of
                labels does not match ``num_options``, or if default labels
                are requested for more than 26 options.
        """
        if num_options < 1:
            raise ValueError(f"num_options must be at least 1 (got {num_options})")

        if option_labels:
            # Copy so later mutation of the caller's list cannot affect us.
            labels = list(option_labels)
        else:
            if num_options > self._MAX_DEFAULT_LABELS:
                raise ValueError(
                    f"option_labels must be provided when num_options exceeds "
                    f"{self._MAX_DEFAULT_LABELS} (got {num_options})"
                )
            labels = [chr(ord("A") + offset) for offset in range(num_options)]

        if len(labels) != num_options:
            raise ValueError(f"Number of option labels must match num_options ({num_options})")

        self.num_options = num_options
        self.option_labels = labels
        self.distractor_strategy = distractor_strategy

    def generate_count_mcq(
        self,
        question_template: str,
        correct_count: int,
        all_categories: Optional[List[str]] = None
    ) -> Dict:
        """
        Generate an MCQ for counting task.

        Args:
            question_template: Question text template
            correct_count: Correct number of unique sounds
            all_categories: Unused by this method; accepted so existing
                callers that pass the category list keep working

        Returns:
            Dictionary with question, options, and correct answer
        """
        options = self._generate_count_options(correct_count)
        return self._build_mcq(question_template, options, correct_count)

    def generate_count_open_text(
        self,
        question_template: str,
        correct_count: int
    ) -> Dict:
        """
        Generate an open-text question for counting task.

        Args:
            question_template: Question text template
            correct_count: Correct number of unique sounds

        Returns:
            Dictionary with question and correct answer (the count as a string)
        """
        return {
            "question": question_template,
            "correct_answer": str(correct_count)
        }

    def generate_category_mcq(
        self,
        question_template: str,
        correct_category: str,
        present_categories: List[str],
        all_categories: List[str]
    ) -> Dict:
        """
        Generate an MCQ where answer is a sound category.

        If fewer than ``num_options - 1`` distinct distractors can be found,
        the question is returned with fewer options (labels are assigned in
        order and the surplus labels are left out).

        Args:
            question_template: Question text template
            correct_category: Correct category
            present_categories: Categories present in the audio
            all_categories: All available categories

        Returns:
            Dictionary with question, options, and correct answer
        """
        distractors = self._generate_category_distractors(
            correct_category,
            present_categories,
            all_categories,
            self.num_options - 1
        )
        options = [correct_category] + distractors
        return self._build_mcq(question_template, options, correct_category)

    def generate_category_open_text(
        self,
        question_template: str,
        correct_category: str
    ) -> Dict:
        """
        Generate an open-text question where answer is a sound category.

        Args:
            question_template: Question text template
            correct_category: Correct category

        Returns:
            Dictionary with question and correct answer
        """
        return {
            "question": question_template,
            "correct_answer": correct_category
        }

    def generate_sequence_open_text(
        self,
        question_template: str,
        sequence: List[str]
    ) -> Dict:
        """
        Generate an open-text question for sequence/ordering.

        Args:
            question_template: Question text template
            sequence: List of categories in order

        Returns:
            Dictionary with question and correct answer (categories joined
            with ", " in the given order)
        """
        return {
            "question": question_template,
            "correct_answer": ", ".join(sequence)
        }

    def _build_mcq(self, question: str, options: List, correct_value) -> Dict:
        """
        Shuffle options, attach labels and locate the correct one.

        Args:
            question: Question text
            options: Option values including ``correct_value``; shuffled in place
            correct_value: The value among ``options`` that is correct

        Returns:
            Dictionary with question, options, and correct answer
        """
        random.shuffle(options)
        correct_label = self.option_labels[options.index(correct_value)]

        # zip stops at the shorter input, so a short option list simply uses
        # the first few labels.
        option_map = dict(zip(self.option_labels, options))

        return {
            "question": question,
            "options": option_map,
            "correct_answer": correct_label,
            "correct_value": correct_value
        }

    @staticmethod
    def _sample_up_to(pool: List, k: int) -> List:
        """Return up to ``k`` distinct random items from ``pool`` (possibly none)."""
        if k <= 0 or not pool:
            return []
        return random.sample(pool, min(k, len(pool)))

    def _generate_count_options(self, correct_count: int) -> List[int]:
        """
        Generate count options including the correct count.

        Args:
            correct_count: Correct count value

        Returns:
            List of count options, correct count first followed by distinct
            distractors
        """
        num_distractors = self.num_options - 1

        # Distractors start at 1 (minimum count is 1, not 0). The upper bound
        # grows with num_options so there are always enough distinct values
        # even after the correct count is removed from the range.
        upper_bound = max(correct_count + 3, 12, self.num_options + 1)
        candidates = [value for value in range(1, upper_bound) if value != correct_count]

        return [correct_count] + self._sample_up_to(candidates, num_distractors)

    def _generate_category_distractors(
        self,
        correct_category: str,
        present_categories: List[str],
        all_categories: List[str],
        num_distractors: int
    ) -> List[str]:
        """
        Generate distractor categories based on strategy.

        The result never contains ``correct_category`` and never contains the
        same category twice, even when the input lists hold duplicates or the
        correct category is not among the present ones.

        Args:
            correct_category: Correct category
            present_categories: Categories present in audio
            all_categories: All available categories
            num_distractors: Number of distractors to generate

        Returns:
            List of distractor categories; shorter than ``num_distractors``
            when not enough distinct candidates exist
        """
        if num_distractors <= 0:
            return []

        present_lookup = set(present_categories)

        # dict.fromkeys removes duplicates while keeping first-seen order.
        present_pool = [
            category for category in dict.fromkeys(present_categories)
            if category != correct_category
        ]
        # The correct category must be excluded here too: it may be absent
        # from the audio and would otherwise be offered as its own distractor.
        absent_pool = [
            category for category in dict.fromkeys(all_categories)
            if category != correct_category and category not in present_lookup
        ]

        if self.distractor_strategy == "present_only":
            # Prefer present categories; top up with absent ones if too few.
            distractors = self._sample_up_to(present_pool, num_distractors)
            distractors += self._sample_up_to(absent_pool, num_distractors - len(distractors))

        elif self.distractor_strategy == "mixed":
            # Mix of present and absent (random proportion)
            num_present = random.randint(0, min(len(present_pool), num_distractors))
            distractors = self._sample_up_to(present_pool, num_present)
            distractors += self._sample_up_to(absent_pool, num_distractors - num_present)

        else:  # balanced (also the fallback for unrecognized strategies)
            # Balanced distribution: 0, 1, or 2 present sounds as distractors
            num_present = min(random.choice([0, 1, 2]), len(present_pool), num_distractors)
            distractors = self._sample_up_to(present_pool, num_present)
            distractors += self._sample_up_to(absent_pool, num_distractors - num_present)

        # Fill remaining slots from whatever candidates were not picked yet.
        shortfall = num_distractors - len(distractors)
        if shortfall > 0:
            already_chosen = set(distractors)
            leftovers = [
                category for category in present_pool + absent_pool
                if category not in already_chosen
            ]
            distractors += self._sample_up_to(leftovers, shortfall)

        return distractors[:num_distractors]