|
|
""" |
|
|
LLM-based question generation utilities. |
|
|
|
|
|
Supports multiple LLM providers for generating natural, lexically consistent questions. |
|
|
""" |
|
|
|
|
|
import os |
|
|
import random |
|
|
from typing import Dict, List, Optional, Tuple |
|
|
import json |
|
|
|
|
|
from .logger import setup_logger |
|
|
|
|
|
logger = setup_logger(__name__) |
|
|
|
|
|
|
|
|
class LLMQuestionGenerator:
    """Generate count and category questions for audio tasks.

    The class is intended to front a local Llama 3.1 8B Instruct model, but
    that integration is not implemented in this class: every public method
    currently delegates to the template-based generators below, regardless
    of the ``enabled`` flag.
    """

    # Built-in fallbacks used when ``template_questions`` supplies no usable
    # (i.e. missing, ``None`` or empty) list for the count task.
    _DEFAULT_COUNT_MCQ = (
        "What is the number of distinct sound sources in the audio file?",
        "How many different types of sounds can be identified in this recording?",
    )
    _DEFAULT_COUNT_OPEN_TEXT = (
        "How many distinct sound sources are present in the audio?",
        "Count the number of unique sounds in this recording.",
    )

    def __init__(
        self,
        enabled: bool = False,
        template_questions: Optional[Dict] = None
    ):
        """
        Initialize LLM question generator.

        Args:
            enabled: Whether LLM generation is enabled. Only affects what is
                logged here; questions always come from templates.
            template_questions: Template questions for fallback. The count
                generator reads ``template_questions["count"]["mcq"]`` and
                ``template_questions["count"]["open_text"]``. ``None`` is
                treated as an empty dict.
        """
        self.enabled = enabled
        self.template_questions = template_questions or {}

        if not self.enabled:
            logger.info("LLM generation disabled, using templates")
            return

        logger.info("LLM generation enabled (local Llama 3.1 8B)")
        logger.warning("Local LLM integration not yet implemented, falling back to templates")

    def generate_count_questions(
        self,
        correct_count: int,
        categories_present: List[str],
        generate_both: bool = True
    ) -> Dict:
        """
        Generate count task questions.

        Args:
            correct_count: Correct number of unique sounds
            categories_present: List of sound categories in the audio
                (accepted for interface compatibility; not consulted by the
                template path)
            generate_both: Whether to generate both MCQ and open-text.
                NOTE(review): currently not consulted; both questions are
                always returned.

        Returns:
            Dictionary with ``mcq_question`` and ``open_text_question``
        """
        return self._generate_count_template(correct_count)

    def generate_category_questions(
        self,
        task_type: str,
        correct_category: str,
        categories_present: List[str],
        context: Optional[Dict] = None
    ) -> Dict:
        """
        Generate questions where the answer is a sound category.

        Args:
            task_type: Type of task (duration, order, volume)
            correct_category: Correct answer category
            categories_present: All categories in the audio (accepted for
                interface compatibility; not consulted by the template path)
            context: Additional context (e.g., question_type,
                question_subtype, reference_sound)

        Returns:
            Dictionary with ``mcq_question`` and ``open_text_question``
        """
        return self._generate_category_template(task_type, correct_category, context)

    def _generate_count_template(self, correct_count: int) -> Dict:
        """Generate count questions from templates.

        One MCQ wording and one open-text wording are picked at random.
        Configured templates take precedence; if the ``count`` entry or one
        of its lists is missing, ``None`` or empty, the built-in defaults are
        used instead so that ``random.choice`` never sees an empty sequence.

        Args:
            correct_count: Correct number of unique sounds. Not embedded in
                the question text.

        Returns:
            Dictionary with ``mcq_question`` and ``open_text_question``
        """
        count_templates = self.template_questions.get("count") or {}
        mcq_templates = count_templates.get("mcq") or self._DEFAULT_COUNT_MCQ
        open_templates = (
            count_templates.get("open_text") or self._DEFAULT_COUNT_OPEN_TEXT
        )

        return {
            "mcq_question": random.choice(mcq_templates),
            "open_text_question": random.choice(open_templates),
        }

    def _generate_category_template(
        self,
        task_type: str,
        correct_category: str,
        context: Optional[Dict]
    ) -> Dict:
        """Generate category questions from templates.

        Args:
            task_type: ``"duration"`` or ``"order"``; any other value is
                treated as the volume task.
            correct_category: Correct answer category. Not embedded in the
                question text.
            context: Optional settings. ``question_type`` is read for the
                duration task (default ``"shortest"``) and the volume task
                (default ``"loudest"``). ``question_subtype`` (default
                ``"first"``) and ``reference_sound`` (default ``""``) are
                read for the order task.

        Returns:
            Dictionary with ``mcq_question`` and ``open_text_question``
        """
        context = context or {}

        if task_type == "duration":
            q_type = context.get("question_type", "shortest")
            mcq_q = f"Which of the following sounds is heard for the {q_type} duration?"
            open_q = f"Which sound is heard for the {q_type} duration in the audio?"

        elif task_type == "order":
            q_subtype = context.get("question_subtype", "first")
            if q_subtype in ("first", "last"):
                mcq_q = f"Which sound appears {q_subtype} in the audio clip?"
                open_q = f"What is the {q_subtype} sound you hear in the audio?"
            else:
                # Any subtype other than "after" (including unknown values)
                # is phrased as a "before" question.
                relation = "after" if q_subtype == "after" else "before"
                ref = context.get("reference_sound", "")
                mcq_q = f"Which sound comes {relation} {ref}?"
                open_q = f"What sound comes {relation} {ref}?"

        else:
            # Volume task, and the catch-all for unrecognised task types.
            q_type = context.get("question_type", "loudest")
            mcq_q = f"Which sound is the {q_type} in the audio?"
            open_q = f"Identify the {q_type} sound in the audio clip."

        return {
            "mcq_question": mcq_q,
            "open_text_question": open_q,
        }
|
|
|