""" LLM-based question generation utilities. Supports multiple LLM providers for generating natural, lexically consistent questions. """ import os import random from typing import Dict, List, Optional, Tuple import json from .logger import setup_logger logger = setup_logger(__name__) class LLMQuestionGenerator: """Generate questions using local Llama 3.1 8B Instruct LLM.""" def __init__( self, enabled: bool = False, template_questions: Optional[Dict] = None ): """ Initialize LLM question generator. Args: enabled: Whether LLM generation is enabled template_questions: Template questions for fallback """ self.enabled = enabled self.template_questions = template_questions or {} if not self.enabled: logger.info("LLM generation disabled, using templates") return # TODO: Initialize local Llama 3.1 8B model connection # This will be implemented based on your local LLM setup logger.info("LLM generation enabled (local Llama 3.1 8B)") logger.warning("Local LLM integration not yet implemented, falling back to templates") def generate_count_questions( self, correct_count: int, categories_present: List[str], generate_both: bool = True ) -> Dict: """ Generate count task questions. Args: correct_count: Correct number of unique sounds categories_present: List of sound categories in the audio generate_both: Whether to generate both MCQ and open-text Returns: Dictionary with mcq_question and/or open_text_question """ # TODO: Implement LLM generation when enabled # For now, always use templates return self._generate_count_template(correct_count) def generate_category_questions( self, task_type: str, correct_category: str, categories_present: List[str], context: Optional[Dict] = None ) -> Dict: """ Generate questions where the answer is a sound category. Args: task_type: Type of task (duration, order, volume) correct_category: Correct answer category categories_present: All categories in the audio context: Additional context (e.g., question_type, reference_sound) Returns: Dictionary with mcq_question and open_text_question """ # TODO: Implement LLM generation when enabled # For now, always use templates return self._generate_category_template(task_type, correct_category, context) def _generate_count_template(self, correct_count: int) -> Dict: """Generate count questions from templates.""" mcq_templates = self.template_questions.get("count", {}).get("mcq", [ "What is the number of distinct sound sources in the audio file?", "How many different types of sounds can be identified in this recording?" ]) open_templates = self.template_questions.get("count", {}).get("open_text", [ "How many distinct sound sources are present in the audio?", "Count the number of unique sounds in this recording." ]) return { "mcq_question": random.choice(mcq_templates), "open_text_question": random.choice(open_templates) } def _generate_category_template( self, task_type: str, correct_category: str, context: Optional[Dict] ) -> Dict: """Generate category questions from templates.""" context = context or {} if task_type == "duration": q_type = context.get("question_type", "shortest") mcq_q = f"Which of the following sounds is heard for the {q_type} duration?" open_q = f"Which sound is heard for the {q_type} duration in the audio?" elif task_type == "order": q_subtype = context.get("question_subtype", "first") if q_subtype == "first": mcq_q = "Which sound appears first in the audio clip?" open_q = "What is the first sound you hear in the audio?" elif q_subtype == "last": mcq_q = "Which sound appears last in the audio clip?" open_q = "What is the last sound you hear in the audio?" elif q_subtype == "after": ref = context.get("reference_sound", "") mcq_q = f"Which sound comes after {ref}?" open_q = f"What sound comes after {ref}?" else: ref = context.get("reference_sound", "") mcq_q = f"Which sound comes before {ref}?" open_q = f"What sound comes before {ref}?" else: # volume q_type = context.get("question_type", "loudest") mcq_q = f"Which sound is the {q_type} in the audio?" open_q = f"Identify the {q_type} sound in the audio clip." return { "mcq_question": mcq_q, "open_text_question": open_q }