File size: 7,241 Bytes
78e8dd4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0cfa3a6
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
"""

Question Generation Module

Generates multiple-choice questions based on Bloom's taxonomy

"""
import json
from typing import Dict, List
from openai import OpenAI


class QuestionGenerator:
    """Generates educational multiple-choice questions based on Bloom's taxonomy.

    Retrieves topic content through a RAG query engine, then asks an OpenAI
    chat model to produce one question per taxonomy level. Any failure in
    retrieval, generation, or parsing degrades to placeholder questions so
    callers always receive a complete level→question mapping.
    """

    def __init__(self, client: OpenAI, rag_query_engine):
        """
        Args:
            client: Configured OpenAI API client.
            rag_query_engine: Object exposing ``query(text) -> (response, _)``
                used to retrieve manual content for a topic.
        """
        self.client = client
        self.rag_query_engine = rag_query_engine

        # Single source of truth for the taxonomy: the insertion order of
        # these keys is the canonical level ordering, and every other method
        # derives its level list from here instead of repeating the literal.
        self.blooms_levels = {
            "remember": "generate questions that test basic recall of facts and information",
            "understand": "generate questions that test explanation and interpretation of concepts",
            "apply": "generate questions that test application of knowledge in practical situations",
            "analyze": "generate questions that test analysis of relationships and structure",
            "evaluate": "generate questions that test evaluation and judgment based on criteria",
            "create": "generate questions that test creation of new ideas or solutions"
        }

    def generate_questions(self, topic_file: str) -> Dict[str, Dict]:
        """Generate one multiple-choice question per Bloom's taxonomy level.

        Args:
            topic_file: Name of the topic file (PDF).

        Returns:
            Dictionary mapping each Bloom's level to its question data.
            On API or JSON-parse failure, returns fallback placeholder
            questions instead of raising.
        """
        # removesuffix strips only a trailing '.pdf'; the previous
        # str.replace would also mangle a '.pdf' occurring mid-name.
        topic_clean = topic_file.removesuffix('.pdf')

        # Ground the questions in retrieved manual content.
        file_content_query = f"What are the key points covered in the document '{topic_clean}'?"
        content_response, _ = self.rag_query_engine.query(file_content_query)

        prompt = self._build_question_prompt(topic_clean, content_response)

        try:
            response = self.client.chat.completions.create(
                model="gpt-4o",
                messages=[
                    {"role": "system", "content": "You are an expert in creating educational assessment materials for automotive systems."},
                    {"role": "user", "content": prompt}
                ],
                # Low temperature for reproducible, well-formed assessments;
                # json_object mode constrains the reply to parseable JSON.
                temperature=0.4,
                response_format={"type": "json_object"}
            )

            response_text = response.choices[0].message.content

            try:
                questions_data = json.loads(response_text)
                return self._validate_and_format_questions(questions_data, topic_clean)
            except json.JSONDecodeError as e:
                print(f"⚠️ Error parsing JSON: {e}")
                return self._create_fallback_questions(topic_clean)
        except Exception as e:
            # Deliberate best-effort boundary: any API error is reported and
            # converted to fallback questions rather than propagated.
            print(f"❌ Error generating questions: {e}")
            return self._create_fallback_questions(topic_clean)

    def _build_question_prompt(self, topic_clean: str, content_response: str) -> str:
        """Build the generation prompt from the topic name and RAG content.

        Args:
            topic_clean: Topic name with the '.pdf' suffix removed.
            content_response: Manual content retrieved for the topic.

        Returns:
            The full user-role prompt, including the required JSON schema
            and a worked example.
        """
        prompt = f"""You are a tester trying to come up with multiple choice questions from system users based on the input car manuals.

You are trying to make it not tricky, but at the same time not too easy. However, users' understanding of the system is your utmost priority.



Create 1 multiple choice question based on the manual file about '{topic_clean}' for each of the following levels of Bloom's taxonomy:

- Remember: {self.blooms_levels['remember']}

- Understand: {self.blooms_levels['understand']}

- Apply: {self.blooms_levels['apply']}

- Analyze: {self.blooms_levels['analyze']}

- Evaluate: {self.blooms_levels['evaluate']}

- Create: {self.blooms_levels['create']}



Try to find the most important and insightful content for each question. Do note where the right answer in the manual file is located.

Separate the questions and explanations (i.e., only write all the explanations at the end).

Please do not generate questions that give varying numbers as answers. Test users' concepts and understanding of the vehicle system.

Make sure there are no questions with possibility of two correct answers.



Try to have a definitive right answer. Be slow and steady.



Here is the content from the manual:

{content_response}



Output your response as a clean JSON object with these fields for each question:

- level (string): the Bloom's taxonomy level

- question_text (string): the full question text

- options (array): four answer choices as strings

- correct_option_index (integer): index of the correct answer (0-3)

- explanation (string): explanation of why the correct answer is right



Example JSON format:

{{

  "questions": [

    {{

      "level": "remember",

      "question_text": "What does DISTRONIC stand for?",

      "options": ["Distance Control", "Dynamic Intelligent Speed Tronic", "Direct Intelligence Control", "Digital Road Navigation Intelligence Control"],

      "correct_option_index": 1,

      "explanation": "DISTRONIC stands for Dynamic Intelligent Speed Tronic as stated in section 3.2 of the manual."

    }}

  ]

}}

"""
        return prompt

    def _validate_and_format_questions(self, questions_data: Dict, topic_clean: str) -> Dict[str, Dict]:
        """Validate model output and backfill any missing taxonomy levels.

        Args:
            questions_data: Parsed JSON expected to hold a "questions" list.
            topic_clean: Topic name (kept for interface compatibility;
                currently unused).

        Returns:
            Mapping with exactly one entry per Bloom's level; unrecognized
            levels are dropped and missing ones replaced with placeholders.
        """
        question_dict = {}

        # Keep only questions whose level matches a known taxonomy key
        # (case-insensitively); the last occurrence of a level wins.
        for q in questions_data.get("questions", []):
            level = q.get("level", "").lower()
            if level in self.blooms_levels:
                question_dict[level] = q

        # Backfill placeholders so every level is always present.
        for level in self.blooms_levels:
            if level not in question_dict:
                print(f"⚠️ Missing question for level: {level}")
                question_dict[level] = {
                    "level": level,
                    "question_text": f"Question for {level} level could not be generated.",
                    "options": ["Option A", "Option B", "Option C", "Option D"],
                    "correct_option_index": 0,
                    "explanation": "Please try again or select a different topic."
                }

        return question_dict

    def _create_fallback_questions(self, topic_name: str) -> Dict[str, Dict]:
        """Create one generic placeholder question per level when generation fails.

        Args:
            topic_name: Topic name to interpolate into the placeholder text.

        Returns:
            Mapping of every Bloom's level to a placeholder question whose
            first option is marked correct.
        """
        fallback = {}
        for level in self.blooms_levels:
            fallback[level] = {
                "level": level,
                "question_text": f"What is a key feature of {topic_name}?",
                "options": [
                    f"Option A about {topic_name}",
                    f"Option B about {topic_name}",
                    f"Option C about {topic_name}",
                    f"Option D about {topic_name}"
                ],
                "correct_option_index": 0,
                "explanation": f"This is a fallback question for the {level} level. Please try again or select a different topic."
            }
        return fallback