Spaces:

Bharath370
/

Trivia5

Sleeping

File size: 8,914 Bytes

582bf6b

# modules/mcq_generator.py
"""Enhanced MCQ Quiz Generator Module"""

import random
import re
from typing import Dict, List, Tuple

from config.settings import RANDOM_TOPICS
from modules.api_utils import (
    fetch_wikipedia_summary,
    search_wikipedia,
    fetch_related_topics,
    fetch_wikipedia_categories,
)
from modules.hf_llm_generator import generate_quiz_set_with_llm as generate_quiz_set_from_llm


def generate_smart_distractors(
    correct_answer: str, topic: str, context: str
) -> List[str]:
    """Generate three wrong-answer options for a question about *topic*.

    Candidates are collected from ``fetch_related_topics(topic, 10)`` and from
    ``search_wikipedia`` run on the first two entries returned by
    ``fetch_wikipedia_categories(topic)``. Anything equal to the correct
    answer or the topic (ignoring case and surrounding whitespace) is dropped,
    so an option can never duplicate the right answer under another spelling.
    When fewer than three candidates remain, generic placeholders are appended.

    Args:
        correct_answer: The right answer; never returned as a distractor.
        topic: The topic the question is about; never returned as a distractor.
        context: Currently unused; kept so existing callers keep working.

    Returns:
        A list of three distractor strings.
    """
    # NOTE(review): the helpers are assumed to return lists of title strings;
    # a falsy result (None / empty) is treated as "no candidates".
    candidates = set(fetch_related_topics(topic, 10) or [])

    # Search within the topic's first two categories for sibling items.
    categories = fetch_wikipedia_categories(topic) or []
    for category in categories[:2]:
        candidates.update(search_wikipedia(category, 5) or [])

    # Compare case-insensitively: "python" must not survive next to "Python".
    excluded = {correct_answer.strip().casefold(), topic.strip().casefold()}
    distractor_list = [
        candidate
        for candidate in candidates
        if isinstance(candidate, str)
        and candidate.strip()
        and candidate.strip().casefold() not in excluded
    ]
    random.shuffle(distractor_list)

    # If not enough distractors, pad with generic ones (real candidates stay
    # first because the slice below keeps the head of the list).
    if len(distractor_list) < 3:
        generic_distractors = [
            "Scientific Theory",
            "Historical Event",
            "Mathematical Concept",
            "Geographical Location",
            "Literary Work",
            "Technological Innovation",
            "Cultural Phenomenon",
            "Economic System",
            "Political Movement",
        ]
        distractor_list.extend(
            generic
            for generic in generic_distractors
            if generic.casefold() not in excluded
        )

    return distractor_list[:3]


# Phrases that typically introduce a definition ("X is a ...", "X are ...",
# "X is defined as ..."). Word boundaries keep substrings such as the "are" in
# "software" or the "is a" in "this area" from counting as a definition.
_DEFINITION_MARKER = re.compile(r"\b(?:is an?|are|defined as)\b", re.IGNORECASE)

# Longest answer text returned before it is cut and marked with "...".
_MAX_ANSWER_LENGTH = 100


def _shorten_answer(text: str, limit: int = _MAX_ANSWER_LENGTH) -> str:
    """Return *text* cut to *limit* characters, with "..." appended if cut."""
    return text[:limit] + "..." if len(text) > limit else text


def generate_question_types(topic: str, summary_data: Dict, difficulty: str) -> Dict:
    """Build a question and its correct answer from a topic summary.

    The extract is split into sentences on ``.``. If a sentence mentions the
    title and contains a definition phrase, it becomes the answer to
    "What is {title}?" regardless of difficulty. Otherwise the question
    wording and answer source depend on *difficulty*; any value other than
    "Easy" or "Medium" is treated as Hard.

    Args:
        topic: Fallback title when ``summary_data`` has no usable "title".
        summary_data: Mapping read for the keys "title", "extract" and
            "description"; missing or ``None`` values are treated as empty.
        difficulty: "Easy", "Medium", or anything else for Hard.

    Returns:
        A dict with "question", "correct_answer" (at most 100 characters plus
        a trailing "..." when shortened) and "context" (the extract).
    """
    # ``or`` rather than a .get default: a key present with value None must
    # not reach .split() / .lower().
    title = summary_data.get("title") or topic
    extract = summary_data.get("extract") or ""
    description = summary_data.get("description") or ""

    sentences = [part.strip() for part in extract.split(".") if part.strip()]

    # Prefer a sentence that actually defines the topic.
    title_lower = title.lower()
    definition = next(
        (
            sentence
            for sentence in sentences
            if title_lower in sentence.lower() and _DEFINITION_MARKER.search(sentence)
        ),
        None,
    )

    if definition is not None:
        question = f"What is {title}?"
        answer = definition
    elif sentences:
        if difficulty == "Easy":
            question = f"What is {title}?"
            # Never answer "What is X?" with "X" while real text is available.
            answer = description or sentences[0]
        elif difficulty == "Medium":
            question = f"Explain {title}."
            answer = sentences[0]
        else:  # Hard
            question = f"What is the significance of {title}?"
            answer = sentences[-1]  # Last sentence for more detail
    else:
        # No extract text at all: only the description or the title remain.
        if difficulty == "Easy":
            question = f"What is {title}?"
        elif difficulty == "Medium":
            question = f"How is {title} commonly defined?"
        else:  # Hard
            question = f"What is the key principle underlying {title}?"
        answer = description or title

    return {
        "question": question,
        "correct_answer": _shorten_answer(answer),
        "context": extract,
    }


def generate_mcq(topic: str, difficulty: str) -> Dict:
    """Build one multiple choice question for *topic*.

    Looks up the topic summary; when that yields nothing, searches for up to
    three candidate titles and retries with the first one containing the
    requested topic (case-insensitive), or the first result otherwise.

    Returns a dict with "status": True plus "question", "options",
    "correct_answer", "explanation", "topic" and "difficulty" on success, or a
    dict with "status": False, an "error" message and "suggestions" when no
    summary could be found.
    """
    summary = fetch_wikipedia_summary(topic)

    if not summary:
        # Direct lookup failed: fall back to a title search.
        candidates = search_wikipedia(topic, 3)
        if candidates:
            best_match = candidates[0]
            for candidate in candidates:
                if topic.lower() in candidate.lower():
                    best_match = candidate
                    break
            summary = fetch_wikipedia_summary(best_match)
            if summary:
                # Continue with the title that actually resolved.
                topic = best_match

        if not summary:
            return {
                "error": "Topic not found on Wikipedia. Try a different topic or check spelling.",
                "status": False,
                "suggestions": candidates or [],
            }

    # Question wording and answer depend on the difficulty level.
    question_data = generate_question_types(topic, summary, difficulty)
    answer = question_data["correct_answer"]

    wrong_answers = generate_smart_distractors(answer, topic, question_data["context"])

    # Correct answer plus at most three distractors, in random order.
    options = [answer, *wrong_answers[:3]]
    random.shuffle(options)

    # The explanation is the extract, capped at 300 characters.
    explanation = summary.get("extract", "")
    if len(explanation) > 300:
        explanation = explanation[:300] + "..."

    return {
        "question": question_data["question"],
        "options": options,
        "correct_answer": answer,
        "explanation": explanation,
        "topic": summary.get("title", topic),
        "difficulty": difficulty,
        "status": True,
    }


def generate_quiz_set(
    topic: str, difficulty: str, num_questions: int = 5, use_llm: bool = False
) -> List[Dict]:
    """Generate a set of questions for a complete quiz.

    With ``use_llm`` the whole request is delegated to
    ``generate_quiz_set_from_llm``. Otherwise questions are built with
    ``generate_mcq`` from, in priority order: the requested topic, its related
    topics (shuffled), then every entry of ``RANDOM_TOPICS`` (shuffled). Each
    distinct topic is attempted at most once, so the search always ends.

    If that still leaves the quiz short, topics that already produced a
    question are reused; this stops after three consecutive failures instead
    of looping forever.

    Args:
        topic: Main quiz topic.
        difficulty: Difficulty label passed through to the generators.
        num_questions: Number of questions wanted; values <= 0 yield ``[]``.
        use_llm: Delegate generation to the LLM-backed generator.

    Returns:
        Up to ``num_questions`` question dicts in random order. Fewer are
        returned when not enough questions could be generated.
    """
    if use_llm:
        return generate_quiz_set_from_llm(topic, difficulty, num_questions)

    # Nothing requested: skip the network lookups entirely.
    if num_questions <= 0:
        return []

    # NOTE(review): a falsy result from fetch_related_topics is treated as
    # "no related topics" - confirm it never signals an error another way.
    related_topics = list(fetch_related_topics(topic, 10) or [])
    random.shuffle(related_topics)
    # Random topics are a reserve, used only once relevant topics run out.
    reserve_topics = random.sample(RANDOM_TOPICS, len(RANDOM_TOPICS))

    questions: List[Dict] = []
    successful_topics: List[str] = []
    tried_topics = set()

    for current_topic in [topic, *related_topics, *reserve_topics]:
        if len(questions) >= num_questions:
            break
        if current_topic in tried_topics:
            continue  # duplicate candidate; every topic gets one attempt
        tried_topics.add(current_topic)

        question = generate_mcq(current_topic, difficulty)
        if question.get("status"):
            questions.append(question)
            successful_topics.append(current_topic)

    # Fallback: repeat topics known to work, giving up after repeated
    # failures so a persistent outage cannot hang the caller.
    max_consecutive_failures = 3
    consecutive_failures = 0
    while (
        len(questions) < num_questions
        and successful_topics
        and consecutive_failures < max_consecutive_failures
    ):
        question = generate_mcq(random.choice(successful_topics), difficulty)
        if question.get("status"):
            questions.append(question)
            consecutive_failures = 0
        else:
            consecutive_failures += 1

    random.shuffle(questions)  # Shuffle the final set of questions
    return questions