# Trivia5 / modules / mcq_generator.py
# Bharath370's picture
# Upload 60 files
# 44073c9 verified
# modules/mcq_generator.py
"""Enhanced MCQ Quiz Generator Module"""
import random
from typing import Dict, List, Tuple
from modules.api_utils import (
fetch_wikipedia_summary,
search_wikipedia,
fetch_related_topics,
fetch_wikipedia_categories,
)
from config.settings import RANDOM_TOPICS
from modules.hf_llm_generator import generate_quiz_set_with_llm as generate_quiz_set_from_llm
def generate_smart_distractors(
    correct_answer: str, topic: str, context: str
) -> List[str]:
    """Build up to three wrong-answer options for a question about *topic*.

    Candidates are gathered from the topics related to *topic* and from
    searches on the first two Wikipedia categories of *topic*. Anything that
    matches the correct answer or the topic itself (ignoring case and
    surrounding whitespace) is excluded, as are blank and duplicate entries.
    When fewer than three candidates remain, generic placeholder labels are
    appended to fill the gap.

    Args:
        correct_answer: The right answer; never returned as a distractor.
        topic: The subject of the question; never returned as a distractor.
        context: Unused; kept so existing callers keep working.

    Returns:
        A list of at most three distractor strings.
    """
    excluded = {correct_answer.strip().casefold(), topic.strip().casefold()}
    # Keyed by the case-folded text so "Python" and "python" count once.
    candidates: Dict[str, str] = {}

    def _collect(items) -> None:
        # The fetch helpers are presumably returning lists of title strings;
        # tolerate None and non-string entries rather than crashing.
        for item in items or []:
            if not isinstance(item, str):
                continue
            key = item.strip().casefold()
            if key and key not in excluded:
                candidates.setdefault(key, item)

    _collect(fetch_related_topics(topic, 10))

    # Only the first two categories are searched to limit lookups.
    for category in (fetch_wikipedia_categories(topic) or [])[:2]:
        _collect(search_wikipedia(category, 5))

    distractor_list = list(candidates.values())
    random.shuffle(distractor_list)

    # If not enough distractors, pad with generic ones.
    if len(distractor_list) < 3:
        generic_distractors = [
            "Scientific Theory",
            "Historical Event",
            "Mathematical Concept",
            "Geographical Location",
            "Literary Work",
            "Technological Innovation",
            "Cultural Phenomenon",
            "Economic System",
            "Political Movement",
        ]
        distractor_list.extend(
            label
            for label in generic_distractors
            if label.casefold() not in excluded
            and label.casefold() not in candidates
        )

    return distractor_list[:3]
def _is_definition_sentence(sentence: str, title: str) -> bool:
    """Return True when *sentence* mentions *title* and reads like a definition."""
    lowered = sentence.lower()
    if title.lower() not in lowered:
        return False
    if "defined as" in lowered:
        return True
    # Compare whole words so "area"/"square" do not count as "are" and
    # "this approach" does not count as "is a".
    words = [word.strip(",;:()\"'") for word in lowered.split()]
    if "are" in words:
        return True
    return any(
        first == "is" and second in ("a", "an")
        for first, second in zip(words, words[1:])
    )


def generate_question_types(topic: str, summary_data: Dict, difficulty: str) -> Dict:
    """Build a question and its correct answer from a Wikipedia summary.

    The extract is split into sentences on ".". If any sentence mentions the
    title and looks like a definition, the question is "What is <title>?" and
    that sentence (cut to 100 characters plus "..." when longer) is the
    answer, regardless of difficulty. Otherwise the question wording and the
    answer source depend on *difficulty* and on whether the extract produced
    any sentences at all.

    Args:
        topic: Fallback title when the summary has none.
        summary_data: Mapping read for the "title", "extract" and
            "description" keys; missing or None values are tolerated.
        difficulty: "Easy" or "Medium"; any other value is treated as hard.

    Returns:
        A dict with the keys "question", "correct_answer" and "context"
        (the extract text, or "" when there is none).
    """
    # "or" rather than a .get default so an explicit None is also handled.
    title = summary_data.get("title") or topic
    extract = summary_data.get("extract") or ""
    description = summary_data.get("description") or ""

    sentences = [part.strip() for part in extract.split(".") if part.strip()]

    # Prefer a sentence that defines the topic: it is the most relevant answer.
    definition = next(
        (s for s in sentences if _is_definition_sentence(s, title)), None
    )
    if definition is not None:
        if len(definition) > 100:  # Keep the option short enough to display.
            definition = definition[:100] + "..."
        return {
            "question": f"What is {title}?",
            "correct_answer": definition,
            "context": extract,
        }

    if sentences:
        if difficulty == "Easy":
            question = f"What is {title}?"
            correct_answer = description if description else title
        elif difficulty == "Medium":
            question = f"Explain {title}."
            correct_answer = sentences[0]
        else:  # Hard
            question = f"What is the significance of {title}?"
            correct_answer = sentences[-1]  # Last sentence for more detail
    else:  # No usable extract: fall back to title/description.
        if difficulty == "Easy":
            question = f"What is {title}?"
            correct_answer = title
        elif difficulty == "Medium":
            question = f"How is {title} commonly defined?"
            correct_answer = description if description else title
        else:  # Hard
            question = f"What is the key principle underlying {title}?"
            correct_answer = description if description else title

    return {"question": question, "correct_answer": correct_answer, "context": extract}
def generate_mcq(topic: str, difficulty: str) -> Dict:
    """Create one multiple-choice question about *topic*.

    The Wikipedia summary for *topic* is looked up directly; if that yields
    nothing, a search is run and the first hit whose title contains *topic*
    (case-insensitively) is used, defaulting to the first hit overall.

    Returns:
        On success, a dict with "question", "options" (shuffled), 
        "correct_answer", "explanation" (extract capped at 300 characters),
        "topic", "difficulty" and "status" set to True. On failure, a dict
        with "error", "status" set to False and "suggestions".
    """
    page = fetch_wikipedia_summary(topic)

    if not page:
        # Direct lookup failed; fall back to a search on the same text.
        hits = search_wikipedia(topic, 3)
        if hits:
            needle = topic.lower()
            best_hit = hits[0]
            for hit in hits:
                if needle in hit.lower():
                    best_hit = hit
                    break
            page = fetch_wikipedia_summary(best_hit)
            if page:
                # Continue with the title that actually resolved.
                topic = best_hit

        if not page:
            return {
                "error": "Topic not found on Wikipedia. Try a different topic or check spelling.",
                "status": False,
                "suggestions": hits if hits else [],
            }

    # Question wording and answer depend on the difficulty.
    qa = generate_question_types(topic, page, difficulty)

    wrong_answers = generate_smart_distractors(
        qa["correct_answer"], topic, qa["context"]
    )

    # One right answer plus at most three wrong ones, in random order.
    options = [qa["correct_answer"]] + wrong_answers[:3]
    random.shuffle(options)

    # The explanation is the start of the extract, with an ellipsis when cut.
    full_text = page.get("extract", "")
    if len(full_text) > 300:
        explanation = full_text[:300] + "..."
    else:
        explanation = full_text

    result = {
        "question": qa["question"],
        "options": options,
        "correct_answer": qa["correct_answer"],
        "explanation": explanation,
        "topic": page.get("title", topic),
        "difficulty": difficulty,
        "status": True,
    }
    return result
def generate_quiz_set(
    topic: str, difficulty: str, num_questions: int = 5, use_llm: bool = False
) -> List[Dict]:
    """Generate a set of questions for a complete quiz.

    With *use_llm* set, the whole job is delegated to the LLM generator.
    Otherwise questions are built from Wikipedia in two bounded phases:

    1. A diverse pass: the main topic, its related topics and five random
       topics are shuffled and each is tried at most once, followed by the
       remaining entries of RANDOM_TOPICS if more questions are still needed.
    2. A padding pass: if the diverse pass came up short, extra questions are
       generated from topics that already succeeded (so topics may repeat),
       with a capped number of attempts.

    Both phases always terminate, so the result may hold fewer than
    *num_questions* entries when lookups keep failing.

    Args:
        topic: Main quiz topic.
        difficulty: Difficulty label passed through to the question builder.
        num_questions: Desired number of questions; values <= 0 yield [].
        use_llm: Delegate generation to the LLM-backed generator.

    Returns:
        A shuffled list of question dicts.
    """
    if use_llm:
        return generate_quiz_set_from_llm(topic, difficulty, num_questions)

    if num_questions <= 0:
        return []

    random_pool = list(RANDOM_TOPICS)

    # Main topic plus related topics, with a few random ones for variety.
    # "or []" guards against the helper returning None.
    candidates = [topic] + list(fetch_related_topics(topic, 10) or [])
    candidates.extend(random.sample(random_pool, min(5, len(random_pool))))
    random.shuffle(candidates)  # Randomize the order of attempts

    # Remaining random topics are only consulted if the candidates run out.
    already_listed = set(candidates)
    reserve = [t for t in random_pool if t not in already_listed]
    random.shuffle(reserve)

    questions: List[Dict] = []
    used_topics: List[str] = []
    tried = set()  # Every topic is attempted at most once in this pass

    for current_topic in candidates + reserve:
        if len(questions) >= num_questions:
            break
        if current_topic in tried:
            continue
        tried.add(current_topic)

        question = generate_mcq(current_topic, difficulty)
        if question.get("status"):
            questions.append(question)
            used_topics.append(current_topic)

    # Padding pass: reuse topics known to work; repeats are acceptable here.
    # The attempt budget keeps persistent failures from looping forever.
    fallback_pool = used_topics or random_pool
    attempts_left = 2 * (num_questions - len(questions))
    while len(questions) < num_questions and fallback_pool and attempts_left > 0:
        attempts_left -= 1
        question = generate_mcq(random.choice(fallback_pool), difficulty)
        if question.get("status"):
            questions.append(question)
        elif not questions:
            break  # Nothing has ever succeeded; further retries are pointless

    random.shuffle(questions)  # Shuffle the final set of questions
    return questions