Spaces:
Sleeping
Sleeping
File size: 8,914 Bytes
582bf6b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 |
# modules/mcq_generator.py
"""Enhanced MCQ Quiz Generator Module"""
import random
import re
from typing import Dict, List, Tuple

from config.settings import RANDOM_TOPICS
from modules.api_utils import (
    fetch_wikipedia_summary,
    search_wikipedia,
    fetch_related_topics,
    fetch_wikipedia_categories,
)
from modules.hf_llm_generator import generate_quiz_set_with_llm as generate_quiz_set_from_llm
def generate_smart_distractors(
    correct_answer: str, topic: str, context: str
) -> List[str]:
    """Return up to three wrong-answer options for a question about *topic*.

    Candidates come from the topics related to *topic* and from Wikipedia
    searches on the first two categories of *topic*. Anything that matches
    the correct answer or the topic itself (ignoring case and surrounding
    whitespace) is rejected, and candidates are de-duplicated
    case-insensitively. When fewer than three candidates remain, the list is
    topped up with randomly chosen generic labels.

    Args:
        correct_answer: The right answer; never returned as a distractor.
        topic: The topic the question is about; never returned either.
        context: Currently unused; kept so existing callers keep working.

    Returns:
        A list of at most three distinct distractor strings, in random order.
    """
    excluded = {correct_answer.strip().casefold(), topic.strip().casefold()}
    # Keyed by the case-folded text so "Python" and "python" count once;
    # the value keeps the first spelling that was seen.
    candidates: Dict[str, str] = {}

    def _collect(items) -> None:
        # `items or []` tolerates a helper that returns None on failure.
        for item in items or []:
            if not isinstance(item, str):
                continue
            text = item.strip()
            key = text.casefold()
            if text and key not in excluded:
                candidates.setdefault(key, text)

    _collect(fetch_related_topics(topic, 10))

    # Other search hits for the topic's first two categories are likely to be
    # plausible-looking alternatives.
    for category in (fetch_wikipedia_categories(topic) or [])[:2]:
        _collect(search_wikipedia(category, 5))

    distractor_list = list(candidates.values())
    random.shuffle(distractor_list)
    distractor_list = distractor_list[:3]

    missing = 3 - len(distractor_list)
    if missing > 0:
        generic_distractors = [
            "Scientific Theory",
            "Historical Event",
            "Mathematical Concept",
            "Geographical Location",
            "Literary Work",
            "Technological Innovation",
            "Cultural Phenomenon",
            "Economic System",
            "Political Movement",
        ]
        # Skip fillers that would duplicate the answer, the topic, or a
        # candidate that is already selected, then pick at random so the
        # padding is not the same entries on every question.
        available = [
            label
            for label in generic_distractors
            if label.casefold() not in excluded
            and label.casefold() not in candidates
        ]
        distractor_list.extend(
            random.sample(available, min(missing, len(available)))
        )

    return distractor_list
# Phrases that usually introduce a definition. They are matched as whole
# words so that "software" or "compared" do not count as "are", and
# "this area" does not count as "is a".
_DEFINITION_MARKER = re.compile(r"\b(?:is an?|are|defined as)\b")


def generate_question_types(topic: str, summary_data: Dict, difficulty: str) -> Dict:
    """Build a question and its correct answer from a Wikipedia summary.

    The extract is split into sentences on ``"."``. The first sentence that
    mentions the title and contains a definition phrase ("is a", "is an",
    "are", "defined as") becomes the answer to "What is <title>?",
    truncated to 100 characters plus "..." when longer. This path is taken
    regardless of *difficulty*.

    When no such sentence exists, the question wording and the answer are
    chosen by *difficulty* ("Easy", "Medium", anything else is treated as
    hard), using the first/last sentence, the description, or the title.

    Args:
        topic: Fallback title when the summary has no usable "title".
        summary_data: Mapping that may contain "title", "extract" and
            "description"; missing or ``None`` values are treated as empty.
        difficulty: Difficulty label controlling the fallback question.

    Returns:
        A dict with the keys "question", "correct_answer" and "context"
        (the extract text, or "" when there is none).
    """
    # `.get(key, default)` would still hand back an explicit None value, so
    # normalise with `or` instead.
    title = summary_data.get("title") or topic
    extract = summary_data.get("extract") or ""
    description = summary_data.get("description") or ""

    sentences = [part.strip() for part in extract.split(".") if part.strip()]

    # Prefer a sentence that actually defines the topic.
    title_lower = title.lower()
    for sentence in sentences:
        lowered = sentence.lower()
        if title_lower in lowered and _DEFINITION_MARKER.search(lowered):
            correct_answer = sentence
            if len(correct_answer) > 100:  # keep the option readable
                correct_answer = correct_answer[:100] + "..."
            return {
                "question": f"What is {title}?",
                "correct_answer": correct_answer,
                "context": extract,
            }

    if sentences:
        if difficulty == "Easy":
            question = f"What is {title}?"
            correct_answer = description or title
        elif difficulty == "Medium":
            question = f"Explain {title}."
            correct_answer = sentences[0]
        else:  # Hard: the last sentence tends to carry more detail
            question = f"What is the significance of {title}?"
            correct_answer = sentences[-1]
    else:
        # No usable extract: fall back to the description or the title.
        if difficulty == "Easy":
            question = f"What is {title}?"
            correct_answer = title
        elif difficulty == "Medium":
            question = f"How is {title} commonly defined?"
            correct_answer = description or title
        else:  # Hard
            question = f"What is the key principle underlying {title}?"
            correct_answer = description or title

    return {"question": question, "correct_answer": correct_answer, "context": extract}
def generate_mcq(topic: str, difficulty: str) -> Dict:
    """Build one multiple-choice question about *topic*.

    The Wikipedia summary for *topic* is fetched first. If that yields
    nothing, the topic is searched for and the summary of the best hit is
    used instead (the first hit whose name contains the topic text,
    otherwise the first hit overall).

    Returns:
        On success, a dict with "question", "options" (the correct answer
        plus up to three distractors, shuffled), "correct_answer",
        "explanation", "topic", "difficulty" and "status" set to True.
        On failure, a dict with "error", "status" set to False and
        "suggestions" (the search hits, or an empty list).
    """
    summary = fetch_wikipedia_summary(topic)

    if not summary:
        # Direct lookup failed: fall back to a search.
        hits = search_wikipedia(topic, 3)
        if hits:
            needle = topic.lower()
            best_hit = next((hit for hit in hits if needle in hit.lower()), hits[0])
            summary = fetch_wikipedia_summary(best_hit)
            if summary:
                # From here on, work with the topic that actually resolved.
                topic = best_hit

        if not summary:
            return {
                "error": "Topic not found on Wikipedia. Try a different topic or check spelling.",
                "status": False,
                "suggestions": hits or [],
            }

    # Question text and correct answer depend on the difficulty.
    qa = generate_question_types(topic, summary, difficulty)
    answer = qa["correct_answer"]

    wrong_answers = generate_smart_distractors(answer, topic, qa["context"])

    # The correct answer goes in with the distractors and is then hidden
    # among them by shuffling.
    options = [answer, *wrong_answers[:3]]
    random.shuffle(options)

    # The explanation is the start of the extract, capped at 300 characters.
    full_text = summary.get("extract", "")
    if len(full_text) > 300:
        explanation = full_text[:300] + "..."
    else:
        explanation = full_text

    return {
        "question": qa["question"],
        "options": options,
        "correct_answer": answer,
        "explanation": explanation,
        "topic": summary.get("title", topic),
        "difficulty": difficulty,
        "status": True,
    }
def generate_quiz_set(
    topic: str, difficulty: str, num_questions: int = 5, use_llm: bool = False
) -> List[Dict]:
    """Generate a set of questions for a complete quiz.

    With ``use_llm`` set, the work is delegated to the LLM-backed generator.
    Otherwise questions are built one topic at a time, trying each candidate
    topic at most once, in this priority order:

    1. the requested topic,
    2. its related topics (random order),
    3. the entries of ``RANDOM_TOPICS`` (random order).

    If the distinct topics do not yield enough questions, topics that already
    worked are reused for a bounded number of extra attempts. The function
    therefore always terminates, and may return fewer than *num_questions*
    questions (possibly none) when generation keeps failing.

    Args:
        topic: Main quiz topic.
        difficulty: Difficulty label passed through to the question builder.
        num_questions: Number of questions wanted; values <= 0 give ``[]``.
        use_llm: Delegate to the LLM-backed generator instead.

    Returns:
        A shuffled list of successful question dicts.
    """
    if use_llm:
        return generate_quiz_set_from_llm(topic, difficulty, num_questions)

    if num_questions <= 0:
        # Nothing to build, so avoid the lookups below.
        return []

    questions: List[Dict] = []
    used_topics: List[str] = []

    # `or []` tolerates a helper that returns None on failure.
    related_topics = list(fetch_related_topics(topic, 10) or [])
    random.shuffle(related_topics)
    random_topics = list(RANDOM_TOPICS)
    random.shuffle(random_topics)

    # dict.fromkeys drops duplicates while keeping the priority order, so
    # every candidate is attempted at most once and the loop is finite.
    for candidate in dict.fromkeys([topic, *related_topics, *random_topics]):
        if len(questions) >= num_questions:
            break
        question = generate_mcq(candidate, difficulty)
        if question.get("status"):
            questions.append(question)
            used_topics.append(candidate)

    # Fallback: repeat topics that are known to work. The attempt budget
    # keeps this from spinning when generation starts failing.
    attempts_left = 2 * (num_questions - len(questions))
    while used_topics and len(questions) < num_questions and attempts_left > 0:
        attempts_left -= 1
        question = generate_mcq(random.choice(used_topics), difficulty)
        if question.get("status"):
            questions.append(question)

    random.shuffle(questions)  # do not expose the order topics were tried in
    return questions
|