Spaces:

Bharath370
/

Trivia5

Sleeping

App Files Files Community

Trivia5 / src /modules /mcq_generator.py

Bharath370

Upload 102 files

582bf6b verified 7 months ago

raw

history blame contribute delete

8.91 kB

	# modules/mcq_generator.py
	"""Enhanced MCQ Quiz Generator Module"""

	import random
	from typing import Dict, List, Tuple
	from modules.api_utils import (
	fetch_wikipedia_summary,
	search_wikipedia,
	fetch_related_topics,
	fetch_wikipedia_categories,
	)
	from config.settings import RANDOM_TOPICS
	from modules.hf_llm_generator import generate_quiz_set_with_llm as generate_quiz_set_from_llm


	def generate_smart_distractors(
	    correct_answer: str, topic: str, context: str
	) -> List[str]:
	    """Build the wrong-answer options for a question about ``topic``.

	    Candidates are collected from ``fetch_related_topics`` and from
	    ``search_wikipedia`` run on the first two entries returned by
	    ``fetch_wikipedia_categories``. The correct answer and the topic itself
	    are removed, the remainder is shuffled, and generic placeholder labels
	    are appended when fewer than three candidates survive.

	    Args:
	        correct_answer: The right answer; never returned as a distractor.
	        topic: The subject of the question; never returned as a distractor.
	        context: Accepted for interface compatibility; not used here.

	    Returns:
	        The first three entries of the candidate list.
	    """
	    pool = set()

	    # Titles related to the topic are the primary source of candidates.
	    pool.update(fetch_related_topics(topic, 10))

	    # Only the first two categories are searched for sibling entries.
	    for category_name in (fetch_wikipedia_categories(topic) or [])[:2]:
	        pool.update(search_wikipedia(category_name, 5))

	    # An option must never duplicate the answer or restate the topic.
	    pool -= {correct_answer, topic}

	    shuffled = list(pool)
	    random.shuffle(shuffled)

	    # Pad with generic labels so three options are always available.
	    if len(shuffled) < 3:
	        shuffled += [
	            "Scientific Theory",
	            "Historical Event",
	            "Mathematical Concept",
	            "Geographical Location",
	            "Literary Work",
	            "Technological Innovation",
	            "Cultural Phenomenon",
	            "Economic System",
	            "Political Movement",
	        ]

	    return shuffled[:3]


	def generate_question_types(topic: str, summary_data: Dict, difficulty: str) -> Dict:
	    """Derive a question and its correct answer from a summary record.

	    The extract is split into sentences on ``.``. If a sentence mentions the
	    title and contains a definition marker ("is a", "is an", "are",
	    "defined as") as whole words, the question is "What is <title>?" and that
	    sentence (truncated to 100 characters plus "...") is the answer.
	    Otherwise the question wording and answer depend on ``difficulty``.

	    Args:
	        topic: Fallback title used when ``summary_data`` has no usable
	            "title" value.
	        summary_data: Mapping read for the keys "title", "extract" and
	            "description"; missing, ``None`` or empty values are tolerated.
	        difficulty: "Easy" or "Medium"; any other value is treated as hard.

	    Returns:
	        A dict with the keys "question", "correct_answer" and "context"
	        (the full extract, or "" when there is none).
	    """
	    # Local import: the module's import block does not bring in ``re``.
	    import re

	    # ``or`` (rather than a .get default) also covers keys that are present
	    # but hold None or an empty string.
	    title = summary_data.get("title") or topic
	    extract = summary_data.get("extract") or ""
	    description = summary_data.get("description") or ""

	    sentences = [part.strip() for part in extract.split(".") if part.strip()]

	    if sentences:
	        # Word boundaries stop "are" from matching inside words such as
	        # "software" or "area", and "is a" from matching "is also".
	        definition_marker = re.compile(r"\b(?:is an?|are|defined as)\b")
	        title_lower = title.lower()

	        for sentence in sentences:
	            lowered = sentence.lower()
	            if title_lower in lowered and definition_marker.search(lowered):
	                answer = sentence
	                if len(answer) > 100:  # Keep the option short enough to display
	                    answer = answer[:100] + "..."
	                return {
	                    "question": f"What is {title}?",
	                    "correct_answer": answer,
	                    "context": extract,
	                }

	        # No definition-like sentence: fall back on difficulty.
	        if difficulty == "Easy":
	            question = f"What is {title}?"
	            correct_answer = description or title
	        elif difficulty == "Medium":
	            question = f"Explain {title}."
	            correct_answer = sentences[0]
	        else:  # Hard
	            question = f"What is the significance of {title}?"
	            correct_answer = sentences[-1]  # Last sentence for more detail
	    else:
	        # Nothing to quote from: answer with the description or the title.
	        if difficulty == "Easy":
	            question = f"What is {title}?"
	            correct_answer = title
	        elif difficulty == "Medium":
	            question = f"How is {title} commonly defined?"
	            correct_answer = description or title
	        else:  # Hard
	            question = f"What is the key principle underlying {title}?"
	            correct_answer = description or title

	    return {"question": question, "correct_answer": correct_answer, "context": extract}


	def generate_mcq(topic: str, difficulty: str) -> Dict:
	    """Assemble one multiple-choice question for ``topic``.

	    The summary is looked up directly first. If that yields nothing, a
	    search is run and the first hit whose text contains the topic
	    (case-insensitively) is tried, defaulting to the first hit overall.

	    Args:
	        topic: Subject to ask about.
	        difficulty: Passed through to ``generate_question_types`` and echoed
	            in the result.

	    Returns:
	        On success, a dict with "question", "options", "correct_answer",
	        "explanation", "topic", "difficulty" and ``"status": True``.
	        On failure, a dict with "error", ``"status": False`` and
	        "suggestions" (the search hits, or an empty list).
	    """
	    summary = fetch_wikipedia_summary(topic)

	    if not summary:
	        # Direct lookup failed; fall back to a title search.
	        hits = search_wikipedia(topic, 3)
	        if hits:
	            needle = topic.lower()
	            best_hit = hits[0]
	            for hit in hits:
	                if needle in hit.lower():
	                    best_hit = hit
	                    break

	            summary = fetch_wikipedia_summary(best_hit)
	            if summary:
	                # Continue with the title that actually resolved.
	                topic = best_hit

	        if not summary:
	            return {
	                "error": "Topic not found on Wikipedia. Try a different topic or check spelling.",
	                "status": False,
	                "suggestions": hits or [],
	            }

	    qa = generate_question_types(topic, summary, difficulty)
	    answer = qa["correct_answer"]

	    wrong_options = generate_smart_distractors(answer, topic, qa["context"])

	    # The correct answer plus up to three distractors, in random order.
	    choices = [answer]
	    choices.extend(wrong_options[:3])
	    random.shuffle(choices)

	    # The explanation is the extract, cut to 300 characters when longer.
	    full_text = summary.get("extract", "")
	    if len(full_text) > 300:
	        explanation = full_text[:300] + "..."
	    else:
	        explanation = full_text

	    return {
	        "question": qa["question"],
	        "options": choices,
	        "correct_answer": answer,
	        "explanation": explanation,
	        "topic": summary.get("title", topic),
	        "difficulty": difficulty,
	        "status": True,
	    }


	def generate_quiz_set(
	    topic: str, difficulty: str, num_questions: int = 5, use_llm: bool = False
	) -> List[Dict]:
	    """Generate a set of questions for a complete quiz.

	    With ``use_llm`` the whole request is delegated to
	    ``generate_quiz_set_from_llm``. Otherwise questions are built with
	    ``generate_mcq`` from the main topic, its related topics and a sample of
	    ``RANDOM_TOPICS``, topping up with further untried random topics when
	    needed. If distinct topics run out, already-used topics are revisited a
	    bounded number of times, so repeated questions are possible.

	    Every topic is attempted at most once in the main phase and the fallback
	    phase has a fixed attempt budget, so the function always terminates.

	    Args:
	        topic: Main quiz subject.
	        difficulty: Passed through to ``generate_mcq``.
	        num_questions: Number of questions wanted; values <= 0 yield ``[]``
	            without any lookups.
	        use_llm: Delegate generation to the LLM-backed generator.

	    Returns:
	        A shuffled list of question dicts. It may hold fewer than
	        ``num_questions`` entries when not enough questions could be built.
	    """
	    if use_llm:
	        return generate_quiz_set_from_llm(topic, difficulty, num_questions)

	    # Nothing requested: skip all lookups.
	    if num_questions <= 0:
	        return []

	    questions = []
	    used_topics = set()
	    failed_topics = set()  # Topics whose generation failed during this call

	    # Start with the main topic and related topics, plus a few random ones
	    # for diversity, and attempt them in random order.
	    candidates = [topic] + list(fetch_related_topics(topic, 10) or [])
	    candidates.extend(random.sample(RANDOM_TOPICS, min(5, len(RANDOM_TOPICS))))
	    random.shuffle(candidates)

	    while candidates and len(questions) < num_questions:
	        for current_topic in candidates:
	            if len(questions) >= num_questions:
	                break
	            if current_topic in used_topics or current_topic in failed_topics:
	                continue

	            question = generate_mcq(current_topic, difficulty)
	            if question.get("status"):
	                questions.append(question)
	                used_topics.add(current_topic)
	            else:
	                failed_topics.add(current_topic)

	        remaining_needed = num_questions - len(questions)
	        if remaining_needed <= 0:
	            break

	        # Top up only with topics that have not been tried yet. The pool of
	        # untried topics shrinks on every pass, so this loop must end.
	        untried = [
	            t
	            for t in RANDOM_TOPICS
	            if t not in used_topics and t not in failed_topics
	        ]
	        candidates = random.sample(untried, min(remaining_needed + 5, len(untried)))

	    # Fallback: distinct topics are exhausted, so allow repeats of topics
	    # that worked (or retry the random list if nothing worked at all).
	    fallback_pool = list(used_topics) if used_topics else list(RANDOM_TOPICS)
	    attempts_left = 3 * (num_questions - len(questions))
	    while fallback_pool and len(questions) < num_questions and attempts_left > 0:
	        attempts_left -= 1
	        question = generate_mcq(random.choice(fallback_pool), difficulty)
	        if question.get("status"):
	            questions.append(question)
	        elif not questions:
	            break  # Nothing can be generated at all; stop retrying

	    random.shuffle(questions)  # Shuffle the final set of questions
	    return questions