File size: 8,914 Bytes
5af4179
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
# modules/mcq_generator.py
"""Enhanced MCQ Quiz Generator Module"""

import random
import re
from typing import Dict, List, Tuple

from config.settings import RANDOM_TOPICS
from modules.api_utils import (
    fetch_wikipedia_summary,
    search_wikipedia,
    fetch_related_topics,
    fetch_wikipedia_categories,
)
from modules.hf_llm_generator import generate_quiz_set_with_llm as generate_quiz_set_from_llm


def _collect_unique(items, seen_keys: set, bucket: List[str]) -> None:
    """Append each new, non-empty string in *items* to *bucket*.

    Uniqueness is judged on the stripped, casefolded text, and every accepted
    key is recorded in *seen_keys* so later calls skip it as well. A ``None``
    or empty *items* is treated as "nothing to add".
    """
    for item in items or ():
        if not isinstance(item, str):
            continue
        key = item.strip().casefold()
        if key and key not in seen_keys:
            seen_keys.add(key)
            bucket.append(item)


def generate_smart_distractors(
    correct_answer: str, topic: str, context: str
) -> List[str]:
    """Return up to three distractor options for a multiple choice question.

    Candidates come from ``fetch_related_topics(topic, 10)`` and from
    ``search_wikipedia(category, 5)`` for the first two entries returned by
    ``fetch_wikipedia_categories(topic)``. Candidates equal to the correct
    answer or the topic are dropped; the comparison ignores case and
    surrounding whitespace so that e.g. a topic typed as "python" does not
    come back as the distractor "Python". When fewer than three candidates
    remain, generic placeholder options are appended after them.

    Args:
        correct_answer: The right answer, which must not appear as a distractor.
        topic: The topic the question is about; also excluded.
        context: Currently unused; kept so existing callers keep working.

    Returns:
        A list of at most three distinct distractor strings.
    """
    # Keys that must never be offered as a wrong option.
    seen_keys = {
        text.strip().casefold()
        for text in (correct_answer, topic)
        if isinstance(text, str)
    }
    candidates: List[str] = []

    _collect_unique(fetch_related_topics(topic, 10), seen_keys, candidates)

    # Use the topic's first two categories as extra search terms.
    categories = fetch_wikipedia_categories(topic) or []
    for category in categories[:2]:
        _collect_unique(search_wikipedia(category, 5), seen_keys, candidates)

    random.shuffle(candidates)

    # Top up after shuffling so real candidates always take priority over
    # the generic placeholders.
    if len(candidates) < 3:
        generic_distractors = [
            "Scientific Theory",
            "Historical Event",
            "Mathematical Concept",
            "Geographical Location",
            "Literary Work",
            "Technological Innovation",
            "Cultural Phenomenon",
            "Economic System",
            "Political Movement",
        ]
        _collect_unique(generic_distractors, seen_keys, candidates)

    return candidates[:3]


# Phrases that mark a defining sentence, matched as whole words so that
# "are" inside "hardware" or "is a" inside "this analysis" does not count.
_DEFINITION_MARKER = re.compile(r"\b(?:is an?|are|defined as)\b", re.IGNORECASE)


def generate_question_types(topic: str, summary_data: Dict, difficulty: str) -> Dict:
    """Build a question and its correct answer from a topic summary.

    The extract is split into sentences on ".". If a sentence mentions the
    title and contains a definition marker ("is a"/"is an", "are",
    "defined as"), the question is "What is <title>?" and that sentence
    (cut to 100 characters plus "..." when longer) is the answer, regardless
    of difficulty. Otherwise the question wording and the answer depend on
    *difficulty* and on whether the extract produced any sentences.

    Args:
        topic: Fallback title used when the summary has no usable "title".
        summary_data: Mapping read for the "title", "extract" and
            "description" keys; missing, ``None`` or empty values are tolerated.
        difficulty: "Easy" or "Medium"; any other value is treated as hard.

    Returns:
        A dict with the keys "question", "correct_answer" and "context"
        (the extract text, or "" when there is none).
    """
    # `or` rather than a .get() default: the key may be present but None/empty.
    title = summary_data.get("title") or topic
    extract = summary_data.get("extract") or ""
    description = summary_data.get("description") or ""

    sentences = [part.strip() for part in extract.split(".") if part.strip()]

    if sentences:
        title_key = title.lower()
        definition = next(
            (
                sentence
                for sentence in sentences
                if title_key in sentence.lower()
                and _DEFINITION_MARKER.search(sentence)
            ),
            None,
        )
        if definition is not None:
            if len(definition) > 100:  # keep the option short enough to display
                definition = definition[:100] + "..."
            return {
                "question": f"What is {title}?",
                "correct_answer": definition,
                "context": extract,
            }

        # No defining sentence: fall back to the description or to the
        # first/last sentence of the extract.
        if difficulty == "Easy":
            question = f"What is {title}?"
            correct_answer = description or title
        elif difficulty == "Medium":
            question = f"Explain {title}."
            correct_answer = sentences[0]
        else:  # Hard
            question = f"What is the significance of {title}?"
            correct_answer = sentences[-1]  # last sentence for more detail
    else:
        # Nothing usable in the extract: rely on the title/description only.
        if difficulty == "Easy":
            question = f"What is {title}?"
            correct_answer = title
        elif difficulty == "Medium":
            question = f"How is {title} commonly defined?"
            correct_answer = description or title
        else:  # Hard
            question = f"What is the key principle underlying {title}?"
            correct_answer = description or title

    return {"question": question, "correct_answer": correct_answer, "context": extract}


def generate_mcq(topic: str, difficulty: str) -> Dict:
    """Build one multiple choice question for *topic*.

    The summary is fetched with ``fetch_wikipedia_summary``. If that yields
    nothing, ``search_wikipedia(topic, 3)`` is consulted and the first result
    whose text contains the topic (case-insensitively) is tried, falling back
    to the first result overall.

    Returns:
        On success, a dict with "question", "options" (the correct answer
        plus up to three distractors, shuffled), "correct_answer",
        "explanation" (the extract, cut to 300 characters plus "..." when
        longer), "topic", "difficulty" and ``"status": True``.
        On failure, a dict with "error", ``"status": False`` and
        "suggestions" (the search results, or an empty list).
    """
    summary_data = fetch_wikipedia_summary(topic)

    if not summary_data:
        # Direct lookup failed: see whether a search turns up a usable title.
        search_results = search_wikipedia(topic, 3)
        if search_results:
            needle = topic.lower()
            found_topic = search_results[0]
            for candidate in search_results:
                if needle in candidate.lower():
                    found_topic = candidate
                    break
            summary_data = fetch_wikipedia_summary(found_topic)
            if summary_data:
                # Continue with the title that actually resolved.
                topic = found_topic

        if not summary_data:
            return {
                "error": "Topic not found on Wikipedia. Try a different topic or check spelling.",
                "status": False,
                "suggestions": search_results or [],
            }

    question_data = generate_question_types(topic, summary_data, difficulty)
    answer = question_data["correct_answer"]

    wrong_options = generate_smart_distractors(
        answer, topic, question_data["context"]
    )

    # Mix the right answer in with at most three wrong ones.
    options = [answer, *wrong_options[:3]]
    random.shuffle(options)

    extract = summary_data.get("extract", "")
    if len(extract) > 300:
        explanation = extract[:300] + "..."
    else:
        explanation = extract

    return {
        "question": question_data["question"],
        "options": options,
        "correct_answer": answer,
        "explanation": explanation,
        "topic": summary_data.get("title", topic),
        "difficulty": difficulty,
        "status": True,
    }


def generate_quiz_set(
    topic: str, difficulty: str, num_questions: int = 5, use_llm: bool = False
) -> List[Dict]:
    """Generate a set of questions for a complete quiz.

    With ``use_llm`` set, the whole request is delegated to
    ``generate_quiz_set_from_llm``. Otherwise questions are built with
    ``generate_mcq`` in two bounded phases:

    1. Every distinct candidate topic is tried once: the main topic, its
       related topics and a random sample of ``RANDOM_TOPICS`` (shuffled
       together), followed by the rest of ``RANDOM_TOPICS``.
    2. If questions are still missing, topics are reused (repeats allowed)
       until the quota is met or several attempts in a row fail.

    Both phases always terminate, so a run of failing lookups can no longer
    spin forever.

    Args:
        topic: Main quiz topic.
        difficulty: Difficulty label passed through to ``generate_mcq``.
        num_questions: Desired number of questions; non-positive yields ``[]``.
        use_llm: Delegate generation to the LLM-backed generator.

    Returns:
        A shuffled list of question dicts. It may hold fewer than
        *num_questions* entries when not enough questions could be generated.
    """
    if use_llm:
        return generate_quiz_set_from_llm(topic, difficulty, num_questions)

    if num_questions <= 0:
        return []  # nothing requested: skip the lookups entirely

    # Preferred candidates: main topic, related topics and a few random ones.
    primary = [topic, *(fetch_related_topics(topic, 10) or [])]
    primary.extend(random.sample(RANDOM_TOPICS, min(5, len(RANDOM_TOPICS))))
    random.shuffle(primary)

    # Reserve: every configured random topic, in random order.
    reserve = list(RANDOM_TOPICS)
    random.shuffle(reserve)

    questions: List[Dict] = []
    used_topics: List[str] = []
    tried = set()

    # Phase 1: one attempt per distinct topic.
    for candidate in primary + reserve:
        if len(questions) >= num_questions:
            break
        if candidate in tried:
            continue
        tried.add(candidate)
        question = generate_mcq(candidate, difficulty)
        if question.get("status"):
            questions.append(question)
            used_topics.append(candidate)

    # Phase 2: fill the remainder with repeats, preferring topics that
    # already worked. The consecutive-failure cap guarantees termination.
    max_consecutive_failures = 3
    fallback_pool = used_topics or reserve
    consecutive_failures = 0
    while (
        len(questions) < num_questions
        and fallback_pool
        and consecutive_failures < max_consecutive_failures
    ):
        question = generate_mcq(random.choice(fallback_pool), difficulty)
        if question.get("status"):
            questions.append(question)
            consecutive_failures = 0
        else:
            consecutive_failures += 1

    random.shuffle(questions)  # shuffle the final set of questions
    return questions