import json
import os
from datetime import datetime
from text_processor import TextProcessor
from keyword_extractor import KeywordExtractor
from question_generator import QuestionGenerator
from option_generator import OptionGenerator
from syllabus_processor import SyllabusProcessor
class ExamQuestionSystem:
    """End-to-end exam question generation pipeline.

    Wires together text preprocessing, keyword extraction, question
    generation, optional MCQ option generation, and an alternative
    syllabus-driven generation path.
    """

    def __init__(self, use_transformers=True):
        """Initialize the complete exam question generation system.

        Args:
            use_transformers (bool): Whether to use transformer models for
                question generation.
        """
        print("Initializing Exam Question Generation System...")
        self.text_processor = TextProcessor()
        self.keyword_extractor = KeywordExtractor()
        # Use rule-based generation by default for faster web deployment
        # NOTE(review): the default here is True, which contradicts the
        # comment above — confirm the intended default.
        self.question_generator = QuestionGenerator(use_transformers=use_transformers)
        self.option_generator = OptionGenerator()
        self.syllabus_processor = SyllabusProcessor()
        print("System initialized successfully!")

    def process_text_file(self, file_path):
        """
        Process a text file and return its content.

        Args:
            file_path (str): Path to the text file

        Returns:
            str: File content

        Raises:
            Exception: If the file cannot be opened or read; the original
                error is chained as the cause.
        """
        try:
            with open(file_path, 'r', encoding='utf-8') as file:
                return file.read()
        except Exception as e:
            # Chain the cause so the underlying OS error is not lost.
            raise Exception(f"Error reading file {file_path}: {e}") from e

    def generate_exam_questions(self, input_text, max_questions=5, include_mcq=True, syllabus_text=None):
        """
        Complete pipeline to generate exam questions from input text.

        Args:
            input_text (str): Input text to generate questions from
            max_questions (int): Maximum number of questions to generate
            include_mcq (bool): Whether to include multiple choice options
            syllabus_text (str, optional): Syllabus text for topic-based
                question generation

        Returns:
            dict: Generated questions and metadata. This method never raises:
                on failure it returns a well-formed payload whose metadata
                contains an 'error' key and whose single question has
                type 'error'.
        """
        print("Starting question generation pipeline...")
        try:
            if not input_text or not input_text.strip():
                raise ValueError("Input text cannot be empty")
            # If syllabus text is provided, try syllabus-based generation
            # first and only fall through on an empty result.
            if syllabus_text and syllabus_text.strip():
                syllabus_results = self._generate_syllabus_based_questions(
                    input_text, syllabus_text, max_questions, include_mcq)
                if syllabus_results and syllabus_results.get('questions'):
                    return syllabus_results
                print("Warning: Syllabus-based generation produced no questions. Falling back to standard generation.")
            print(f"Input text length: {len(input_text)} characters")

            # Step 1: Text preprocessing
            print("1. Processing and cleaning text...")
            processed_data = self.text_processor.preprocess_text(input_text)
            if not processed_data or 'sentences' not in processed_data or not processed_data['sentences']:
                raise ValueError("Failed to process input text into sentences")
            print(f"Extracted {len(processed_data['sentences'])} sentences from input")

            # Step 2: Extract keywords and important sentences
            print("2. Extracting keywords and important sentences...")
            key_concepts = self.keyword_extractor.extract_key_concepts(
                processed_data['cleaned_text'],
                processed_data['sentences'],
                top_n_sentences=max(10, max_questions)
            )
            # Bug fix: the original indexed into key_concepts even when the
            # extractor returned None/falsy, raising TypeError.
            if not key_concepts:
                key_concepts = {}
            if not key_concepts.get('important_sentences'):
                # If no important sentences found, use the first few sentences
                print("Warning: No important sentences found, using first few sentences")
                key_concepts['important_sentences'] = processed_data['sentences'][:max_questions]
            print(f"Found {len(key_concepts.get('important_sentences', []))} important sentences")

            generation_inputs = self._build_generation_inputs(key_concepts)

            # Step 3: Generate questions
            print("3. Generating questions...")
            questions = []
            # Generate more questions than requested to ensure we have enough
            # valid ones and to cover all sections (MCQ, Short, Long).
            generation_target = max(max_questions * 2, 10)
            try:
                questions = self.question_generator.generate_multiple_questions(
                    generation_inputs,
                    generation_target
                )
                if not questions:
                    raise ValueError("No questions were generated")
                questions = self._normalize_questions(questions)

                # Step 4: Generate MCQ options if requested (mutates the
                # first max_questions question dicts in place).
                if include_mcq and questions:
                    print("4. Generating multiple choice options...")
                    # Keywords are (score, keyword) tuples; collect the words
                    # for use as global distractors.
                    global_keywords = [k[1] for k in key_concepts.get('keywords', [])]
                    self._attach_mcq_options(questions[:max_questions], global_keywords)
            except Exception as e:
                import traceback
                print(f"Error in question generation: {str(e)}\n{traceback.format_exc()}")
                # Create fallback placeholder questions so callers still get output.
                questions = [{
                    'question': f"Sample question {i+1} (error: {str(e)[:50]}...)",
                    'context': 'Error occurred during question generation',
                    'score': 0.0,
                    'type': 'error',
                    'id': f'error_{i}'
                } for i in range(min(3, max_questions))]

            # Compile results
            results = {
                'metadata': {
                    'input_word_count': processed_data.get('word_count', 0),
                    'input_sentence_count': len(processed_data.get('sentences', [])),
                    'questions_generated': len(questions),
                    'keywords_extracted': len(key_concepts.get('keywords', [])),
                    'named_entities': len(key_concepts.get('named_entities', []))
                },
                'keywords': key_concepts.get('keywords', [])[:10],
                'named_entities': key_concepts.get('named_entities', [])[:10],
                'questions': questions[:max_questions]  # Ensure we don't return more than requested
            }
            print(f"Successfully generated {len(results['questions'])} questions")
            return results
        except Exception as e:
            import traceback
            error_msg = f"Error in generate_exam_questions: {str(e)}\n{traceback.format_exc()}"
            print(error_msg)
            # Return a minimal response with error information instead of
            # raising, so (web) callers always receive a well-formed payload.
            return {
                'metadata': {
                    'error': str(e),
                    'input_length': len(input_text) if input_text else 0,
                    'questions_generated': 0
                },
                'keywords': [],
                'named_entities': [],
                'questions': [{
                    'question': f"Error generating questions: {str(e)[:100]}",
                    'context': 'An error occurred during question generation',
                    'score': 0.0,
                    'type': 'error',
                    'id': 'error_0'
                }]
            }

    def _build_generation_inputs(self, key_concepts):
        """Convert scored important sentences into generator input dicts.

        Each input is {'context': sentence, 'answer': keyword-or-None}.
        Tuples are (score, sentence) or (score, sentence, keyword); bare
        strings are treated as keyword-less contexts.
        """
        generation_inputs = []
        for item in key_concepts.get('important_sentences', []):
            if isinstance(item, tuple) and len(item) >= 2:
                sentence = item[1]
                keyword = item[2] if len(item) > 2 else None
                generation_inputs.append({'context': sentence, 'answer': keyword})
            elif isinstance(item, str):
                generation_inputs.append({'context': item, 'answer': None})
        return generation_inputs

    def _normalize_questions(self, questions):
        """Coerce generator output (bare strings or dicts) into uniform
        question dicts with question/context/score/type/id keys."""
        formatted_questions = []
        for i, q in enumerate(questions):
            if isinstance(q, str):
                formatted_questions.append({
                    'question': q,
                    'context': 'Generated from input text',
                    'score': 1.0,
                    'type': 'short_answer',
                    'id': f'q_{i+1}'
                })
            elif isinstance(q, dict):
                # Ensure required fields exist without clobbering real values.
                q['question'] = q.get('question', f'Question {i+1}')
                q['context'] = q.get('context', 'No context provided')
                q['score'] = q.get('score', 1.0)
                q['type'] = q.get('type', 'short_answer')
                q['id'] = q.get('id', f'q_{i+1}')
                formatted_questions.append(q)
        return formatted_questions

    def _attach_mcq_options(self, questions, global_keywords):
        """Attach MCQ options to each question dict in place; questions that
        fail option generation are left as short-answer."""
        for question_data in questions:
            try:
                mcq_data = self.option_generator.create_mcq_options(
                    question_data['question'],
                    question_data['context'],
                    correct_answer=question_data.get('correct_answer'),
                    global_keywords=global_keywords
                )
                if mcq_data and 'options' in mcq_data and len(mcq_data['options']) >= 2:
                    question_data.update(mcq_data)
                    question_data['type'] = 'mcq'
                    print(f"Generated {len(mcq_data['options'])} options for question")
                else:
                    print("Not enough options generated, keeping as short answer")
            except Exception as e:
                print(f"Error generating MCQ options: {str(e)}"
                      " (continuing with short answer)")

    def save_questions_to_json(self, questions_data, output_file):
        """
        Save generated questions to a JSON file.

        Args:
            questions_data (dict): Generated questions data
            output_file (str): Output file path
        """
        try:
            with open(output_file, 'w', encoding='utf-8') as file:
                json.dump(questions_data, file, indent=2, ensure_ascii=False)
            # Fix: this message was a broken (unterminated) f-string split
            # across two lines in the original.
            print(f"β Questions saved to {output_file}")
        except Exception as e:
            print(f"β Error saving to file: {e}")

    def display_questions_console(self, questions_data):
        """
        Display generated questions in a formatted console output.

        Uses .get() lookups throughout because the syllabus-based path
        produces a metadata dict with different keys than the standard path.

        Args:
            questions_data (dict): Generated questions data
        """
        print("\n" + "="*80)
        print("GENERATED EXAM QUESTIONS")
        print("="*80)
        # Display metadata
        metadata = questions_data.get('metadata', {})
        print(f"\nπ STATISTICS:")
        print(f" β’ Input text: {metadata.get('input_word_count', 0)} words, {metadata.get('input_sentence_count', 0)} sentences")
        print(f" β’ Keywords extracted: {metadata.get('keywords_extracted', 0)}")
        print(f" β’ Named entities found: {metadata.get('named_entities', 0)}")
        print(f" β’ Questions generated: {metadata.get('questions_generated', 0)}")
        # Display top keywords ((score, keyword) tuples)
        print(f"\nπ TOP KEYWORDS:")
        for score, keyword in questions_data.get('keywords', [])[:5]:
            print(f" β’ {keyword} (score: {score:.2f})")
        # Display questions
        print(f"\nβ QUESTIONS:")
        for i, q in enumerate(questions_data.get('questions', []), 1):
            print(f"\n{i}. {q['question']}")
            if 'options' in q:
                print(" Options:")
                for j, option in enumerate(q['options'], 1):
                    # Fix: guard against a missing 'correct_index' key.
                    marker = "β" if j-1 == q.get('correct_index') else " "
                    print(f" {marker} {chr(64+j)}. {option}")
            print(f" Context: {q.get('context', '')[:100]}...")
            print(f" Confidence: {q.get('score', 0.0):.2f}")
        print("\n" + "="*80)

    def _generate_syllabus_based_questions(self, content_text, syllabus_text, max_questions=10, include_mcq=True):
        """
        Generate questions based on syllabus topics.

        Args:
            content_text (str): The content text to generate questions from
            syllabus_text (str): The syllabus text with units and topics
            max_questions (int): Maximum number of questions to generate
            include_mcq (bool): Whether to include multiple choice options

        Returns:
            dict: Generated questions and metadata. NOTE: metadata keys here
                ('total_questions', 'topics_covered', 'generated_at') differ
                from the standard pipeline's metadata.

        Raises:
            Exception: Re-raises any error from the syllabus processor so the
                caller can fall back to standard generation.
        """
        print("Generating syllabus-based questions...")
        try:
            # Generate questions by topic
            questions_by_topic = self.syllabus_processor.generate_topic_based_questions(
                syllabus_text=syllabus_text,
                content_text=content_text,
                questions_per_topic=3  # Will be adjusted based on max_questions
            )
            # Flatten questions from all topics, tagging each with its topic.
            all_questions = []
            for topic, questions in questions_by_topic.items():
                for q in questions:
                    q['topic'] = topic
                    all_questions.append(q)
            # Limit to max_questions
            all_questions = all_questions[:max_questions]
            # Generate options for MCQs if needed
            if include_mcq:
                for question in all_questions:
                    if 'options' not in question and 'context' in question:
                        try:
                            mcq_data = self.option_generator.create_mcq_options(
                                question['question'],
                                question['context'],
                                num_options=4
                            )
                            if mcq_data and 'options' in mcq_data and len(mcq_data['options']) >= 2:
                                question.update(mcq_data)
                        except Exception as e:
                            print(f"Error generating options: {e}")
            # Prepare results
            results = {
                'metadata': {
                    'total_questions': len(all_questions),
                    'topics_covered': list(questions_by_topic.keys()),
                    'generated_at': str(datetime.now())
                },
                'questions': all_questions
            }
            return results
        except Exception as e:
            print(f"Error in syllabus-based question generation: {e}")
            raise
# Example usage and testing
if __name__ == "__main__":
    # Small AI/ML passage used to exercise the pipeline end to end.
    sample_text = """
    Artificial Intelligence (AI) is a branch of computer science that aims to create intelligent machines
    that work and react like humans. Machine learning is a subset of AI that provides systems the ability
    to automatically learn and improve from experience without being explicitly programmed. Deep learning
    is a subset of machine learning that uses neural networks with three or more layers. These neural
    networks attempt to simulate the behavior of the human brain to learn from large amounts of data.
    Python is one of the most popular programming languages for AI development due to its simplicity
    and extensive libraries like TensorFlow and PyTorch.
    """
    try:
        # Build the system, generate a few questions, then show and persist them.
        qa_system = ExamQuestionSystem()
        generated = qa_system.generate_exam_questions(sample_text, max_questions=3)
        qa_system.display_questions_console(generated)
        qa_system.save_questions_to_json(generated, "sample_questions.json")
    except Exception as e:
        print(f"β Error: {e}")