# Captured from a Hugging Face Space (status at capture time: Sleeping).
import logging
import os
import re
import tempfile

import fitz  # PyMuPDF
import gradio as gr
from transformers import pipeline
# Load the text2text model once at import time so every request reuses the
# same pipeline instead of paying the model-loading cost per call.
quiz_generator = pipeline(
    "text2text-generation",
    model="google/flan-t5-base",
)
def extract_text_from_pdf(pdf_file):
    """Extract the plain text of every page of a PDF.

    Args:
        pdf_file: Raw PDF content as a bytes-like object (the upload widget
            in this file is configured with ``type="binary"``).

    Returns:
        The text of all pages concatenated in page order.

    Raises:
        Any exception PyMuPDF raises when the data cannot be opened or read
        as a PDF; callers are expected to handle it.
    """
    # Open straight from memory: there is no temporary file to write or
    # clean up, and the context manager closes the document even when
    # reading a page fails part-way through.
    with fitz.open(stream=pdf_file, filetype="pdf") as doc:
        return "".join(page.get_text() for page in doc)
def chunk_text(text, chunk_size=800):
    """Split text into chunks of whole words, each at most ``chunk_size`` long.

    Words are separated on any whitespace and re-joined with single spaces.
    A chunk is closed as soon as adding the next word (plus its joining
    space) would push it past ``chunk_size`` characters.

    Args:
        text: The text to split.
        chunk_size: Maximum length, in characters, of each joined chunk.

    Returns:
        A list of chunk strings in original order; empty when ``text``
        contains no words. A single word longer than ``chunk_size`` is kept
        intact as its own (oversized) chunk rather than being cut.
    """
    chunks = []
    current_words = []
    current_length = 0  # always equals len(" ".join(current_words))

    for word in text.split():
        if current_words:
            # +1 accounts for the space that will join this word to the chunk.
            projected = current_length + 1 + len(word)
        else:
            projected = len(word)

        if current_words and projected > chunk_size:
            chunks.append(" ".join(current_words))
            current_words = [word]
            current_length = len(word)
        else:
            current_words.append(word)
            current_length = projected

    if current_words:
        chunks.append(" ".join(current_words))
    return chunks
def _build_quiz_prompt(question_count, chunk):
    """Return the model prompt asking for ``question_count`` questions on ``chunk``."""
    return "\n".join([
        f"Create {question_count} multiple choice questions from this text.",
        "Format each question as:",
        "Q1. [Question text]",
        "A) [Option A]",
        "B) [Option B]",
        "C) [Option C]",
        "D) [Option D]",
        "Correct Answer: [Letter] - [Explanation]",
        f"Text: {chunk}",
    ])


def generate_quiz(pdf_bytes, num_questions=10, max_chunks=3):
    """Generate a multiple-choice quiz from the text of a PDF.

    The PDF text is whitespace-normalised, split into chunks, and the first
    ``max_chunks`` chunks are each sent to the model with a share of the
    question budget. The model output is not parsed: each processed chunk
    contributes one free-text section to the result.

    Args:
        pdf_bytes: Raw PDF content, or ``None`` when nothing was uploaded.
        num_questions: Total number of questions to request. Coerced to
            ``int`` so a float slider value does not reach the prompt
            as e.g. "3.0".
        max_chunks: Upper bound on how many chunks are sent to the model,
            to keep the request from timing out. Values below 1 are
            treated as 1.

    Returns:
        The formatted quiz text on success, otherwise a human-readable
        message starting with ``"Error"``. This function never raises; all
        failures are reported through the returned string.
    """
    if pdf_bytes is None:
        return "Error: Please upload a PDF file first."

    try:
        num_questions = int(num_questions)

        text = extract_text_from_pdf(pdf_bytes)
        if not text.strip():
            return "Error: No text found in the PDF. Please check if the PDF contains readable text."

        # Collapse all runs of whitespace so chunk sizes reflect real content.
        text = re.sub(r'\s+', ' ', text).strip()

        # Only the chunks that will actually be processed take part in the
        # budget split; dividing by the full chunk count would starve long
        # documents down to one question per chunk.
        selected = chunk_text(text, chunk_size=1000)[:max(1, int(max_chunks))]
        questions_per_chunk = max(1, num_questions // len(selected))
        last_index = len(selected) - 1

        all_questions = []
        requested_so_far = 0
        for i, chunk in enumerate(selected):
            remaining_questions = num_questions - requested_so_far
            if remaining_questions <= 0:
                break

            if i == last_index:
                # The last processed chunk absorbs the division remainder and
                # the budget of any earlier chunk that failed.
                questions_to_generate = remaining_questions
            else:
                questions_to_generate = min(questions_per_chunk, remaining_questions)

            try:
                result = quiz_generator(
                    _build_quiz_prompt(questions_to_generate, chunk),
                    max_length=512,
                    do_sample=True,
                    temperature=0.7,
                    top_p=0.9,
                )[0]['generated_text']
            except Exception:
                # Best effort: one failing chunk must not abort the whole quiz.
                logging.getLogger(__name__).exception(
                    "Error generating questions for chunk %d", i + 1
                )
                continue

            all_questions.append(result)
            # Count the budget only for chunks that produced output.
            requested_so_far += questions_to_generate

        if not all_questions:
            return "Error: Failed to generate questions. The text might be too complex or the model encountered an error."

        final_quiz = "\n\n".join(all_questions)
        separator = "============================================================"
        return "\n".join([
            "QUIZ GENERATED FROM YOUR PDF",
            separator,
            final_quiz,
            separator,
            f"Total Questions Generated: {len(all_questions)} sections",
            "Note: Review each question carefully and verify the answers.",
            "",
        ])
    except Exception as e:
        # Top-level boundary for the UI callback: report instead of raising.
        return f"Error processing PDF: {str(e)}\n\nPlease make sure you uploaded a valid PDF file."
# Create Gradio interface
def create_interface():
    """Build the Gradio Blocks UI for the PDF quiz generator.

    The layout is a two-column row: the left column holds the PDF upload,
    the question-count slider and the generate button; the right column
    holds the output textbox. Clicking the button calls ``generate_quiz``
    with the uploaded bytes and the slider value.

    Returns:
        The constructed ``gr.Blocks`` object; the caller is responsible for
        launching it.
    """
    # The Markdown is assembled from explicit lines so its rendering does not
    # depend on how this source file happens to be indented.
    header_md = "\n".join([
        "# PDF to Quiz Generator",
        "",
        "Upload a PDF chapter and generate multiple choice questions with answers and explanations.",
    ])
    instructions_md = "\n".join([
        "### Instructions:",
        "1. Upload a PDF file containing the chapter or text you want to create a quiz from",
        "2. Select the number of questions you want (5-15)",
        "3. Click \"Generate Quiz\" and wait for the results",
        "4. Copy the generated quiz for your use",
        # Blank line keeps the note from being folded into list item 4.
        "",
        "**Note:** The quality of questions depends on the clarity and content of your PDF text.",
    ])

    with gr.Blocks(title="PDF Quiz Generator") as interface:
        gr.Markdown(header_md)

        with gr.Row():
            with gr.Column():
                # type="binary" hands the callback the raw file bytes.
                pdf_input = gr.File(
                    label="Upload PDF Chapter",
                    file_types=[".pdf"],
                    type="binary",
                )
                num_questions = gr.Slider(
                    minimum=5,
                    maximum=15,
                    value=10,
                    step=1,
                    label="Number of Questions to Generate",
                )
                generate_btn = gr.Button("Generate Quiz", variant="primary")

            with gr.Column():
                output = gr.Textbox(
                    label="Generated Quiz",
                    lines=20,
                    max_lines=30,
                    show_copy_button=True,
                )

        generate_btn.click(
            fn=generate_quiz,
            inputs=[pdf_input, num_questions],
            outputs=output,
        )

        gr.Markdown(instructions_md)

    return interface
if __name__ == "__main__":
    # Build the UI only when run as a script, then serve it without
    # requesting a public share link.
    interface = create_interface()
    interface.launch(share=False, debug=True)