Bushra346's picture
Update app.py
cd986fa verified
import gradio as gr
import fitz # PyMuPDF
from transformers import pipeline
import tempfile
import os
import re
# Build the text-to-text pipeline once, at import time, so every call into
# this module reuses the same pipeline object instead of constructing a new one.
quiz_generator = pipeline(
    task="text2text-generation",
    model="google/flan-t5-base",
)
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page of a PDF.

    The bytes are written to a temporary ``.pdf`` file, that file is opened
    with PyMuPDF (``fitz``), and the temporary file is removed again before
    the function exits, whether extraction succeeded or not.

    Args:
        pdf_file: Raw bytes of the PDF document.

    Returns:
        The text of all pages joined in page order.

    Raises:
        TypeError: If ``pdf_file`` is not bytes-like (raised by the file write).
        Exception: Anything ``fitz.open`` or ``page.get_text`` raises is
            propagated unchanged to the caller.
    """
    temp_path = None
    try:
        # delete=False keeps the file on disk after the ``with`` so it can be
        # reopened by path; removal is handled in ``finally`` below. The path
        # is recorded before writing so a failed write is still cleaned up.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
            temp_path = temp_file.name
            temp_file.write(pdf_file)

        # The context manager closes the document even when a page fails to
        # parse, so the handle is released before the file is unlinked.
        with fitz.open(temp_path) as doc:
            return "".join(page.get_text() for page in doc)
    finally:
        if temp_path is not None and os.path.exists(temp_path):
            os.unlink(temp_path)
def chunk_text(text, chunk_size=800):
    """Split text into whitespace-delimited chunks of at most ``chunk_size`` characters.

    Words are never broken: a single word longer than ``chunk_size`` becomes
    a chunk on its own. Runs of whitespace in the input collapse to single
    spaces in the output because the text is split with ``str.split()`` and
    re-joined with ``' '``.

    Args:
        text: The text to split.
        chunk_size: Maximum length, in characters, of each joined chunk.

    Returns:
        A list of chunk strings in input order; empty if ``text`` has no words.
    """
    chunks = []
    current_chunk = []
    # Invariant: current_length == len(' '.join(current_chunk)).
    current_length = 0

    for word in text.split():
        # Appending to a non-empty chunk also costs one separator character.
        separator = 1 if current_chunk else 0
        projected_length = current_length + separator + len(word)

        if current_chunk and projected_length > chunk_size:
            chunks.append(' '.join(current_chunk))
            current_chunk = [word]
            current_length = len(word)
        else:
            current_chunk.append(word)
            current_length = projected_length

    if current_chunk:
        chunks.append(' '.join(current_chunk))

    return chunks
# Upper bound on the number of chunks (and therefore model calls) per request,
# to avoid timeouts on long documents.
_MAX_CHUNKS = 3


def _plan_question_counts(num_questions, num_chunks):
    """Split ``num_questions`` over ``num_chunks`` prompts; the last prompt takes the remainder."""
    per_chunk = max(1, num_questions // num_chunks)
    counts = []
    remaining = num_questions
    for index in range(num_chunks):
        if remaining <= 0:
            break
        is_last = index == num_chunks - 1
        count = remaining if is_last else min(per_chunk, remaining)
        counts.append(count)
        remaining -= count
    return counts


def _build_prompt(question_count, chunk):
    """Return the instruction prompt asking for ``question_count`` questions about ``chunk``."""
    return "\n".join((
        f"Create {question_count} multiple choice questions from this text.",
        "Format each question as:",
        "Q1. [Question text]",
        "A) [Option A]",
        "B) [Option B]",
        "C) [Option C]",
        "D) [Option D]",
        "Correct Answer: [Letter] - [Explanation]",
        f"Text: {chunk}",
    ))


def generate_quiz(pdf_bytes, num_questions=10):
    """Generate a multiple-choice quiz from the text of a PDF.

    The PDF text is extracted, whitespace-normalised, and split into chunks.
    At most ``_MAX_CHUNKS`` chunks are sent to the model, one prompt per
    chunk, and the requested question total is divided among those prompts
    so that the per-prompt counts add up to ``num_questions``.

    Args:
        pdf_bytes: Raw bytes of the uploaded PDF, or ``None`` if nothing was
            uploaded.
        num_questions: Total number of questions to request. Converted with
            ``int()`` so a float value does not appear as e.g. ``3.0`` in the
            prompt.

    Returns:
        The formatted quiz text on success, otherwise a string starting with
        ``"Error"`` describing what went wrong. This function does not raise;
        every exception is converted into an error string.
    """
    if pdf_bytes is None:
        return "Error: Please upload a PDF file first."

    try:
        text = extract_text_from_pdf(pdf_bytes)
        if not text.strip():
            return "Error: No text found in the PDF. Please check if the PDF contains readable text."

        # Collapse all whitespace runs so chunk sizes reflect real content.
        text = re.sub(r'\s+', ' ', text).strip()
        num_questions = int(num_questions)

        # Quotas are planned against the chunks that will actually be
        # processed, not against every chunk in the document.
        chunks = chunk_text(text, chunk_size=1000)[:_MAX_CHUNKS]
        counts = _plan_question_counts(num_questions, len(chunks))

        all_questions = []
        # zip() stops early when fewer prompts than chunks are needed.
        for i, (chunk, question_count) in enumerate(zip(chunks, counts)):
            try:
                result = quiz_generator(
                    _build_prompt(question_count, chunk),
                    max_length=512,
                    do_sample=True,
                    temperature=0.7,
                    top_p=0.9,
                )[0]['generated_text']
            except Exception as e:
                # Best effort: one failed chunk must not discard the others.
                print(f"Error generating questions for chunk {i+1}: {str(e)}")
                continue
            all_questions.append(result)

        if not all_questions:
            return "Error: Failed to generate questions. The text might be too complex or the model encountered an error."

        final_quiz = "\n\n".join(all_questions)

        # Each entry in all_questions is the model output for one chunk,
        # hence "sections" in the footer.
        return "\n".join((
            "QUIZ GENERATED FROM YOUR PDF",
            "============================================================",
            final_quiz,
            "============================================================",
            f"Total Questions Generated: {len(all_questions)} sections",
            "Note: Review each question carefully and verify the answers.",
            "",
        ))

    except Exception as e:
        # UI boundary: report the failure as text instead of raising.
        return f"Error processing PDF: {str(e)}\n\nPlease make sure you uploaded a valid PDF file."
# Create Gradio interface
def create_interface():
    """Assemble the Gradio Blocks UI for the quiz generator and return it.

    The page holds a title banner, then a row with two columns: the first
    contains the PDF upload, the question-count slider and the trigger
    button; the second contains the text box that shows the quiz. Usage
    instructions follow the row. Clicking the button calls ``generate_quiz``
    with the uploaded file's bytes and the slider value, and writes the
    returned string to the text box.

    Returns:
        The constructed ``gr.Blocks`` object (not yet launched).
    """
    banner_md = "\n".join((
        "",
        "# PDF to Quiz Generator",
        "Upload a PDF chapter and generate multiple choice questions with answers and explanations.",
        "",
    ))
    instructions_md = "\n".join((
        "",
        "### Instructions:",
        "1. Upload a PDF file containing the chapter or text you want to create a quiz from",
        "2. Select the number of questions you want (5-15)",
        "3. Click \"Generate Quiz\" and wait for the results",
        "4. Copy the generated quiz for your use",
        "**Note:** The quality of questions depends on the clarity and content of your PDF text.",
        "",
    ))

    with gr.Blocks(title="PDF Quiz Generator") as app:
        gr.Markdown(banner_md)

        with gr.Row():
            # First column: inputs and the trigger button.
            with gr.Column():
                upload = gr.File(
                    label="Upload PDF Chapter",
                    file_types=[".pdf"],
                    type="binary",
                )
                question_count = gr.Slider(
                    minimum=5,
                    maximum=15,
                    value=10,
                    step=1,
                    label="Number of Questions to Generate",
                )
                run_button = gr.Button("Generate Quiz", variant="primary")

            # Second column: the generated quiz.
            with gr.Column():
                quiz_box = gr.Textbox(
                    label="Generated Quiz",
                    lines=20,
                    max_lines=30,
                    show_copy_button=True,
                )

        # Event wiring has to happen inside the Blocks context.
        run_button.click(
            fn=generate_quiz,
            inputs=[upload, question_count],
            outputs=quiz_box,
        )

        gr.Markdown(instructions_md)

    return app
# Script entry point: build the UI and launch it only when this file is run
# directly, not when it is imported as a module.
if __name__ == "__main__":
    interface = create_interface()
    # debug and share are passed straight through to Gradio's launch();
    # share=False presumably means no public share link is requested (verify
    # against the Gradio version in use).
    interface.launch(debug=True, share=False)