import streamlit as st
import fitz  # PyMuPDF
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# Load the model and tokenizer once at startup
model_name = "openai-community/gpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)


# Extract the text of every page from an uploaded PDF
def extract_text_from_pdf(pdf_file):
    pdf_document = fitz.open(stream=pdf_file.read(), filetype="pdf")
    text = ""
    for page_num in range(pdf_document.page_count):
        page = pdf_document.load_page(page_num)
        text += page.get_text()
    pdf_document.close()
    return text


# Generate questions about the text using GPT-2
def generate_questions(text, num_questions=5):
    if not text.strip():
        return ["No text extracted from the PDF. Unable to generate questions."]

    # Create the text generation pipeline
    generator = pipeline("text-generation", model=model, tokenizer=tokenizer)

    # GPT-2's context window is only 1024 tokens, so truncate the source
    # text to leave room for the prompt template and the generated question.
    token_ids = tokenizer.encode(text)[:800]
    truncated_text = tokenizer.decode(token_ids)

    questions = []
    for _ in range(num_questions):
        # Generate a single question at a time
        prompt = f"Generate a question based on the following text:\n{truncated_text}\n\nQuestion:"
        # Sample so that repeated calls produce different questions;
        # max_new_tokens bounds the question length without counting the
        # prompt (max_length would count the prompt and overflow here).
        generated = generator(
            prompt,
            max_new_tokens=50,
            num_return_sequences=1,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )
        # Keep only the text generated after the final "Question:" marker
        question = generated[0]["generated_text"].split("Question:")[-1].strip()
        questions.append(question)
    return questions


# Streamlit app interface
st.title("PDF to Question Generator")

uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])

if uploaded_file is not None:
    st.write("Extracting text from the PDF...")
    text = extract_text_from_pdf(uploaded_file)
    st.write("Text extracted successfully!")
    st.write("Extracted Text:", text)

    # Ask for the question count before kicking off generation
    num_questions = st.number_input(
        "Number of questions to generate",
        min_value=1, max_value=20, value=5, step=1, format="%d",
    )
    st.write("Generating questions...")
    questions = generate_questions(text, num_questions)

    st.write("Generated Questions:")
    for idx, question in enumerate(questions):
        st.write(f"{idx + 1}. {question}")
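

# --- Optional sketch, not part of the original script: generate_questions
# above only sees the first ~800 tokens of the PDF. To draw questions from
# the whole document, one could split the text into token-sized chunks and
# rotate through them inside the generation loop. The helper name
# `chunk_text` is an assumption for illustration, not an established API.
def chunk_text(text, max_tokens=800):
    """Split `text` into pieces of at most `max_tokens` GPT-2 tokens."""
    token_ids = tokenizer.encode(text)
    return [
        tokenizer.decode(token_ids[i:i + max_tokens])
        for i in range(0, len(token_ids), max_tokens)
    ]

# Example use inside the generation loop, e.g.:
#   chunks = chunk_text(text)
#   source = chunks[i % len(chunks)]  # cycle chunks across questions
#
# To run the app locally (assuming this file is saved as app.py):
#   pip install streamlit pymupdf transformers torch
#   streamlit run app.py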