Spaces:
Build error
Build error
| import streamlit as st | |
| import fitz # PyMuPDF | |
| from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline | |
# Load the model once per server process.
model_name = "openai-community/gpt2"


@st.cache_resource
def _load_tokenizer_and_model(name):
    """Load the tokenizer and causal-LM weights for checkpoint *name*.

    Streamlit re-executes this script on every widget interaction; caching
    the result as a resource keeps the tokenizer and model from being
    reloaded on each rerun.

    Args:
        name: Checkpoint identifier passed to ``from_pretrained``.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    return (
        AutoTokenizer.from_pretrained(name),
        AutoModelForCausalLM.from_pretrained(name),
    )


tokenizer, model = _load_tokenizer_and_model(model_name)
# Function to extract text from PDF
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page of a PDF.

    Args:
        pdf_file: Binary file-like object whose ``read()`` returns the raw
            PDF bytes.

    Returns:
        The text of all pages joined in page order, with no separator added
        between pages.
    """
    # The context manager closes the document even if a page fails to
    # parse; the original left it open.
    with fitz.open(stream=pdf_file.read(), filetype="pdf") as pdf_document:
        return "".join(page.get_text() for page in pdf_document)
# Function to generate questions using GPT-2
def generate_questions(text, num_questions=5, max_new_tokens=64):
    """Generate candidate questions about *text* with the module-level model.

    The text is truncated to fit the model's context window, wrapped in an
    instruction prompt, and sampled ``num_questions`` times in one call.

    Args:
        text: Source text the questions should be based on.
        num_questions: How many completions to sample.
        max_new_tokens: Upper bound on the number of tokens generated for
            each question.

    Returns:
        A list of generated strings, one per requested question. If *text*
        is empty or whitespace-only, a single-element list holding an
        explanatory message. An empty list if ``num_questions`` is below 1.

    Raises:
        ValueError: If ``max_new_tokens`` is below 1 or leaves no room in
            the context window for any source text.
    """
    if not text.strip():
        return ["No text extracted from the PDF. Unable to generate questions."]

    num_questions = int(num_questions)
    if num_questions < 1:
        return []
    if max_new_tokens < 1:
        raise ValueError("max_new_tokens must be at least 1")

    prefix = "Generate a question based on the following text:\n"
    suffix = "\n\nQuestion:"

    # Prompt tokens plus generated tokens must fit in the context window,
    # so budget the excerpt accordingly. The small margin absorbs token
    # count drift when the decoded excerpt is re-tokenized inside the prompt.
    context_window = getattr(model.config, "max_position_embeddings", 1024)
    overhead = len(tokenizer.encode(prefix + suffix))
    safety_margin = 8
    budget = context_window - max_new_tokens - overhead - safety_margin
    if budget < 1:
        raise ValueError(
            "max_new_tokens leaves no room for the source text in the "
            "model's context window"
        )

    excerpt_ids = tokenizer.encode(text, truncation=True, max_length=budget)
    excerpt = tokenizer.decode(excerpt_ids)
    prompt = f"{prefix}{excerpt}{suffix}"

    # Create the text generation pipeline
    generator = pipeline("text-generation", model=model, tokenizer=tokenizer)

    # One sampled call yields all questions. return_full_text=False returns
    # only the continuation, so a "Question:" inside the source text cannot
    # confuse the extraction the way splitting on that marker did.
    outputs = generator(
        prompt,
        max_new_tokens=max_new_tokens,
        num_return_sequences=num_questions,
        do_sample=True,
        return_full_text=False,
        pad_token_id=tokenizer.eos_token_id,
    )
    return [output["generated_text"].strip() for output in outputs]
# Streamlit app interface
st.title("PDF to Question Generator")
uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])

if uploaded_file is not None:
    # NOTE(review): a rerun may hand back a buffer that was already read to
    # the end; rewinding makes the read below independent of that. Confirm
    # against the Streamlit version in use.
    uploaded_file.seek(0)
    try:
        with st.spinner("Extracting text from the PDF..."):
            text = extract_text_from_pdf(uploaded_file)
    except (RuntimeError, ValueError) as exc:
        # Report an unreadable upload in the page instead of a traceback.
        st.error(f"Could not read the uploaded PDF: {exc}")
        st.stop()

    st.write("Text extracted successfully!")
    st.write("Extracted Text:", text)

    # Ask for the count before generating, and only generate on request:
    # the script reruns on every widget change, and each run of the model
    # is slow.
    num_questions = st.number_input(
        "Number of questions to generate",
        min_value=1,
        max_value=20,
        value=5,
        step=1,
        format="%d",
    )
    if st.button("Generate questions"):
        with st.spinner("Generating questions..."):
            questions = generate_questions(text, int(num_questions))
        st.write("Generated Questions:")
        for idx, question in enumerate(questions, start=1):
            st.write(f"{idx}. {question}")