Spaces:

pratikshahp
/

Question-Answer-Generation-App

Build error

File size: 2,019 Bytes

043014d
 
eefadc3
8934212
ed82318
 
71aedcb
eefadc3
043014d
 
 
 
 
 
 
 
 
 
eefadc3
 
 
 
71aedcb
eefadc3
adfa719
c233fdc
eefadc3
 
 
 
69d7207
eefadc3
 
ece53ba
eefadc3
043014d
 
eefadc3
043014d
eefadc3
043014d
 
 
 
 
90ba42f
043014d
eefadc3
 
 
043014d
eefadc3

import streamlit as st
import fitz  # PyMuPDF
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# Load model directly
model_name = "openai-community/gpt2"


@st.cache_resource
def _load_tokenizer_and_model(name):
    """Load the tokenizer and causal-LM weights for the checkpoint ``name``.

    Streamlit re-executes this script from the top on every widget
    interaction; wrapping the load in ``st.cache_resource`` keeps one shared
    tokenizer/model pair alive across reruns instead of re-instantiating
    them each time.

    Args:
        name: Hugging Face Hub model identifier.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    return (
        AutoTokenizer.from_pretrained(name),
        AutoModelForCausalLM.from_pretrained(name),
    )


tokenizer, model = _load_tokenizer_and_model(model_name)

# Function to extract text from PDF
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_file: Binary file-like object; its ``read()`` must return the
            raw bytes of the PDF.

    Returns:
        A single string with the text of all pages (empty when no page
        yields any text).
    """
    # The context manager closes the document even if a page fails to load,
    # so the underlying PyMuPDF resources are not leaked.
    with fitz.open(stream=pdf_file.read(), filetype="pdf") as pdf_document:
        return "".join(
            pdf_document.load_page(page_num).get_text()
            for page_num in range(pdf_document.page_count)
        )

# Function to generate questions using GPT-2
def generate_questions(text, num_questions=5):
    """Generate questions about ``text`` with the module-level model.

    Args:
        text: Source text the questions should be based on.
        num_questions: Number of questions to generate.

    Returns:
        A list of generated strings, one per requested question. If ``text``
        is empty or whitespace-only, a one-element list holding an
        explanatory message. If ``num_questions`` is not positive, an empty
        list.
    """
    if not text.strip():
        return ["No text extracted from the PDF. Unable to generate questions."]
    if num_questions <= 0:
        # Nothing requested: skip building the pipeline altogether.
        return []

    # Create the text generation pipeline
    generator = pipeline("text-generation", model=model, tokenizer=tokenizer)

    instruction = "Generate a question based on the following text:\n"
    cue = "\n\nQuestion:"
    new_tokens = 64

    # A whole PDF rarely fits into the model's context window. Keep only as
    # many leading tokens of the text as leave room for the instruction, the
    # cue and the tokens to be generated. The extra margin absorbs small
    # token-count shifts when the truncated ids are decoded and re-encoded.
    context_size = getattr(model.config, "n_positions", 1024)
    overhead = len(tokenizer.encode(instruction + cue))
    budget = max(context_size - new_tokens - overhead - 8, 1)
    token_ids = tokenizer.encode(text)
    if len(token_ids) > budget:
        text = tokenizer.decode(token_ids[:budget])

    # The prompt is identical for every question, so build it once.
    prompt = f"{instruction}{text}{cue}"

    questions = []
    for _ in range(num_questions):
        # Generate a single question at a time. ``max_new_tokens`` bounds only
        # the continuation (``max_length`` would also count the prompt), and
        # ``return_full_text=False`` returns just the continuation, so there
        # is no need to split on "Question:" -- which picked the wrong piece
        # whenever the source text itself contained that marker. Sampling
        # keeps the repeated calls from all returning the same string.
        generated = generator(
            prompt,
            max_new_tokens=new_tokens,
            num_return_sequences=1,
            do_sample=True,
            return_full_text=False,
        )
        questions.append(generated[0]["generated_text"].strip())

    return questions

# Streamlit app interface
st.title("PDF to Question Generator")

# The uploader only accepts PDFs; the rest of the page is rendered only once
# a file has been provided.
uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])

if uploaded_file is not None:
    # Step 1: extract the text and echo it back to the user.
    st.write("Extracting text from the PDF...")
    text = extract_text_from_pdf(uploaded_file)
    st.write("Text extracted successfully!")
    st.write("Extracted Text:", text)

    # Step 2: read the requested question count and run generation.
    st.write("Generating questions...")
    num_questions = st.number_input(
        "Number of questions to generate",
        min_value=1,
        max_value=20,
        value=5,
        step=1,
        format="%d",
    )
    questions = generate_questions(text, num_questions)

    # Step 3: show the results as a 1-based numbered list.
    st.write("Generated Questions:")
    for position, question in enumerate(questions, start=1):
        st.write(f"{position}. {question}")