Spaces:

pratikshahp
/

Question-Answer-Generation-App

Build error

App Files Files Community

pratikshahp committed on Jul 9, 2024

Commit

ece53ba

verified ·

1 Parent(s): 8934212

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -12

app.py CHANGED Viewed

@@ -16,30 +16,43 @@ def extract_text_from_pdf(pdf_file):
         text += page.get_text()
     return text
 # Function to generate MCQs using the model
def generate_mcqs(text, num_questions=5):
    """Generate multiple-choice questions from extracted PDF text.

    The text is truncated to fit an assumed 512-token window, then the
    text-generation pipeline is asked for one question per iteration.
    Answer options are still placeholders in this version.

    Args:
        text: Raw text extracted from the PDF.
        num_questions: Number of MCQs to attempt to generate.

    Returns:
        A list of formatted MCQ strings. If ``text`` is empty or
        whitespace-only, a single-element list holding an explanatory
        message is returned instead. The list may be shorter than
        ``num_questions`` when the model returns an empty question.
    """
    if not text.strip():
        return ["No text extracted from the PDF. Unable to generate MCQs."]
    if num_questions <= 0:
        # Nothing requested: avoid tokenizing and building the pipeline.
        return []

    # Token budget: 512 total, of which 100 are reserved for generated tokens.
    # NOTE(review): 512 is assumed to be the model's context size - confirm.
    max_new_tokens = 100
    prompt_template = (
        "Based on the following text, generate a multiple choice question:"
        "\n\n{}\n\nQuestion:"
    )
    # Approximate cost of the instruction wrapper so the full prompt, not just
    # the source text, stays inside the window.
    overhead = len(tokenizer(prompt_template.format(""))["input_ids"])
    text_budget = max(1, 512 - max_new_tokens - overhead)

    # Actually apply the truncation: the token ids are decoded back to text and
    # that truncated text is what goes into the prompt.
    token_ids = tokenizer(text, max_length=text_budget, truncation=True)["input_ids"]
    context = tokenizer.decode(token_ids, skip_special_tokens=True)
    input_text = prompt_template.format(context)

    # Create the question generation pipeline
    generator = pipeline("text-generation", model=model, tokenizer=tokenizer)

    # Placeholder options / answer; identical for every question, so built once.
    options = [
        "Option A: Placeholder A",
        "Option B: Placeholder B",
        "Option C: Placeholder C",
        "Option D: Placeholder D",
    ]
    correct_answer = "Option A: Placeholder A"  # Placeholder correct answer for now

    mcqs = []
    for _ in range(num_questions):
        # max_new_tokens bounds only the continuation; max_length would also
        # count the prompt and could leave no room to generate anything.
        generated = generator(input_text, max_new_tokens=max_new_tokens, num_return_sequences=1)
        generated_text = generated[0]["generated_text"]

        # Drop the echoed prompt by prefix rather than splitting on
        # "Question:", which also matches occurrences inside the PDF text.
        if generated_text.startswith(input_text):
            continuation = generated_text[len(input_text):]
        else:
            continuation = generated_text.rpartition("Question:")[2]

        # Keep only the first line so run-on generation is not shown as the question.
        question_text = continuation.strip().split("\n", 1)[0].strip()
        if not question_text:
            continue

        mcq_formatted = (
            f"Q: {question_text}\n{options[0]}\n{options[1]}\n{options[2]}\n{options[3]}\n"
            f"Correct Answer: {correct_answer}"
        )
        mcqs.append(mcq_formatted)
    return mcqs
 # Streamlit app interface
@@ -55,7 +68,8 @@ if uploaded_file is not None:
     st.write("Generating MCQs...")
     num_questions = st.number_input("Number of MCQs to generate", min_value=1, max_value=20, value=5, step=1, format="%d")
-    mcqs = generate_mcqs(text, num_questions)
     st.write("Generated MCQs:")
     for idx, mcq in enumerate(mcqs):

         text += page.get_text()
     return text
+# Function to split text into chunks
def split_text(text, chunk_size=500):
    """Split text into chunks of at most ``chunk_size`` whitespace-separated words.

    Args:
        text: The text to split. Runs of whitespace (including newlines) are
            collapsed, because the words are re-joined with single spaces.
        chunk_size: Maximum number of words per chunk; must be at least 1.

    Returns:
        A list of chunk strings in document order. Empty or whitespace-only
        text yields an empty list.

    Raises:
        ValueError: If ``chunk_size`` is less than 1.
    """
    if chunk_size < 1:
        # A negative step makes range() empty, silently dropping all text, and
        # a zero step raises an unrelated-looking error. Fail clearly instead.
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")
    words = text.split()
    return [
        " ".join(words[start:start + chunk_size])
        for start in range(0, len(words), chunk_size)
    ]
 # Function to generate MCQs using the model
def _strip_prompt(generated_text, prompt, marker):
    """Return only the model's continuation from a text-generation result."""
    if generated_text.startswith(prompt):
        return generated_text[len(prompt):].strip()
    # Fallback when the prompt is not echoed verbatim: take what follows the
    # last marker, or the whole text if the marker is absent.
    return generated_text.rpartition(marker)[2].strip()


def _parse_options(raw_options, count=4):
    """Label the first ``count`` non-empty generated lines, or return [] if too few."""
    candidates = [line.strip() for line in raw_options.split("\n") if line.strip()]
    if len(candidates) < count:
        return []
    return [f"Option {chr(65 + i)}: {option}" for i, option in enumerate(candidates[:count])]


def generate_mcqs(text_chunks, num_questions=5):
    """Generate up to ``num_questions`` multiple-choice questions, one per chunk.

    For each chunk the text-generation pipeline is called twice: once for a
    question, once for four answer options. Chunks that yield no question or
    fewer than four options are skipped.

    Args:
        text_chunks: Sequence of text chunks to generate questions from.
        num_questions: Maximum number of MCQs to return.

    Returns:
        A list of formatted MCQ strings, at most ``num_questions`` long. If
        ``text_chunks`` is empty, a single-element list holding an explanatory
        message is returned instead.
    """
    if not text_chunks:
        return ["No text extracted from the PDF. Unable to generate MCQs."]
    if num_questions <= 0:
        # Nothing requested: the limit used to be checked only after the first
        # MCQ was appended, so zero still produced one question.
        return []

    # Create the question generation pipeline
    generator = pipeline("text-generation", model=model, tokenizer=tokenizer)

    # Budgets for generated tokens only. max_length would also count the
    # prompt, which can leave no room for output on long chunks.
    # NOTE(review): a chunk may still exceed the model's context window;
    # confirm the chunk size against the model in use.
    question_new_tokens = 64
    options_new_tokens = 128

    mcqs = []
    for chunk in text_chunks:
        input_text = f"Generate a multiple-choice question from the following text:\n\n{chunk}\n\nQuestion:"
        generated = generator(input_text, max_new_tokens=question_new_tokens, num_return_sequences=1)
        continuation = _strip_prompt(generated[0]["generated_text"], input_text, "Question:")
        # Keep only the first line so run-on generation is not shown as the question.
        question_text = continuation.split("\n", 1)[0].strip()
        if not question_text:
            continue

        # Generate options for the question
        options_text = f"Generate four plausible multiple-choice options for the following question:\n\n{question_text}\nOptions:"
        options_generated = generator(options_text, max_new_tokens=options_new_tokens, num_return_sequences=1)
        raw_options = _strip_prompt(options_generated[0]["generated_text"], options_text, "Options:")
        options = _parse_options(raw_options)
        if not options:
            continue

        correct_answer = options[0]  # Placeholder for correct answer identification logic
        mcq_formatted = (
            f"Q: {question_text}\n{options[0]}\n{options[1]}\n{options[2]}\n{options[3]}\n"
            f"Correct Answer: {correct_answer}"
        )
        mcqs.append(mcq_formatted)
        if len(mcqs) >= num_questions:
            break
    return mcqs
 # Streamlit app interface
     st.write("Generating MCQs...")
     num_questions = st.number_input("Number of MCQs to generate", min_value=1, max_value=20, value=5, step=1, format="%d")
+    text_chunks = split_text(text)
+    mcqs = generate_mcqs(text_chunks, num_questions)
     st.write("Generated MCQs:")
     for idx, mcq in enumerate(mcqs):