Spaces:

pratikshahp
/

Question-Answer-Generation-App

Build error

App Files Community

pratikshahp committed on Jun 12, 2024

Commit

adfa719

verified ·

1 Parent(s): 953f0c6

Update app.py

Browse files

Files changed (1) hide show

app.py +12 -14

app.py CHANGED Viewed

@@ -1,11 +1,11 @@
 import streamlit as st
 import fitz  # PyMuPDF
-from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
-# Load the model and tokenizer
-model_name = "t5-small"  # or another model suitable for question generation
 tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
 # Function to extract text from PDF
 def extract_text_from_pdf(pdf_file):
@@ -16,7 +16,7 @@ def extract_text_from_pdf(pdf_file):
         text += page.get_text()
     return text
-# Function to generate MCQs using Hugging Face model
 def generate_mcqs(text, num_questions=5):
     if not text.strip():
         return ["No text extracted from the PDF. Unable to generate MCQs."]
@@ -25,21 +25,19 @@ def generate_mcqs(text, num_questions=5):
     inputs = tokenizer(text, return_tensors="pt", max_length=max_input_length, truncation=True)
     # Create the question generation pipeline
-    generator = pipeline("text2text-generation", model=model, tokenizer=tokenizer)
     mcqs = []
     for _ in range(num_questions):
         # Generate a single MCQ at a time
-        input_text = f"generate question: {tokenizer.decode(inputs['input_ids'][0])}"
         generated = generator(input_text, max_length=100, num_return_sequences=1)
-        mcq = generated[0]["generated_text"]
-        # Dummy logic to create options and correct answer (for demonstration purposes)
-        # This logic should be replaced with actual code to generate meaningful MCQ options
-        question = mcq
-        options = ["Option A", "Option B", "Option C", "Option D"]
-        correct_answer = options[0]  # Placeholder for correct answer
-        mcq_formatted = f"Q: {question}\nOption A: {options[0]}\nOption B: {options[1]}\nOption C: {options[2]}\nOption D: {options[3]}\nCorrect Answer: {correct_answer}"
         mcqs.append(mcq_formatted)
     return mcqs

 import streamlit as st
 import fitz  # PyMuPDF
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+# Load the LLaMA model and tokenizer
+model_name = "meta-llama/Meta-Llama-3-8B"  # Update this with the correct LLaMA model name
 tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(model_name)
 # Function to extract text from PDF
 def extract_text_from_pdf(pdf_file):
         text += page.get_text()
     return text
+# Function to generate MCQs using the LLaMA model
 def generate_mcqs(text, num_questions=5):
     if not text.strip():
         return ["No text extracted from the PDF. Unable to generate MCQs."]
     inputs = tokenizer(text, return_tensors="pt", max_length=max_input_length, truncation=True)
     # Create the question generation pipeline
+    generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
     mcqs = []
     for _ in range(num_questions):
         # Generate a single MCQ at a time
+        input_text = f"Generate a multiple choice question from the following text: {tokenizer.decode(inputs['input_ids'][0])}"
         generated = generator(input_text, max_length=100, num_return_sequences=1)
+        question_text = generated[0]["generated_text"]
+        # Format the MCQ
+        options = ["Option A: ABC", "Option B: DEF", "Option C: GHI", "Option D: JKL"]  # Placeholder options
+        correct_answer = "Option A: ABC"  # Placeholder correct answer for now
+        mcq_formatted = f"Q: {question_text}\nOption A: {options[0]}\nOption B: {options[1]}\nOption C: {options[2]}\nOption D: {options[3]}\nCorrect Answer: {correct_answer}"
         mcqs.append(mcq_formatted)
     return mcqs