Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,11 +1,11 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
import fitz # PyMuPDF
|
| 3 |
-
from transformers import AutoTokenizer,
|
| 4 |
|
| 5 |
-
# Load the model and tokenizer
|
| 6 |
-
model_name = "
|
| 7 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 8 |
-
model =
|
| 9 |
|
| 10 |
# Function to extract text from PDF
|
| 11 |
def extract_text_from_pdf(pdf_file):
|
|
@@ -16,7 +16,7 @@ def extract_text_from_pdf(pdf_file):
|
|
| 16 |
text += page.get_text()
|
| 17 |
return text
|
| 18 |
|
| 19 |
-
# Function to generate MCQs using
|
| 20 |
def generate_mcqs(text, num_questions=5):
|
| 21 |
if not text.strip():
|
| 22 |
return ["No text extracted from the PDF. Unable to generate MCQs."]
|
|
@@ -25,21 +25,19 @@ def generate_mcqs(text, num_questions=5):
|
|
| 25 |
inputs = tokenizer(text, return_tensors="pt", max_length=max_input_length, truncation=True)
|
| 26 |
|
| 27 |
# Create the question generation pipeline
|
| 28 |
-
generator = pipeline("
|
| 29 |
mcqs = []
|
| 30 |
for _ in range(num_questions):
|
| 31 |
# Generate a single MCQ at a time
|
| 32 |
-
input_text = f"
|
| 33 |
generated = generator(input_text, max_length=100, num_return_sequences=1)
|
| 34 |
-
|
| 35 |
|
| 36 |
-
#
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
options = ["Option A", "Option B", "Option C", "Option D"]
|
| 40 |
-
correct_answer = options[0] # Placeholder for correct answer
|
| 41 |
|
| 42 |
-
mcq_formatted = f"Q: {
|
| 43 |
mcqs.append(mcq_formatted)
|
| 44 |
|
| 45 |
return mcqs
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
import fitz # PyMuPDF
|
| 3 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
|
| 4 |
|
| 5 |
+
# Load the LLaMA model and tokenizer
|
| 6 |
+
model_name = "meta-llama/Meta-Llama-3-8B" # Update this with the correct LLaMA model name
|
| 7 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 8 |
+
model = AutoModelForCausalLM.from_pretrained(model_name)
|
| 9 |
|
| 10 |
# Function to extract text from PDF
|
| 11 |
def extract_text_from_pdf(pdf_file):
|
|
|
|
| 16 |
text += page.get_text()
|
| 17 |
return text
|
| 18 |
|
| 19 |
+
# Function to generate MCQs using the LLaMA model
|
| 20 |
def generate_mcqs(text, num_questions=5):
|
| 21 |
if not text.strip():
|
| 22 |
return ["No text extracted from the PDF. Unable to generate MCQs."]
|
|
|
|
| 25 |
inputs = tokenizer(text, return_tensors="pt", max_length=max_input_length, truncation=True)
|
| 26 |
|
| 27 |
# Create the question generation pipeline
|
| 28 |
+
generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
|
| 29 |
mcqs = []
|
| 30 |
for _ in range(num_questions):
|
| 31 |
# Generate a single MCQ at a time
|
| 32 |
+
input_text = f"Generate a multiple choice question from the following text: {tokenizer.decode(inputs['input_ids'][0])}"
|
| 33 |
generated = generator(input_text, max_length=100, num_return_sequences=1)
|
| 34 |
+
question_text = generated[0]["generated_text"]
|
| 35 |
|
| 36 |
+
# Format the MCQ
|
| 37 |
+
options = ["Option A: ABC", "Option B: DEF", "Option C: GHI", "Option D: JKL"] # Placeholder options
|
| 38 |
+
correct_answer = "Option A: ABC" # Placeholder correct answer for now
|
|
|
|
|
|
|
| 39 |
|
| 40 |
+
mcq_formatted = f"Q: {question_text}\nOption A: {options[0]}\nOption B: {options[1]}\nOption C: {options[2]}\nOption D: {options[3]}\nCorrect Answer: {correct_answer}"
|
| 41 |
mcqs.append(mcq_formatted)
|
| 42 |
|
| 43 |
return mcqs
|