Hugging Face Spaces — build error. Commit "Update app.py" (diff shown below; browse files for full tree).
app.py
CHANGED
|
@@ -1,10 +1,11 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
import fitz # PyMuPDF
|
| 3 |
-
from transformers import
|
| 4 |
|
| 5 |
# Load the model and tokenizer
|
| 6 |
-
|
| 7 |
-
|
|
|
|
| 8 |
|
| 9 |
# Function to extract text from PDF
|
| 10 |
def extract_text_from_pdf(pdf_file):
|
|
@@ -19,16 +20,18 @@ def extract_text_from_pdf(pdf_file):
|
|
| 19 |
def generate_mcqs(text, num_questions=5):
|
| 20 |
if not text.strip():
|
| 21 |
return ["No text extracted from the PDF. Unable to generate MCQs."]
|
| 22 |
-
|
| 23 |
-
# Truncate text to fit within the model's max token limit
|
| 24 |
max_input_length = 512 - 100 # Reserve space for generated tokens
|
| 25 |
inputs = tokenizer(text, return_tensors="pt", max_length=max_input_length, truncation=True)
|
| 26 |
|
|
|
|
|
|
|
| 27 |
mcqs = []
|
| 28 |
-
generator = pipeline("document-question-answering", model=model, tokenizer=tokenizer)
|
| 29 |
for _ in range(num_questions):
|
| 30 |
# Generate a single MCQ at a time
|
| 31 |
-
|
|
|
|
|
|
|
| 32 |
mcqs.append(mcq)
|
| 33 |
|
| 34 |
return mcqs
|
|
|
|
| 1 |
# --- Dependencies -----------------------------------------------------------
import streamlit as st
import fitz  # PyMuPDF: PDF text extraction
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

# Load the seq2seq model and its tokenizer at module import time so the
# functions below can reference them directly.
model_name = "t5-small"  # or another model suitable for question generation
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 9 |
|
| 10 |
# Function to extract text from PDF
|
| 11 |
def extract_text_from_pdf(pdf_file):
|
|
|
|
| 20 |
def generate_mcqs(text, num_questions=5):
    """Generate multiple-choice questions from extracted PDF text.

    Args:
        text: Raw text pulled from the uploaded PDF.
        num_questions: How many questions to generate (default 5).

    Returns:
        A list of generated question strings, or a single-element list with
        an explanatory message when *text* is empty/whitespace.
    """
    if not text.strip():
        return ["No text extracted from the PDF. Unable to generate MCQs."]

    # Truncate text to fit within the model's max token limit.
    max_input_length = 512 - 100  # Reserve space for generated tokens
    inputs = tokenizer(text, return_tensors="pt", max_length=max_input_length, truncation=True)

    # Build the prompt ONCE: it is loop-invariant, so decoding inside the
    # loop was wasted work. skip_special_tokens=True drops markers such as
    # </s> that would otherwise leak into the prompt text.
    prompt = f"generate question: {tokenizer.decode(inputs['input_ids'][0], skip_special_tokens=True)}"

    # Create the question generation pipeline.
    generator = pipeline("text2text-generation", model=model, tokenizer=tokenizer)

    mcqs = []
    for _ in range(num_questions):
        # do_sample=True is required here: with greedy decoding and an
        # identical prompt every iteration, all num_questions outputs would
        # be the same question.
        generated = generator(prompt, max_length=100, num_return_sequences=1, do_sample=True)
        mcqs.append(generated[0]["generated_text"])

    return mcqs
|