pratikshahp committed on
Commit
71aedcb
·
verified ·
1 Parent(s): a754c60

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -7
app.py CHANGED
@@ -1,10 +1,11 @@
1
  import streamlit as st
2
  import fitz # PyMuPDF
3
- from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
4
 
5
  # Load the model and tokenizer
6
- tokenizer = AutoTokenizer.from_pretrained("openai-gpt")
7
- model = AutoModelForCausalLM.from_pretrained("openai-gpt")
 
8
 
9
  # Function to extract text from PDF
10
  def extract_text_from_pdf(pdf_file):
@@ -19,16 +20,18 @@ def extract_text_from_pdf(pdf_file):
19
  def generate_mcqs(text, num_questions=5):
20
  if not text.strip():
21
  return ["No text extracted from the PDF. Unable to generate MCQs."]
22
-
23
- # Truncate text to fit within the model's max token limit
24
  max_input_length = 512 - 100 # Reserve space for generated tokens
25
  inputs = tokenizer(text, return_tensors="pt", max_length=max_input_length, truncation=True)
26
 
 
 
27
  mcqs = []
28
- generator = pipeline("document-question-answering", model=model, tokenizer=tokenizer)
29
  for _ in range(num_questions):
30
  # Generate a single MCQ at a time
31
- mcq = generator(tokenizer.decode(inputs['input_ids'][0]), max_new_tokens=100)[0]["generated_text"]
 
 
32
  mcqs.append(mcq)
33
 
34
  return mcqs
 
1
  import streamlit as st
2
  import fitz # PyMuPDF
3
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
4
 
5
# Load the model and tokenizer
# NOTE: executed once at import time; the first run downloads the weights
# from the Hugging Face hub (network required).
model_name = "t5-small"  # or another model suitable for question generation
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
9
 
10
  # Function to extract text from PDF
11
  def extract_text_from_pdf(pdf_file):
 
def generate_mcqs(text, num_questions=5):
    """Generate multiple-choice questions from *text* with the T5 pipeline.

    Parameters
    ----------
    text : str
        Source text (e.g. extracted from a PDF).
    num_questions : int, optional
        Number of questions to generate (default 5).

    Returns
    -------
    list[str]
        Generated question strings, or a one-element message list when
        *text* is empty or whitespace-only.
    """
    if not text.strip():
        return ["No text extracted from the PDF. Unable to generate MCQs."]

    # Truncate the input so prompt + generated tokens fit the model limit.
    max_input_length = 512 - 100  # Reserve space for generated tokens
    inputs = tokenizer(text, return_tensors="pt", max_length=max_input_length, truncation=True)

    # Create the question generation pipeline
    generator = pipeline("text2text-generation", model=model, tokenizer=tokenizer)

    # Build the prompt once — it is loop-invariant. skip_special_tokens
    # keeps artifacts like </s> out of the prompt text.
    prompt = f"generate question: {tokenizer.decode(inputs['input_ids'][0], skip_special_tokens=True)}"

    # BUG FIX: the original looped num_questions times over the *same*
    # prompt with default (greedy, deterministic) decoding, so every
    # iteration produced the identical question. A single sampled call
    # with num_return_sequences yields distinct candidates instead.
    generated = generator(
        prompt,
        max_length=100,
        num_return_sequences=num_questions,
        do_sample=True,  # sampling is required for distinct sequences
    )
    return [item["generated_text"] for item in generated]