pratikshahp committed on
Commit
adfa719
·
verified ·
1 Parent(s): 953f0c6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -14
app.py CHANGED
@@ -1,11 +1,11 @@
1
  import streamlit as st
2
  import fitz # PyMuPDF
3
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
4
 
5
- # Load the model and tokenizer
6
- model_name = "t5-small" # or another model suitable for question generation
7
  tokenizer = AutoTokenizer.from_pretrained(model_name)
8
- model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
9
 
10
  # Function to extract text from PDF
11
  def extract_text_from_pdf(pdf_file):
@@ -16,7 +16,7 @@ def extract_text_from_pdf(pdf_file):
16
  text += page.get_text()
17
  return text
18
 
19
- # Function to generate MCQs using Hugging Face model
20
  def generate_mcqs(text, num_questions=5):
21
  if not text.strip():
22
  return ["No text extracted from the PDF. Unable to generate MCQs."]
@@ -25,21 +25,19 @@ def generate_mcqs(text, num_questions=5):
25
  inputs = tokenizer(text, return_tensors="pt", max_length=max_input_length, truncation=True)
26
 
27
  # Create the question generation pipeline
28
- generator = pipeline("text2text-generation", model=model, tokenizer=tokenizer)
29
  mcqs = []
30
  for _ in range(num_questions):
31
  # Generate a single MCQ at a time
32
- input_text = f"generate question: {tokenizer.decode(inputs['input_ids'][0])}"
33
  generated = generator(input_text, max_length=100, num_return_sequences=1)
34
- mcq = generated[0]["generated_text"]
35
 
36
- # Dummy logic to create options and correct answer (for demonstration purposes)
37
- # This logic should be replaced with actual code to generate meaningful MCQ options
38
- question = mcq
39
- options = ["Option A", "Option B", "Option C", "Option D"]
40
- correct_answer = options[0] # Placeholder for correct answer
41
 
42
- mcq_formatted = f"Q: {question}\nOption A: {options[0]}\nOption B: {options[1]}\nOption C: {options[2]}\nOption D: {options[3]}\nCorrect Answer: {correct_answer}"
43
  mcqs.append(mcq_formatted)
44
 
45
  return mcqs
 
1
  import streamlit as st
2
  import fitz # PyMuPDF
3
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
4
 
5
+ # Load the LLaMA model and tokenizer
6
+ model_name = "meta-llama/Meta-Llama-3-8B" # Update this with the correct LLaMA model name
7
  tokenizer = AutoTokenizer.from_pretrained(model_name)
8
+ model = AutoModelForCausalLM.from_pretrained(model_name)
9
 
10
  # Function to extract text from PDF
11
  def extract_text_from_pdf(pdf_file):
 
16
  text += page.get_text()
17
  return text
18
 
19
+ # Function to generate MCQs using the LLaMA model
20
  def generate_mcqs(text, num_questions=5):
21
  if not text.strip():
22
  return ["No text extracted from the PDF. Unable to generate MCQs."]
 
25
  inputs = tokenizer(text, return_tensors="pt", max_length=max_input_length, truncation=True)
26
 
27
  # Create the question generation pipeline
28
+ generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
29
  mcqs = []
30
  for _ in range(num_questions):
31
  # Generate a single MCQ at a time
32
+ input_text = f"Generate a multiple choice question from the following text: {tokenizer.decode(inputs['input_ids'][0])}"
33
  generated = generator(input_text, max_length=100, num_return_sequences=1)
34
+ question_text = generated[0]["generated_text"]
35
 
36
+ # Format the MCQ
37
+ options = ["Option A: ABC", "Option B: DEF", "Option C: GHI", "Option D: JKL"] # Placeholder options
38
+ correct_answer = "Option A: ABC" # Placeholder correct answer for now
 
 
39
 
40
+ mcq_formatted = f"Q: {question_text}\nOption A: {options[0]}\nOption B: {options[1]}\nOption C: {options[2]}\nOption D: {options[3]}\nCorrect Answer: {correct_answer}"
41
  mcqs.append(mcq_formatted)
42
 
43
  return mcqs