Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,122 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
from PyPDF2 import PdfReader
|
| 3 |
+
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
| 4 |
+
from langchain import PromptTemplate
|
| 5 |
+
from langchain import LLMChain
|
| 6 |
+
from langchain_together import Together
|
| 7 |
+
import re
|
| 8 |
+
from docx import Document
|
| 9 |
+
import os
|
| 10 |
+
|
| 11 |
+
# Initialize the Together client and the local summarization model.
#
# SECURITY: the Together API key must be supplied through the
# TOGETHER_API_KEY environment variable (for example as a Space secret).
# A key used to be hard-coded on this line; any key that has been committed
# to the repository should be treated as leaked and rotated.
if not os.environ.get("TOGETHER_API_KEY"):
    raise RuntimeError(
        "TOGETHER_API_KEY is not set; configure it as an environment "
        "variable or secret instead of hard-coding it in app.py."
    )

# Seq2seq checkpoint used by Summary_BART for the local summarization step.
checkpoint = "sshleifer/distilbart-cnn-12-6"

# Hosted chat model that writes the quiz questions.
# NOTE(review): presumably the client picks the key up from TOGETHER_API_KEY
# (the original code set that variable right before this call) - confirm.
llama3 = Together(model="meta-llama/Llama-3-70b-chat-hf", max_tokens=2500)

# Tokenizer and model are loaded once at import time and shared by all requests.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
|
| 17 |
+
def Summary_BART(text):
    """Summarize *text* with the module-level seq2seq model.

    The input is tokenized with truncation at 1024 tokens, so anything past
    that limit does not influence the summary. Returns the first decoded
    sequence as a string.
    """
    encoded = tokenizer(
        text,
        max_length=1024,
        truncation=True,
        return_tensors="pt",
    )
    generated_ids = model.generate(encoded["input_ids"])
    decoded = tokenizer.batch_decode(
        generated_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
    # Only one input was encoded, so only the first decoded entry is used.
    return decoded[0]
|
| 22 |
+
|
| 23 |
+
def _extract_pdf_text(file):
    """Return the concatenated text of every page of the PDF at *file*."""
    reader = PdfReader(file)
    # Defensive: fall back to "" if a page yields no text, so a single
    # text-less page cannot break the concatenation with a TypeError.
    return "".join(page.extract_text() or "" for page in reader.pages)


def _parse_quiz_response(response_text):
    """Split the LLM reply into MCQ tuples and short-question strings.

    Returns ``(mcqs, short_questions)`` where each MCQ is a
    ``(question, correct_answer, incorrect_answers)`` tuple of strings and
    each short question is the question text only (the "Answer:" line is
    not captured).
    """
    # The leading "N." is optional because the prompt's requested format does
    # not include numbering; "(?:\n|$)" also accepts an item that ends the
    # reply without a trailing newline.
    mcq_pattern = (
        r'(?:\d+\.\s*)?Question:\s*(.*?)\nCorrect answer:\s*(.*?)'
        r'\nIncorrect answers:\s*(.*?)(?:\n|$)'
    )
    short_question_pattern = r'(?:\d+\.\s*)?SQ:\s*(.*?)(?:\n|$)'

    mcqs = re.findall(mcq_pattern, response_text, re.DOTALL)
    short_questions = re.findall(short_question_pattern, response_text, re.DOTALL)
    return mcqs, short_questions


def DocToQuizz(file, difficulty_level):
    """Generate a quiz Word document from a PDF.

    The PDF text is summarized with ``Summary_BART``, the summary is sent to
    the ``llama3`` LLM with a prompt asking for 10 MCQs and 10 short
    questions, and the parsed questions are written to a .docx file.

    Args:
        file: Path (or file object) of the PDF, passed to ``PdfReader``.
        difficulty_level: Difficulty label inserted into the prompt.

    Returns:
        The path of the saved document, ``"Physics_Questions.docx"``.
    """
    # Read the PDF content and condense it before prompting the LLM.
    text = _extract_pdf_text(file)
    summary = Summary_BART(text)

    # Define the prompt template for generating questions
    mcq_template = """
Generate 20 different questions based on the following summary: {summary}
The difficulty level of the questions should be: {difficulty_level}

For the multiple-choice questions (MCQs), please provide the following for each question:
1. Question
- Use varied question formats such as:
- "How does...", "Why is...", "In what way...", "Which of the following...", "When does...", etc.
- Ensure questions are logically phrased and relevant to the content.
2. Correct answer
3. Three plausible incorrect answer options
4. Format: "Question: <question text>\nCorrect answer: <correct answer>\nIncorrect answers: <option1>, <option2>, <option3>"

For the short questions, please provide:
1. Question
- Use varied question formats to encourage conceptual understanding and avoid repetition.
- Ensure the short questions do not overlap in content with the MCQs.
2. Short, concise answer
3. Format: "SQ: <question text>\nAnswer: <answer>"

Generate 10 MCQs and 10 unique short questions in total, ensuring diverse question structures and logical phrasing.
"""
    prompt = PromptTemplate(
        input_variables=['summary', 'difficulty_level'],
        template=mcq_template,
    )

    Generated_mcqs = LLMChain(llm=llama3, prompt=prompt)
    response = Generated_mcqs.invoke({
        "summary": summary,
        "difficulty_level": difficulty_level,
    })
    response_text = response['text']

    # Extract MCQs and Short Questions
    mcqs, short_questions = _parse_quiz_response(response_text)

    # Initialize a Word document
    doc = Document()
    doc.add_heading("Physics Questions", level=1)

    # Add a section for MCQs with options
    doc.add_heading("Multiple Choice Questions (MCQs)", level=2)
    for idx, (question, correct_answer, incorrect_answers) in enumerate(mcqs, start=1):
        doc.add_paragraph(f"Q{idx}: {question.strip()}", style="List Number")
        # NOTE(review): the correct answer is always written as option A, so
        # the document doubles as its own answer key - confirm this is intended.
        doc.add_paragraph(f"A) {correct_answer.strip()}", style="List Bullet")
        # Incorrect options are lettered from B onwards (chr(66) == "B").
        for i, incorrect in enumerate(incorrect_answers.split(', '), start=2):
            doc.add_paragraph(f"{chr(64 + i)}) {incorrect.strip()}", style="List Bullet")

    # Add a page break and section for Short Questions
    doc.add_page_break()
    doc.add_heading("Short Questions", level=2)
    for idx, question in enumerate(short_questions, start=1):
        doc.add_paragraph(f"{idx}. {question.strip()}", style="Body Text")

    # Save the document. The file name is fixed, so each call overwrites the
    # previous output.
    output_path = "Physics_Questions.docx"
    doc.save(output_path)
    return output_path
|
| 100 |
+
|
| 101 |
+
# Choices shown in the UI dropdowns. The PDF names are a fixed list
# (output_range_1.pdf .. output_range_9.pdf); nothing is scanned from disk.
pdf_files = [f"output_range_{number}.pdf" for number in range(1, 10)]
difficulty_levels = ["Easy", "Medium", "Hard"]
|
| 104 |
+
|
| 105 |
+
# Gradio Interface
|
| 106 |
+
def generate_quiz(file, difficulty_level):
    """Gradio callback: build the quiz for *file* and return the document path."""
    return DocToQuizz(file, difficulty_level)
|
| 109 |
+
|
| 110 |
+
# Build the Gradio UI: two dropdowns in, one downloadable file out.
interface = gr.Interface(
    fn=generate_quiz,
    inputs=[
        # Pre-select the first PDF so submitting without touching the
        # dropdown still passes a real file name to generate_quiz.
        gr.Dropdown(pdf_files, label="Select PDF File", value=pdf_files[0]),
        # The default must be one of the difficulty choices; a PDF file name
        # was previously (and wrongly) used as the default here.
        gr.Dropdown(
            difficulty_levels,
            label="Select Difficulty Level",
            value=difficulty_levels[0],
        ),
    ],
    outputs=gr.File(label="Download Quiz Document"),
    title="Quiz Generator",
    description="Select a PDF file and difficulty level to generate quiz questions.",
)

# Launch the interface
interface.launch(debug=True)
|