Update app.py
Browse files
app.py
CHANGED
|
@@ -1,15 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
-
import spacy
|
| 3 |
-
import torch
|
| 4 |
-
import subprocess
|
| 5 |
-
import sys
|
| 6 |
from transformers import T5ForConditionalGeneration, AutoTokenizer
|
| 7 |
|
| 8 |
-
subprocess.run(
|
| 9 |
-
[sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
|
| 10 |
-
check=True
|
| 11 |
-
)
|
| 12 |
-
|
| 13 |
MODEL_ID = "Hamzasajjad38/t5-small-qg"
|
| 14 |
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=True)
|
| 15 |
model = T5ForConditionalGeneration.from_pretrained(MODEL_ID)
|
|
@@ -18,81 +14,54 @@ model = model.to(device)
|
|
| 18 |
nlp = spacy.load("en_core_web_sm")
|
| 19 |
|
| 20 |
def generate_questions(passage, num_questions, question_type):
|
| 21 |
-
if not passage
|
| 22 |
-
return "
|
| 23 |
-
|
| 24 |
doc = nlp(passage)
|
| 25 |
-
candidates = [
|
| 26 |
-
|
| 27 |
if len(candidates) < num_questions:
|
| 28 |
-
candidates += [
|
| 29 |
-
|
| 30 |
seen, unique = set(), []
|
| 31 |
for c in candidates:
|
| 32 |
-
|
| 33 |
-
|
|
|
|
| 34 |
unique.append(c)
|
| 35 |
-
candidates = unique[:num_questions]
|
| 36 |
-
|
| 37 |
if not candidates:
|
| 38 |
-
return "
|
| 39 |
-
|
| 40 |
lines = []
|
| 41 |
for i, answer in enumerate(candidates):
|
| 42 |
highlighted = passage.replace(answer, f"<hl> {answer} <hl>", 1)
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
if question_type == "True / False":
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
lines.append(f"Q{i+1}: {question}")
|
| 54 |
lines.append(f" Answer: {answer}\n")
|
| 55 |
-
|
| 56 |
return "\n".join(lines)
|
| 57 |
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
output_box = gr.Textbox(
|
| 78 |
-
label="Generated Questions",
|
| 79 |
-
lines=16,
|
| 80 |
-
interactive=False
|
| 81 |
-
)
|
| 82 |
-
|
| 83 |
-
gr.Examples(
|
| 84 |
-
examples=[
|
| 85 |
-
["Photosynthesis is a process used by plants to convert light energy into chemical energy stored in glucose. It occurs inside chloroplasts using chlorophyll to absorb sunlight.", 3, "Short answer"],
|
| 86 |
-
["The Industrial Revolution began in Britain in the late 18th century. The steam engine invented by James Watt transformed manufacturing and transportation.", 3, "Short answer"],
|
| 87 |
-
["Machine learning is a subset of artificial intelligence where systems learn from data to improve performance without being explicitly programmed.", 2, "True / False"],
|
| 88 |
-
],
|
| 89 |
-
inputs=[passage_input, num_q, q_type]
|
| 90 |
-
)
|
| 91 |
-
|
| 92 |
-
gen_btn.click(
|
| 93 |
-
fn=generate_questions,
|
| 94 |
-
inputs=[passage_input, num_q, q_type],
|
| 95 |
-
outputs=output_box
|
| 96 |
-
)
|
| 97 |
|
| 98 |
-
demo.launch(
|
|
|
|
| 1 |
+
import subprocess, sys
|
| 2 |
+
# Install the spaCy model that is loaded further down in this file.
# The download stays best-effort (output suppressed, start-up not aborted),
# but a failing download is now reported on stderr instead of being swallowed
# silently.
_spacy_download = subprocess.run(
    [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
    stdout=subprocess.DEVNULL,
    stderr=subprocess.DEVNULL,
    check=False,
)
if _spacy_download.returncode != 0:
    print(
        "warning: 'spacy download en_core_web_sm' exited with code "
        f"{_spacy_download.returncode}",
        file=sys.stderr,
    )
|
| 4 |
+
|
| 5 |
import gradio as gr
|
| 6 |
+
import spacy, torch
|
|
|
|
|
|
|
|
|
|
| 7 |
from transformers import T5ForConditionalGeneration, AutoTokenizer
|
| 8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
# Checkpoint identifier handed to both from_pretrained() calls below.
MODEL_ID = "Hamzasajjad38/t5-small-qg"
# The tokenizer and the T5 model are created once at import time;
# generate_questions() uses these module-level objects on every call.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=True)
model = T5ForConditionalGeneration.from_pretrained(MODEL_ID)
|
|
|
|
| 14 |
# spaCy pipeline used by generate_questions(), which reads its named entities
# and noun chunks as answer candidates.
nlp = spacy.load("en_core_web_sm")
|
| 15 |
|
| 16 |
def _select_answer_candidates(doc, limit):
    """Return up to *limit* unique answer phrases taken from *doc*.

    Named entities are considered first. Noun chunks are added only when the
    entities that survive filtering are fewer than *limit*. A phrase is kept
    when it has between two and five whitespace-separated words and its
    lower-cased, stripped text has not been seen before.
    """
    seen, selected = set(), []

    def _collect(spans):
        for span in spans:
            text = span.text
            key = text.lower().strip()
            if key not in seen and 1 < len(text.split()) <= 5:
                seen.add(key)
                selected.append(text)

    _collect(doc.ents)
    # Decide on the fallback AFTER filtering, so that discarded entities do
    # not prevent noun chunks from filling the remaining slots.
    if len(selected) < limit:
        _collect(doc.noun_chunks)
    return selected[:limit]


def generate_questions(passage, num_questions, question_type):
    """Generate questions about *passage* and return them as display text.

    Args:
        passage: Source text. It must contain at least 30 characters after
            stripping surrounding whitespace.
        num_questions: Maximum number of questions to produce; converted
            with ``int()`` so a float-valued UI control is accepted.
        question_type: When equal to ``"True / False"`` each generated
            question is prefixed with ``"True or False: "``; any other value
            leaves the question unchanged.

    Returns:
        A string with one ``Q<n>: ...`` line and one ``Answer: ...`` line per
        question, or a short message when the passage is too short or no
        answer candidates could be extracted.

    Relies on the module-level ``nlp``, ``tokenizer``, ``model`` and
    ``device`` objects.
    """
    if not passage or len(passage.strip()) < 30:
        return "Please enter a longer passage (at least 30 characters)."

    candidates = _select_answer_candidates(nlp(passage), int(num_questions))
    if not candidates:
        return "Could not extract answer candidates. Try a more detailed passage."

    lines = []
    for i, answer in enumerate(candidates, start=1):
        # Mark the first occurrence of the answer so the model knows which
        # span the question should target.
        highlighted = passage.replace(answer, f"<hl> {answer} <hl>", 1)
        inp = tokenizer(
            f"generate question: {highlighted}",
            return_tensors="pt",
            max_length=512,
            truncation=True,
        ).to(device)
        out = model.generate(
            **inp,
            max_new_tokens=64,
            num_beams=4,
            early_stopping=True,
        )
        q = tokenizer.decode(out[0], skip_special_tokens=True)
        if question_type == "True / False":
            q = f"True or False: {q.rstrip('?')}?"
        lines.append(f"Q{i}: {q}")
        lines.append(f" Answer: {answer}\n")
    return "\n".join(lines)
|
| 46 |
|
| 47 |
+
# Example rows offered in the UI: passage, number of questions, question type.
_EXAMPLE_ROWS = [
    ["Photosynthesis is a process used by plants to convert light energy into chemical energy stored in glucose. It occurs inside chloroplasts using chlorophyll to absorb sunlight.", 3, "Short answer"],
    ["The Industrial Revolution began in Britain in the late 18th century. The steam engine invented by James Watt transformed manufacturing and transportation across the country.", 3, "Short answer"],
    ["Machine learning is a subset of artificial intelligence where systems learn from data to improve performance without being explicitly programmed.", 2, "True / False"],
]

# Input widgets, listed in the positional order of generate_questions()'s
# parameters (passage, num_questions, question_type).
_INPUT_WIDGETS = [
    gr.Textbox(
        lines=8,
        label="Input Passage",
        placeholder="Paste any educational paragraph here...",
    ),
    gr.Slider(1, 5, value=3, step=1, label="Number of Questions"),
    gr.Radio(
        ["Short answer", "True / False"],
        value="Short answer",
        label="Question Type",
    ),
]

# Single-function Gradio app wired to generate_questions().
demo = gr.Interface(
    fn=generate_questions,
    inputs=_INPUT_WIDGETS,
    outputs=gr.Textbox(lines=14, label="Generated Questions"),
    title="Automatic Question Generator",
    description="Fine-tuned T5-small on SQuAD 1.1 — paste any passage to generate questions.",
    examples=_EXAMPLE_ROWS,
    allow_flagging="never",
)

demo.launch()
|