Hamzasajjad38 committed on
Commit
99f63cf
·
verified ·
1 Parent(s): 873b826

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -73
app.py CHANGED
@@ -1,15 +1,11 @@
 
 
 
 
1
  import gradio as gr
2
- import spacy
3
- import torch
4
- import subprocess
5
- import sys
6
  from transformers import T5ForConditionalGeneration, AutoTokenizer
7
 
8
- subprocess.run(
9
- [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
10
- check=True
11
- )
12
-
13
  MODEL_ID = "Hamzasajjad38/t5-small-qg"
14
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=True)
15
  model = T5ForConditionalGeneration.from_pretrained(MODEL_ID)
@@ -18,81 +14,54 @@ model = model.to(device)
18
  nlp = spacy.load("en_core_web_sm")
19
 
20
  def generate_questions(passage, num_questions, question_type):
21
- if not passage.strip() or len(passage) < 30:
22
- return "⚠️ Please enter a longer passage."
23
-
24
  doc = nlp(passage)
25
- candidates = [ent.text for ent in doc.ents]
26
-
27
  if len(candidates) < num_questions:
28
- candidates += [chunk.text for chunk in doc.noun_chunks]
29
-
30
  seen, unique = set(), []
31
  for c in candidates:
32
- if c.lower() not in seen and len(c.split()) <= 5:
33
- seen.add(c.lower())
 
34
  unique.append(c)
35
- candidates = unique[:num_questions]
36
-
37
  if not candidates:
38
- return "⚠️ Could not extract candidates. Try a longer passage."
39
-
40
  lines = []
41
  for i, answer in enumerate(candidates):
42
  highlighted = passage.replace(answer, f"<hl> {answer} <hl>", 1)
43
- input_text = f"generate question: {highlighted}"
44
- inputs = tokenizer(input_text, return_tensors="pt",
45
- max_length=512, truncation=True).to(device)
46
- outputs = model.generate(**inputs, max_new_tokens=64,
47
- num_beams=4, early_stopping=True)
48
- question = tokenizer.decode(outputs[0], skip_special_tokens=True)
49
-
50
  if question_type == "True / False":
51
- question = f"True or False: {question.rstrip('?')}?"
52
-
53
- lines.append(f"Q{i+1}: {question}")
54
  lines.append(f" Answer: {answer}\n")
55
-
56
  return "\n".join(lines)
57
 
58
- with gr.Blocks(title="Automatic Question Generator", theme=gr.themes.Soft()) as demo:
59
- gr.Markdown("# 🧠 Automatic Question Generator\n**Fine-tuned T5-small on SQuAD 1.1**")
60
-
61
- with gr.Row():
62
- with gr.Column():
63
- passage_input = gr.Textbox(
64
- label="Input Passage",
65
- placeholder="Paste any educational paragraph here...",
66
- lines=8
67
- )
68
- num_q = gr.Slider(1, 6, value=3, step=1, label="Number of questions")
69
- q_type = gr.Dropdown(
70
- ["Short answer", "True / False"],
71
- value="Short answer",
72
- label="Question type"
73
- )
74
- gen_btn = gr.Button("⚡ Generate Questions", variant="primary")
75
-
76
- with gr.Column():
77
- output_box = gr.Textbox(
78
- label="Generated Questions",
79
- lines=16,
80
- interactive=False
81
- )
82
-
83
- gr.Examples(
84
- examples=[
85
- ["Photosynthesis is a process used by plants to convert light energy into chemical energy stored in glucose. It occurs inside chloroplasts using chlorophyll to absorb sunlight.", 3, "Short answer"],
86
- ["The Industrial Revolution began in Britain in the late 18th century. The steam engine invented by James Watt transformed manufacturing and transportation.", 3, "Short answer"],
87
- ["Machine learning is a subset of artificial intelligence where systems learn from data to improve performance without being explicitly programmed.", 2, "True / False"],
88
- ],
89
- inputs=[passage_input, num_q, q_type]
90
- )
91
-
92
- gen_btn.click(
93
- fn=generate_questions,
94
- inputs=[passage_input, num_q, q_type],
95
- outputs=output_box
96
- )
97
 
98
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
1
+ import subprocess, sys
2
+ subprocess.run([sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
3
+ stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
4
+
5
  import gradio as gr
6
+ import spacy, torch
 
 
 
7
  from transformers import T5ForConditionalGeneration, AutoTokenizer
8
 
 
 
 
 
 
9
  MODEL_ID = "Hamzasajjad38/t5-small-qg"
10
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=True)
11
  model = T5ForConditionalGeneration.from_pretrained(MODEL_ID)
 
14
  nlp = spacy.load("en_core_web_sm")
15
 
16
  def generate_questions(passage, num_questions, question_type):
17
+ if not passage or len(passage.strip()) < 30:
18
+ return "Please enter a longer passage (at least 30 characters)."
 
19
  doc = nlp(passage)
20
+ candidates = [e.text for e in doc.ents]
 
21
  if len(candidates) < num_questions:
22
+ candidates += [c.text for c in doc.noun_chunks]
 
23
  seen, unique = set(), []
24
  for c in candidates:
25
+ cl = c.lower().strip()
26
+ if cl not in seen and 1 < len(c.split()) <= 5:
27
+ seen.add(cl)
28
  unique.append(c)
29
+ candidates = unique[:int(num_questions)]
 
30
  if not candidates:
31
+ return "Could not extract answer candidates. Try a more detailed passage."
 
32
  lines = []
33
  for i, answer in enumerate(candidates):
34
  highlighted = passage.replace(answer, f"<hl> {answer} <hl>", 1)
35
+ inp = tokenizer(f"generate question: {highlighted}",
36
+ return_tensors="pt", max_length=512,
37
+ truncation=True).to(device)
38
+ out = model.generate(**inp, max_new_tokens=64,
39
+ num_beams=4, early_stopping=True)
40
+ q = tokenizer.decode(out[0], skip_special_tokens=True)
 
41
  if question_type == "True / False":
42
+ q = f"True or False: {q.rstrip('?')}?"
43
+ lines.append(f"Q{i+1}: {q}")
 
44
  lines.append(f" Answer: {answer}\n")
 
45
  return "\n".join(lines)
46
 
47
+ demo = gr.Interface(
48
+ fn=generate_questions,
49
+ inputs=[
50
+ gr.Textbox(lines=8, label="Input Passage",
51
+ placeholder="Paste any educational paragraph here..."),
52
+ gr.Slider(1, 5, value=3, step=1, label="Number of Questions"),
53
+ gr.Radio(["Short answer", "True / False"],
54
+ value="Short answer", label="Question Type"),
55
+ ],
56
+ outputs=gr.Textbox(lines=14, label="Generated Questions"),
57
+ title="Automatic Question Generator",
58
+ description="Fine-tuned T5-small on SQuAD 1.1 — paste any passage to generate questions.",
59
+ examples=[
60
+ ["Photosynthesis is a process used by plants to convert light energy into chemical energy stored in glucose. It occurs inside chloroplasts using chlorophyll to absorb sunlight.", 3, "Short answer"],
61
+ ["The Industrial Revolution began in Britain in the late 18th century. The steam engine invented by James Watt transformed manufacturing and transportation across the country.", 3, "Short answer"],
62
+ ["Machine learning is a subset of artificial intelligence where systems learn from data to improve performance without being explicitly programmed.", 2, "True / False"],
63
+ ],
64
+ allow_flagging="never",
65
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
+ demo.launch()