import subprocess, sys subprocess.run([sys.executable, "-m", "spacy", "download", "en_core_web_sm"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) import streamlit as st import torch import spacy from transformers import T5ForConditionalGeneration, AutoTokenizer # ── Page config ────────────────────────────────────────────────────── st.set_page_config( page_title="QuestionForge — AI Question Generator", page_icon="⚡", layout="wide", initial_sidebar_state="collapsed", ) # ── Global CSS ──────────────────────────────────────────────────────── st.markdown(""" """, unsafe_allow_html=True) # ══════════════════════════════════════════════════════════════════════ # HERO # ══════════════════════════════════════════════════════════════════════ st.markdown("""
 T5 · SQuAD 1.1 · NLP · MPhil Data Science 
Automatic Question
Generation

Paste any educational passage and generate exam-ready questions instantly using a fine-tuned T5 Transformer.

0.4509ROUGE-1
0.2445ROUGE-2
0.4167ROUGE-L
0.1581BLEU
60MParameters
""", unsafe_allow_html=True) # ══════════════════════════════════════════════════════════════════════ # MODEL LOADING # ══════════════════════════════════════════════════════════════════════ @st.cache_resource(show_spinner=False) def load_model(): MODEL_ID = "Hamzasajjad38/t5-small-qg" tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=True) model = T5ForConditionalGeneration.from_pretrained(MODEL_ID) device = "cuda" if torch.cuda.is_available() else "cpu" model = model.to(device) nlp = spacy.load("en_core_web_sm") return tokenizer, model, device, nlp with st.spinner("⚡ Loading T5 model from Hugging Face Hub…"): tokenizer, model, device, nlp = load_model() # ══════════════════════════════════════════════════════════════════════ # SAMPLE PASSAGES # ══════════════════════════════════════════════════════════════════════ SAMPLES = { "— paste your own —": "", "🌿 Biology — Photosynthesis": ( "Photosynthesis is a process used by plants and other organisms to convert light energy into " "chemical energy stored in glucose. This process takes place inside chloroplasts, where the " "green pigment chlorophyll absorbs sunlight. Carbon dioxide from the air and water from the " "soil are the main raw materials used during photosynthesis. Oxygen is released as a byproduct " "of this process, which is essential for most life on Earth." ), "⚙️ History — Industrial Revolution": ( "The Industrial Revolution began in Britain in the late 18th century and transformed " "manufacturing, agriculture, and transportation. The invention of the steam engine by James " "Watt in 1769 played a pivotal role, enabling factories to operate machinery and railroads to " "carry goods across the country. Workers migrated from rural areas to cities, leading to rapid " "urbanization and significant social changes across Europe and North America." ), "🤖 AI — Machine Learning": ( "Machine learning is a subset of artificial intelligence that enables systems to learn from " "data and improve their performance without being explicitly programmed. Supervised learning, " "unsupervised learning, and reinforcement learning are the three main paradigms. Neural networks " "form the foundation of deep learning, revolutionizing fields like computer vision, speech " "recognition, and natural language processing. Geoffrey Hinton, Yann LeCun, and Yoshua Bengio " "are widely regarded as the pioneers of deep learning." ), "🌍 Geography — Amazon River": ( "The Amazon River in South America is the largest river in the world by water discharge, " "carrying about 20 percent of all freshwater that flows into the oceans. It flows through " "Brazil, Peru, and Colombia, spanning over 6,400 kilometres. Its basin hosts the world's " "largest tropical rainforest, home to millions of plant and animal species. The Amazon plays " "a critical role in regulating Earth's climate by absorbing vast amounts of carbon dioxide." ), "🔬 Physics — Relativity": ( "Albert Einstein published his special theory of relativity in 1905 while working as a patent " "clerk in Bern, Switzerland. The theory introduced the famous equation E = mc², demonstrating " "that mass and energy are interchangeable. In 1915, Einstein extended this with the general " "theory of relativity, which described gravity as the curvature of spacetime caused by mass. " "In 1921, he received the Nobel Prize in Physics for the discovery of the law of the " "photoelectric effect." ), } # ══════════════════════════════════════════════════════════════════════ # LAYOUT — two columns # ══════════════════════════════════════════════════════════════════════ col_left, gap_col, col_right = st.columns([11, 1, 12]) # ── LEFT — INPUT ───────────────────────────────────────────────────── with col_left: st.markdown('
01 — Passage
', unsafe_allow_html=True) sample_key = st.selectbox( "Load a sample", list(SAMPLES.keys()), label_visibility="collapsed", ) default_text = SAMPLES[sample_key] passage = st.text_area( "passage", value=default_text, height=240, placeholder="Paste any educational paragraph here…", label_visibility="collapsed", ) st.markdown('
02 — Settings
', unsafe_allow_html=True) num_q = st.slider( "Number of questions", min_value=1, max_value=10, value=5, help="How many questions to generate (1 – 10)" ) st.markdown("
", unsafe_allow_html=True) gen_clicked = st.button("⚡ Generate Questions") # ══════════════════════════════════════════════════════════════════════ # GENERATION LOGIC # ══════════════════════════════════════════════════════════════════════ def generate_questions(passage: str, num_q: int): """ Extract candidate answer spans (NER + noun chunks), then run them through the fine-tuned T5 model. Returns a list of question strings (no answers). """ doc = nlp(passage) # Priority 1: Named entities candidates = [e.text.strip() for e in doc.ents if len(e.text.strip().split()) <= 5 and len(e.text.strip()) > 1] # Priority 2: Noun chunks (fallback / top-up) if len(candidates) < num_q: for chunk in doc.noun_chunks: c = chunk.text.strip() if 1 < len(c.split()) <= 5 and c not in candidates: candidates.append(c) # De-duplicate while preserving order seen, unique = set(), [] for c in candidates: key = c.lower() if key not in seen: seen.add(key) unique.append(c) candidates = unique[:num_q] if not candidates: return [] questions = [] seen_q = set() for answer in candidates: # Build highlighted input highlighted = passage.replace(answer, f" {answer} ", 1) inp = tokenizer( f"generate question: {highlighted}", return_tensors="pt", max_length=512, truncation=True ).to(device) out = model.generate( **inp, max_new_tokens=64, num_beams=4, early_stopping=True, no_repeat_ngram_size=3, ) q = tokenizer.decode(out[0], skip_special_tokens=True).strip() # Deduplicate questions if q.lower() not in seen_q: seen_q.add(q.lower()) questions.append(q) return questions # ── RIGHT — OUTPUT ──────────────────────────────────────────────────── with col_right: st.markdown('', unsafe_allow_html=True) if gen_clicked: if not passage or len(passage.strip()) < 40: st.warning("Please enter a more detailed passage (at least 40 characters) for best results.") else: with st.spinner("Generating questions with T5…"): questions = generate_questions(passage.strip(), num_q) if not questions: st.error("Could not extract answer candidates. Try a more detailed or factual passage.") else: # ── Result header ────────────────────────────────── st.markdown(f"""
⚡ {len(questions)} question{"s" if len(questions) != 1 else ""} generated
""", unsafe_allow_html=True) # ── Question cards (no answers) ──────────────────── download_text = "" for i, q in enumerate(questions, 1): st.markdown(f"""
{i}
{q}
""", unsafe_allow_html=True) download_text += f"Q{i}. {q}\n\n" # ── Download ─────────────────────────────────────── st.markdown("
", unsafe_allow_html=True) st.download_button( label="📄 Download questions (.txt)", data=download_text.strip(), file_name="questions.txt", mime="text/plain", ) else: # Empty state st.markdown("""
Questions appear here

Choose a sample or paste your passage,
set the number of questions, then click generate.

""", unsafe_allow_html=True) # ══════════════════════════════════════════════════════════════════════ # FOOTER # ══════════════════════════════════════════════════════════════════════ st.markdown("""

MPhil Data Science  ·  NLP Course Project  ·  M. Hamza Sajjad & Shanza Gul  ·  Supervised by Dr. Adnan Abid
Model: Hamzasajjad38/t5-small-qg  ·  Dataset: SQuAD 1.1

""", unsafe_allow_html=True)