Leen172 committed on
Commit
282d730
·
verified ·
1 Parent(s): ecd3544

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -479
app.py CHANGED
@@ -1,10 +1,10 @@
1
  # -*- coding: utf-8 -*-
2
  # صفحتان ثابتتان + Submit لكل سؤال يعمل فعليًا + منع تغيّر أبعاد صفحة الإدخال
3
 
4
- import os, json, uuid, random, unicodedata, difflib, traceback
5
  from dataclasses import dataclass
6
  from pathlib import Path
7
- from typing import List, Tuple, Optional
8
 
9
  from PIL import Image
10
  from pypdf import PdfReader
@@ -105,7 +105,7 @@ def postprocess(raw:str)->str:
105
  t = re2.sub(r"\[\d+\]", " ", t)
106
  return norm_ar(t)
107
 
108
- # ------------------ أدوات ذكية داخلية ------------------
109
  SENT_SPLIT = re2.compile(r"(?<=[\.!؟\?])\s+")
110
  AR_STOP = set("""في على من إلى عن مع لدى ذلك هذه هذا الذين التي الذي أو أم إن أن كان تكون كانوا كانت كنت ثم قد لقد ربما بل لكن إلا سوى حتى حيث كما لما ما لماذا متى أين كيف أي هناك هنا هؤلاء أولئك نحن هو هي هم هن أنت أنتم أنتن""".split())
111
 
@@ -120,430 +120,52 @@ def split_sents(t:str)->List[str]:
120
  s=[x.strip() for x in SENT_SPLIT.split(t) if x.strip()]
121
  return [x for x in s if len(x)>=25]
122
 
123
- # --- (A) عبارات مفتاحية 1–3 كلمات + إزالة التداخل ---
124
- def yake_keyphrases(t: str, top_k: int = 180) -> List[str]:
125
- phrases = []
126
- seen = set()
127
- for n in [3, 2, 1]:
128
- try:
129
- ex = yake.KeywordExtractor(lan='ar', n=n, top=top_k)
130
- pairs = ex.extract_keywords(t)
131
- except Exception:
132
- pairs = []
133
- for w, _ in pairs:
134
- w = re2.sub(r"\s+", " ", w.strip())
135
- if not w or w in seen:
136
- continue
137
- if re2.match(r"^[\p{P}\p{S}\d_]+$", w):
138
- continue
139
- if 2 <= len(w) <= 42:
140
- phrases.append(w); seen.add(w)
141
- phrases_sorted = sorted(phrases, key=lambda x: (-len(x), x))
142
- kept=[]
143
- for p in phrases_sorted:
144
- if not any((p != q and p in q) for q in kept):
145
- kept.append(p)
146
- return kept
147
 
148
  def good_kw(kw:str)->bool:
149
- return kw and len(kw)>=2 and kw not in AR_STOP and not re2.match(r"^[\p{P}\p{S}\d_]+$", kw)
 
 
 
 
 
 
 
 
 
 
 
 
150
 
151
- # --- (B) تضمين جُمل/عبارات + كاش ---
152
- _EMB = None
153
- def get_embedder():
154
- global _EMB
155
- if _EMB is None:
156
- try:
157
- from sentence_transformers import SentenceTransformer
158
- _EMB = SentenceTransformer("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
159
- except Exception:
160
- _EMB = False
161
- return _EMB
162
-
163
- def embed_texts(texts: List[str]):
164
- emb = get_embedder()
165
- if not emb:
166
- return None
167
- return emb.encode(texts, normalize_embeddings=True)
168
-
169
- # --- (C) Fill-Mask عربي (AraBERT) + كاش ---
170
- _MLM = None
171
- def get_masker():
172
- global _MLM
173
- if _MLM is None:
174
- try:
175
- from transformers import pipeline
176
- _MLM = pipeline("fill-mask", model="aubmindlab/bert-base-arabertv02")
177
- except Exception:
178
- _MLM = False
179
- return _MLM
180
-
181
- def mlm_fill(sentence_with_blank: str, correct: str, k: int = 20) -> List[str]:
182
- masker = get_masker()
183
- if not masker:
184
- return []
185
- masked = sentence_with_blank.replace("_____", masker.tokenizer.mask_token)
186
- try:
187
- outs = masker(masked, top_k=max(25, k+10))
188
- cands = []
189
- for o in outs:
190
- tok = o["token_str"].strip()
191
- if tok and tok != correct and len(tok) >= 2 and not re2.match(r"^[\p{P}\p{S}\d_]+$", tok):
192
- cands.append(tok)
193
- seen=set(); uniq=[]
194
- for w in cands:
195
- if w not in seen:
196
- uniq.append(w); seen.add(w)
197
- return uniq[:k]
198
- except Exception:
199
- return []
200
-
201
- # --- (D) جيران دلاليًا لعبارة الهدف ---
202
- def nearest_terms(target: str, pool: List[str], k: int = 32) -> List[str]:
203
- emb = get_embedder()
204
- if not emb:
205
- return []
206
- cand = [w for w in pool if w != target and len(w) >= 2 and not re2.match(r"^[\p{P}\p{S}\d_]+$", w)]
207
- if not cand:
208
- return []
209
- vecs = emb.encode([target] + cand, normalize_embeddings=True)
210
- t, C = vecs[0], vecs[1:]
211
- import numpy as np
212
- sims = (C @ t)
213
- idx = np.argsort(-sims)[:k]
214
- return [cand[i] for i in idx]
215
-
216
- # --- (E) POS اختياري عبر Camel Tools ---
217
- _TAGGER = None
218
- def get_tagger():
219
- global _TAGGER
220
- if _TAGGER is None:
221
- try:
222
- from camel_tools.disambig.mle import MLEDisambiguator
223
- _TAGGER = MLEDisambiguator.pretrained()
224
- except Exception:
225
- _TAGGER = False
226
- return _TAGGER
227
-
228
- def phrase_pos(phrase: str) -> Optional[str]:
229
- tagger = get_tagger()
230
- if not tagger:
231
- return None
232
- try:
233
- toks = phrase.split()
234
- res = tagger.disambiguate(toks)
235
- return res[0].analyses[0].pos
236
- except Exception:
237
- return None
238
-
239
- def same_pos(a: str, b: str) -> bool:
240
- pa, pb = phrase_pos(a), phrase_pos(b)
241
- if pa is None or pb is None:
242
- return True
243
- return (pa == pb)
244
-
245
- # --- (F) تطبيع صرفي بسيط (الـ) ---
246
- def strip_al(s: str) -> str:
247
- return re2.sub(r"^\s*ال", "", s)
248
-
249
- def with_same_definiteness(ref: str, cand: str) -> str:
250
- ref_has_al = re2.match(r"^\s*ال", ref) is not None
251
- cand_has_al = re2.match(r"^\s*ال", cand) is not None
252
- if ref_has_al and not cand_has_al:
253
- return "ال" + cand
254
- if (not ref_has_al) and cand_has_al:
255
- return strip_al(cand)
256
- return cand
257
-
258
- # --- (G) تقييم الجملة للسؤال ---
259
- def sentence_score(s: str) -> float:
260
- L = len(s)
261
- base = 1.0 if (70 <= L <= 240) else -1.0
262
- punct = len(re2.findall(r"[^\p{L}\p{N}\s]", s))
263
- digits = len(re2.findall(r"\d", s))
264
- penalties = 0.0
265
- if punct > 10: penalties -= 0.5
266
- if digits > 6: penalties -= 0.5
267
- bonus = 0.2 if ("،" in s or ":" in s) else 0.0
268
- return base + bonus + penalties
269
-
270
- # ================== (NEW) جودة المشتِّتات والتطويل ==================
271
-
272
- global_full_text_cache = ""
273
- ref_phrase_cache = {}
274
-
275
- ADJ_WHITELIST = {"التعليمية","الذكية","الرقمية","الافتراضية","التكيفية","الحديثة","المتقدمة"}
276
- NOUN_PREFIXES = {"مجال","تقنيات","أنظمة","مفاهيم","نماذج","ممارسات","آليات","تطبيقات"}
277
-
278
- def is_arabic_word(w:str)->bool:
279
- return bool(re2.match(r"^[\p{Arabic}]+$", w))
280
-
281
- def clean_spaces(s:str)->str:
282
- s = re2.sub(r"\s+", " ", s).strip()
283
- s = re2.sub(r"\bال\s+ال\b", "ال", s)
284
- return s
285
-
286
- def bad_token(w:str)->bool:
287
- return (not is_arabic_word(w)) or (len(w) < 2 or len(w) > 18)
288
-
289
- def looks_weird(phrase:str)->bool:
290
- toks = [t for t in re2.split(r"\s+", phrase.strip()) if t]
291
- if len(toks) == 0: return True
292
- if any(bad_token(t) for t in toks): return True
293
- for i in range(1, len(toks)):
294
- if toks[i] == toks[i-1]:
295
- return True
296
- if len(set(toks)) <= len(toks) - 1:
297
- if any(toks.count(t) > 1 for t in toks):
298
- return True
299
- pos = [phrase_pos(t) or "" for t in toks]
300
- streak = 0
301
- for p in pos:
302
- if p.startswith("ADJ"):
303
- streak += 1
304
- if streak > 2: return True
305
- else:
306
- streak = 0
307
- return False
308
-
309
- def quality_score(phrase:str, sentence:str, full_text:str)->float:
310
- phrase = clean_spaces(phrase)
311
- if looks_weird(phrase):
312
- return 0.0
313
- hits = sum(1 for t in set(phrase.split()) if t in full_text)
314
- toks = phrase.split()
315
- pos0 = phrase_pos(toks[0]) or ""
316
- pos1 = phrase_pos(toks[1]) if len(toks)>1 else ""
317
- nominal_bonus = 0.2 if (pos0.startswith("N") and (not pos1 or pos1.startswith("ADJ"))) else 0.0
318
- return min(1.0, 0.3 + 0.1*hits + nominal_bonus)
319
-
320
- def word_len(s: str) -> int:
321
- return len([w for w in re2.split(r"\s+", s.strip()) if w])
322
-
323
- def within_ratio(cand: str, target_len: int, tol: float = 0.15) -> bool:
324
- L = word_len(cand)
325
- return (target_len*(1-tol) <= L <= target_len*(1+tol))
326
-
327
- def shape_phrase_like(ref: str, cand: str) -> str:
328
- return with_same_definiteness(ref, cand)
329
-
330
- def try_mlm_expand(cand: str, sentence_with_blank: str, target_len: int) -> Optional[str]:
331
- masker = get_masker()
332
- if not masker:
333
- return None
334
- trials = [
335
- sentence_with_blank.replace("_____", f"{masker.tokenizer.mask_token} {cand}"),
336
- sentence_with_blank.replace("_____", f"{cand} {masker.tokenizer.mask_token}")
337
- ]
338
- for masked in trials:
339
- try:
340
- outs = masker(masked, top_k=12)
341
- except Exception:
342
- continue
343
- for o in outs:
344
- tok = o["token_str"].strip()
345
- if not is_arabic_word(tok):
346
- continue
347
- if masked.startswith(masker.tokenizer.mask_token):
348
- if tok not in NOUN_PREFIXES:
349
- continue
350
- phrase = f"{tok} {cand}"
351
- else:
352
- if tok not in ADJ_WHITELIST:
353
- continue
354
- phrase = f"{cand} {tok}"
355
- phrase = clean_spaces(phrase)
356
- if within_ratio(phrase, target_len, tol=0.15) and norm_ar(phrase) != norm_ar(ref_phrase_cache.get("correct","")) and not looks_weird(phrase):
357
- return phrase
358
- return None
359
-
360
- def fallback_expand(cand: str, target_len: int) -> str:
361
- for p in NOUN_PREFIXES:
362
- phrase = f"{p} {cand}"
363
- if within_ratio(phrase, target_len, tol=0.15):
364
- return clean_spaces(phrase)
365
- for sfx in ADJ_WHITELIST:
366
- phrase = f"{cand} {sfx}"
367
- if within_ratio(phrase, target_len, tol=0.15):
368
- return clean_spaces(phrase)
369
- candidates = [f"{p} {cand}" for p in NOUN_PREFIXES] + [f"{cand} {sfx}" for sfx in ADJ_WHITELIST]
370
- candidates = sorted(candidates, key=lambda ph: abs(word_len(ph) - target_len))
371
- return clean_spaces(candidates[0])
372
-
373
- # --- (H*) ترتيب المرشّحات بالانسجام + الجودة + منع التشابه ---
374
- def rank_by_sentence_coherence(sentence_with_blank: str, correct: str, candidates: List[str], topk: int=3, full_text: str="") -> List[str]:
375
- emb = get_embedder()
376
- if not candidates:
377
- return []
378
- coherence = {}
379
- if emb:
380
- filled = [sentence_with_blank.replace("_____", c) for c in candidates]
381
- ref = sentence_with_blank.replace("_____", correct)
382
- vecs = embed_texts([ref] + filled)
383
- if vecs is not None:
384
- import numpy as np
385
- ref_vec = vecs[0]; cand_vecs = vecs[1:]
386
- sims = cand_vecs @ ref_vec
387
- for i, c in enumerate(candidates):
388
- coherence[c] = float(sims[i])
389
- qscore = {c: quality_score(c, sentence_with_blank, full_text) for c in candidates}
390
- def final_score(c):
391
- coh = coherence.get(c, 0.0)
392
- return 0.7*coh + 0.3*qscore.get(c, 0.0)
393
- ranked = sorted(candidates, key=lambda c: final_score(c), reverse=True)
394
-
395
- kept = []
396
- for c in ranked:
397
- if all(difflib.SequenceMatcher(None, c, x).ratio() < 0.90 for x in kept):
398
- kept.append(c)
399
- if len(kept) >= topk:
400
- break
401
- return kept[:topk]
402
-
403
- # --- (I) حصاد مصطلحات احتياطية عالية التكرار من النص كله ---
404
- def harvest_backup_terms(text: str, limit: int = 400) -> List[str]:
405
- toks = re2.findall(r"[\p{L}][\p{L}\p{N}_\-]{1,}", text)
406
- stats = {}
407
- for t in toks:
408
- tt = norm_ar(t)
409
- if not good_kw(tt):
410
- continue
411
- stats[tt] = stats.get(tt, 0) + 1
412
- top = [w for w,_ in sorted(stats.items(), key=lambda kv: -kv[1])]
413
- return top[:limit]
414
-
415
- # --- (J) مشتّتات ذكية تضمن دائمًا ≥3 خيارات فعلية + موازنة الطول ---
416
- def smart_distractors(correct: str, phrase_pool: List[str], sentence_with_blank: str, backup_terms: List[str], k: int = 3) -> List[str]:
417
- target = correct.strip()
418
- ref_phrase_cache["correct"] = target
419
-
420
- neigh = nearest_terms(target, phrase_pool, k=48)
421
- mlm = mlm_fill(sentence_with_blank, target, k=24)
422
-
423
- raw_pool = []
424
- seen=set()
425
- for w in neigh + mlm + phrase_pool:
426
- w = w.strip()
427
- if not w or norm_ar(w) == norm_ar(target):
428
- continue
429
- if w in AR_STOP or re2.match(r"^[\p{P}\p{S}\d_]+$", w):
430
- continue
431
- if w not in seen:
432
- seen.add(w); raw_pool.append(w)
433
-
434
- for w in backup_terms:
435
- if len(raw_pool) >= max(60, k*10): break
436
- if not w or norm_ar(w) == norm_ar(target):
437
- continue
438
- if w in AR_STOP or re2.match(r"^[\p{P}\p{S}\d_]+$", w):
439
- continue
440
- if w not in seen:
441
- seen.add(w); raw_pool.append(w)
442
-
443
- filtered = []
444
- for w in raw_pool:
445
- if same_pos(target, w):
446
- filtered.append(w)
447
- if len(filtered) >= max(24, k*6):
448
- break
449
- if not filtered:
450
- filtered = raw_pool[:max(24, k*6)]
451
-
452
- target_words = word_len(target)
453
- shaped = []
454
- for w in filtered:
455
- cand = shape_phrase_like(target, w)
456
- if within_ratio(cand, target_words, tol=0.15) and not looks_weird(cand):
457
- shaped.append(clean_spaces(cand))
458
- continue
459
- expanded = try_mlm_expand(cand, sentence_with_blank, target_words)
460
- if expanded and within_ratio(expanded, target_words, tol=0.15) and not looks_weird(expanded):
461
- shaped.append(clean_spaces(expanded))
462
- continue
463
- fb = fallback_expand(cand, target_words)
464
- if not looks_weird(fb):
465
- shaped.append(clean_spaces(fb))
466
-
467
- shaped = [s for s in shaped if norm_ar(s) != norm_ar(target)]
468
-
469
- ranked = rank_by_sentence_coherence(
470
- sentence_with_blank, target, shaped, topk=max(k, 12), full_text=global_full_text_cache
471
- )
472
-
473
- out = []
474
- for src in [ranked, shaped, filtered, raw_pool, backup_terms]:
475
- for w in src:
476
- if len(out) >= k: break
477
- if w and norm_ar(w) != norm_ar(target) and w not in out and not looks_weird(w):
478
- out.append(w)
479
- if len(out) >= k: break
480
-
481
- if len(out) < k:
482
- while len(out) < k and ranked:
483
- out.append(ranked[len(out) % len(ranked)])
484
-
485
- return out[:k]
486
-
487
- # ------------------ مُولِّد الأسئلة ------------------
488
  def make_mcqs(text:str, n:int=6)->List[MCQ]:
489
- global global_full_text_cache
490
- global_full_text_cache = text
491
-
492
- sents = split_sents(text)
493
- if not sents:
494
- raise ValueError("النص قصير أو غير صالح.")
495
-
496
- keyphrases = [kp for kp in yake_keyphrases(text, top_k=180) if good_kw(kp)]
497
- if not keyphrases:
498
- tokens = [t for t in re2.findall(r"[\p{L}\p{N}_]+", text) if good_kw(t)]
499
- freq = [w for w,_ in sorted(((t, text.count(t)) for t in tokens), key=lambda x:-x[1])]
500
- keyphrases = freq[:160]
501
-
502
- backup_terms = harvest_backup_terms(text, limit=400)
503
-
504
- kp2best_sent = {}
505
- for kp in keyphrases:
506
- best_s, best_sc = None, -9e9
507
- pat = re2.compile(rf"(?<!\p{{L}}){re2.escape(kp)}(?!\p{{L}})")
508
- for s in sents:
509
- if pat.search(s):
510
- sc = sentence_score(s)
511
- if sc > best_sc:
512
- best_s, best_sc = s, sc
513
- if best_s is not None:
514
- kp2best_sent[kp] = (best_s, best_sc)
515
-
516
- if not kp2best_sent:
517
- raise RuntimeError("تعذّر توليد أسئلة من هذا النص.")
518
-
519
- order = sorted(kp2best_sent.items(), key=lambda kv: (-len(kv[0]), -kv[1][1], kv[0]))
520
-
521
- items=[]; used_sents=set(); used_keys=set()
522
- for kp, (s, _) in order:
523
- if len(items) >= n:
524
- break
525
- if s in used_sents or kp in used_keys:
526
- continue
527
-
528
- q = re2.sub(rf"(?<!\p{{L}}){re2.escape(kp)}(?!\p{{L}})", "_____", s, count=1)
529
-
530
- pool = [x for x in keyphrases if x != kp]
531
- distracts = smart_distractors(kp, pool, q, backup_terms, k=3)
532
-
533
- ch = distracts + [kp]
534
-
535
- # ترتيب غير عشوائي: تدوير حتمي لموضع الصحيحة
536
- ch_sorted = sorted(ch, key=lambda c: c != kp)
537
- rot = (len(items) + (hash(kp) & 3)) % 4
538
- ch = ch_sorted[-rot:] + ch_sorted[:-rot]
539
-
540
- ans = ch.index(kp)
541
-
542
  items.append(MCQ(id=str(uuid.uuid4())[:8], question=q, choices=ch, answer_index=ans))
543
- used_sents.add(s); used_keys.add(kp)
544
-
545
- if not items:
546
- raise RuntimeError("تعذّر توليد أسئلة.")
547
  return items
548
 
549
  def to_records(items:List[MCQ])->List[dict]:
@@ -553,9 +175,7 @@ def to_records(items:List[MCQ])->List[dict]:
553
  for i,lbl in enumerate(["A","B","C","D"]):
554
  txt=(it.choices[i] if i<len(it.choices) else "—").strip()
555
  txt=txt.replace(",", "،").replace("?", "؟").replace(";", "؛")
556
- if txt == "—" or not txt:
557
- txt = "خيار"
558
- opts.append({"id":lbl,"text":txt or "خيار","is_correct":(i==it.answer_index)})
559
  recs.append({"id":it.id,"question":it.question.strip(),"options":opts})
560
  return recs
561
 
@@ -594,56 +214,17 @@ def render_quiz_html(records: List[dict]) -> str:
594
 
595
  # ------------------ توليد الامتحان وتبديل الصفحات ------------------
596
  def build_quiz(text_area, file_path, n, model_id, zoom):
597
- try:
598
- text_area = (text_area or "").strip()
599
- if not text_area and not file_path:
600
- return "", gr.update(visible=True), gr.update(visible=False), "🛈 الصق نصًا أو ارفع ملفًا أولًا."
601
-
602
- if text_area:
603
- raw = text_area
604
- else:
605
- if isinstance(file_path, (list, tuple)) and file_path:
606
- file_path = file_path[0]
607
- if not file_path or not os.path.exists(file_path):
608
- return "", gr.update(visible=True), gr.update(visible=False), "⚠️ تعذّر الوصول للملف المرفوع."
609
- raw, _ = file_to_text(str(file_path), model_id=model_id, zoom=float(zoom))
610
-
611
- cleaned = postprocess(raw)
612
-
613
- try:
614
- items = make_mcqs(cleaned, n=int(n))
615
- except Exception as inner_e:
616
- # Fallback بسيط يضمن توليد أسئلة حتى لو تعطل المسار الذكي
617
- sents = split_sents(cleaned)[:int(n)*2]
618
- if not sents:
619
- raise inner_e
620
- recs_items = []
621
- import itertools
622
- for s in sents:
623
- toks = [t for t in re2.findall(r"[\p{L}]{3,}", s) if t not in AR_STOP]
624
- if len(toks) < 4:
625
- continue
626
- kw = toks[len(toks)//3]
627
- q = re2.sub(rf"(?<!\p{{L}}){re2.escape(kw)}(?!\p{{L}})", "_____", s, count=1)
628
- pool = [w for w in toks if w != kw][:30]
629
- random.shuffle(pool)
630
- dis = list(dict.fromkeys(pool))[:3]
631
- while len(dis) < 3: dis.append("اختيار")
632
- ch = dis + [kw]; random.shuffle(ch)
633
- recs_items.append(MCQ(id=str(uuid.uuid4())[:8], question=q, choices=ch, answer_index=ch.index(kw)))
634
- if len(recs_items) >= int(n):
635
- break
636
- if not recs_items:
637
- raise inner_e
638
- items = recs_items
639
-
640
- recs = to_records(items)
641
- html = render_quiz_html(recs)
642
- return html, gr.update(visible=False), gr.update(visible=True), ""
643
- except Exception as e:
644
- err = f"❌ حدث خطأ أثناء التوليد:\n```\n{str(e)}\n```"
645
- traceback.print_exc()
646
- return "", gr.update(visible=True), gr.update(visible=False), err
647
 
648
  # ------------------ CSS ------------------
649
  CSS = """
@@ -651,7 +232,7 @@ CSS = """
651
  --bg:#0e0e11; --panel:#15161a; --card:#1a1b20; --muted:#a7b0be;
652
  --text:#f6f7fb; --accent:#6ee7b7; --accent2:#34d399; --danger:#ef4444; --border:#262833;
653
  }
654
- body{direction:rtl; font-family:system-ui,'Cairo','IBM Plex Arabic',sans-serif; background:#0e0e11;}
655
  .gradio-container{max-width:980px;margin:0 auto;padding:12px 12px 40px;}
656
  h2.top{color:#eaeaf2;margin:6px 0 16px}
657
 
@@ -674,7 +255,7 @@ textarea{min-height:120px}
674
  .q-header{display:flex;gap:10px;align-items:center;justify-content:space-between;margin-bottom:6px}
675
  .q-title{color:#eaeaf2;font-weight:800}
676
  .q-badge{padding:8px 12px;border-radius:10px;font-weight:700}
677
- .q-badge.ok{background:#0f2f22;color:#b6f4db;border:1px solid #145b44}
678
  .q-badge.err{background:#3a0d14;color:#ffd1d6;border:1px solid #6a1e2b}
679
 
680
  .q-text{color:var(--text);font-size:1.06rem;line-height:1.8;margin:8px 0 12px}
@@ -698,6 +279,7 @@ textarea{min-height:120px}
698
  # ------------------ JS: ربط Submit بعد الرندر (مع Output مخفي لضمان التنفيذ) ------------------
699
  ATTACH_LISTENERS_JS = """
700
  () => {
 
701
  if (window.__q_submit_bound_multi2) { return 'already'; }
702
  window.__q_submit_bound_multi2 = true;
703
 
@@ -720,28 +302,35 @@ ATTACH_LISTENERS_JS = """
720
 
721
  const chosenLabel = chosen.closest('.opt');
722
 
 
723
  if (chosen.value === correct) {
724
  chosenLabel.classList.add('ok');
725
  if (badge){ badge.hidden=false; badge.className='q-badge ok'; badge.textContent='Correct!'; }
 
726
  card.querySelectorAll('input[type="radio"]').forEach(i => i.disabled = true);
727
  e.target.disabled = true;
728
  if (note) note.textContent = '';
729
  return;
730
  }
731
 
732
- chosenLabel.classList.add('err');
 
733
  if (badge){ badge.hidden=false; badge.className='q-badge err'; badge.textContent='Incorrect.'; }
734
  if (note) note.textContent = '';
 
735
  });
736
 
737
  return 'wired-multi2';
738
  }
739
  """
740
 
 
 
741
  # ------------------ واجهة Gradio ------------------
742
  with gr.Blocks(title="Question Generator", css=CSS) as demo:
743
  gr.Markdown("<h2 class='top'>Question Generator</h2>")
744
 
 
745
  page1 = gr.Group(visible=True, elem_classes=["input-panel"])
746
  with page1:
747
  gr.Markdown("اختر **أحد** الخيارين ثم اضغط الزر.", elem_classes=["small"])
@@ -763,11 +352,13 @@ with gr.Blocks(title="Question Generator", css=CSS) as demo:
763
  btn_build = gr.Button("generate quistion", elem_classes=["button-primary"])
764
  warn = gr.Markdown("", elem_classes=["small"])
765
 
 
766
  page2 = gr.Group(visible=False)
767
  with page2:
768
  quiz_html = gr.HTML("")
769
- js_wired = gr.Textbox(visible=False)
770
 
 
771
  btn_build.click(
772
  build_quiz,
773
  inputs=[text_area, file_comp, num_q, trocr_model, trocr_zoom],
 
1
  # -*- coding: utf-8 -*-
2
  # صفحتان ثابتتان + Submit لكل سؤال يعمل فعليًا + منع تغيّر أبعاد صفحة الإدخال
3
 
4
+ import os, json, uuid, random, unicodedata
5
  from dataclasses import dataclass
6
  from pathlib import Path
7
+ from typing import List, Tuple
8
 
9
  from PIL import Image
10
  from pypdf import PdfReader
 
105
  t = re2.sub(r"\[\d+\]", " ", t)
106
  return norm_ar(t)
107
 
108
+ # ------------------ توليد أسئلة ------------------
109
  SENT_SPLIT = re2.compile(r"(?<=[\.!؟\?])\s+")
110
  AR_STOP = set("""في على من إلى عن مع لدى ذلك هذه هذا الذين التي الذي أو أم إن أن كان تكون كانوا كانت كنت ثم قد لقد ربما بل لكن إلا سوى حتى حيث كما لما ما لماذا متى أين كيف أي هناك هنا هؤلاء أولئك نحن هو هي هم هن أنت أنتم أنتن""".split())
111
 
 
120
  s=[x.strip() for x in SENT_SPLIT.split(t) if x.strip()]
121
  return [x for x in s if len(x)>=25]
122
 
123
+ def yake_keywords(t:str, k:int=160)->List[str]:
124
+ ex = yake.KeywordExtractor(lan='ar', n=1, top=k)
125
+ cands = [w for w,_ in ex.extract_keywords(t)]
126
+ out=[]; seen=set()
127
+ for k in cands:
128
+ k=k.strip()
129
+ if not k or k in seen or k in AR_STOP: continue
130
+ if len(k)<3 or re2.match(r"^[\p{P}\p{S}]+$",k): continue
131
+ seen.add(k); out.append(k)
132
+ return out
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
 
134
def good_kw(kw:str)->bool:
    """Return True if *kw* is usable as a quiz keyword.

    A good keyword is non-empty, at least 3 characters long, not an
    Arabic stopword, and not composed purely of punctuation, symbols,
    digits, or underscores.
    """
    # bool(...) so an empty string yields False rather than '' — the
    # annotation promises bool, and callers use this in boolean context.
    return bool(kw and len(kw) >= 3 and kw not in AR_STOP
                and not re2.match(r"^[\p{P}\p{S}\d_]+$", kw))
136
+
137
+ def distractors(correct:str, pool:List[str], k:int=3)->List[str]:
138
+ L=len(correct.strip()); cand=[]
139
+ for w in pool:
140
+ w=w.strip()
141
+ if not w or w==correct or w in AR_STOP: continue
142
+ if re2.match(r"^[\p{P}\p{S}\d_]+$", w): continue
143
+ if abs(len(w)-L)<=3: cand.append(w)
144
+ random.shuffle(cand)
145
+ out=cand[:k]
146
+ while len(out)<k: out.append("—")
147
+ return out
148
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
def make_mcqs(text:str, n:int=6)->List[MCQ]:
    """Build up to *n* fill-in-the-blank MCQs from Arabic *text*.

    Splits the text into sentences, extracts keywords (YAKE, with a
    frequency-based fallback), maps each keyword to the first sentence
    containing it as a whole word, blanks the keyword out, and attaches
    three distractors plus the correct answer in shuffled order.

    Raises:
        ValueError: if no usable sentences are found.
        RuntimeError: if no questions could be generated.
    """
    sents = split_sents(text)
    if not sents:
        raise ValueError("النص قصير أو غير صالح.")

    kws = yake_keywords(text)
    if not kws:
        # Fallback: rank *unique* tokens by frequency in a single pass.
        # (Previously this called text.count() once per duplicated token —
        # O(n^2) — and let duplicates waste slots in the top-80 slice.)
        freq = {}
        for tok in re2.findall(r"[\p{L}\p{N}_]+", text):
            freq[tok] = freq.get(tok, 0) + 1
        kws = [w for w, _ in sorted(freq.items(), key=lambda x: -x[1])][:80]

    # First sentence containing each keyword as a whole word.
    sent_for = {}
    for s in sents:
        for kw in kws:
            if kw in sent_for:
                continue
            if good_kw(kw) and re2.search(rf"(?<!\p{{L}}){re2.escape(kw)}(?!\p{{L}})", s):
                sent_for[kw] = s

    items = []
    used = set()  # one question per sentence
    for kw in (w for w in kws if w in sent_for):
        if len(items) >= n:
            break
        s = sent_for[kw]
        if s in used:
            continue
        # Blank out the first whole-word occurrence of the keyword.
        q = re2.sub(rf"(?<!\p{{L}}){re2.escape(kw)}(?!\p{{L}})", "_____", s, count=1)
        ch = distractors(kw, [x for x in kws if x != kw], 3) + [kw]
        random.shuffle(ch)
        ans = ch.index(kw)
        items.append(MCQ(id=str(uuid.uuid4())[:8], question=q, choices=ch, answer_index=ans))
        used.add(s)

    if not items:
        raise RuntimeError("تعذّر توليد أسئلة.")
    return items
170
 
171
  def to_records(items:List[MCQ])->List[dict]:
 
175
  for i,lbl in enumerate(["A","B","C","D"]):
176
  txt=(it.choices[i] if i<len(it.choices) else "—").strip()
177
  txt=txt.replace(",", "،").replace("?", "؟").replace(";", "؛")
178
+ opts.append({"id":lbl,"text":txt or "—","is_correct":(i==it.answer_index)})
 
 
179
  recs.append({"id":it.id,"question":it.question.strip(),"options":opts})
180
  return recs
181
 
 
214
 
215
  # ------------------ توليد الامتحان وتبديل الصفحات ------------------
216
def build_quiz(text_area, file_path, n, model_id, zoom):
    """Generate the quiz and flip the UI from the input page to the quiz page.

    Returns a 4-tuple: (quiz_html, page1 visibility update,
    page2 visibility update, warning markdown). Pasted text takes
    precedence over an uploaded file.
    """
    text_area = (text_area or "").strip()
    if not text_area and not file_path:
        return "", gr.update(visible=True), gr.update(visible=False), "🛈 الصق نصًا أو ارفع ملفًا أولًا."
    if text_area:
        raw = text_area
    else:
        # gr.File may hand back a list/tuple of paths — use the first one.
        if isinstance(file_path, (list, tuple)) and file_path:
            file_path = file_path[0]
        raw, _ = file_to_text(str(file_path), model_id=model_id, zoom=float(zoom))
    cleaned = postprocess(raw)
    items = make_mcqs(cleaned, n=int(n))
    recs = to_records(items)
    return render_quiz_html(recs), gr.update(visible=False), gr.update(visible=True), ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
228
 
229
  # ------------------ CSS ------------------
230
  CSS = """
 
232
  --bg:#0e0e11; --panel:#15161a; --card:#1a1b20; --muted:#a7b0be;
233
  --text:#f6f7fb; --accent:#6ee7b7; --accent2:#34d399; --danger:#ef4444; --border:#262833;
234
  }
235
+ body{direction:rtl; font-family:system-ui,'Cairo','IBM Plex Arabic',sans-serif; background:var(--bg);}
236
  .gradio-container{max-width:980px;margin:0 auto;padding:12px 12px 40px;}
237
  h2.top{color:#eaeaf2;margin:6px 0 16px}
238
 
 
255
  .q-header{display:flex;gap:10px;align-items:center;justify-content:space-between;margin-bottom:6px}
256
  .q-title{color:#eaeaf2;font-weight:800}
257
  .q-badge{padding:8px 12px;border-radius:10px;font-weight:700}
258
+ .q-badge.ok{background:#083a2a;color:#b6f4db;border:1px solid #145b44}
259
  .q-badge.err{background:#3a0d14;color:#ffd1d6;border:1px solid #6a1e2b}
260
 
261
  .q-text{color:var(--text);font-size:1.06rem;line-height:1.8;margin:8px 0 12px}
 
279
  # ------------------ JS: ربط Submit بعد الرندر (مع Output مخفي لضمان التنفيذ) ------------------
280
  ATTACH_LISTENERS_JS = """
281
  () => {
282
+ // اربط مرة واحدة فقط
283
  if (window.__q_submit_bound_multi2) { return 'already'; }
284
  window.__q_submit_bound_multi2 = true;
285
 
 
302
 
303
  const chosenLabel = chosen.closest('.opt');
304
 
305
+ // حالة صحيحة: لوّن أخضر وأقفل السؤال كاملاً
306
  if (chosen.value === correct) {
307
  chosenLabel.classList.add('ok');
308
  if (badge){ badge.hidden=false; badge.className='q-badge ok'; badge.textContent='Correct!'; }
309
+ // أقفل هذا السؤال فقط بعد الصح
310
  card.querySelectorAll('input[type="radio"]').forEach(i => i.disabled = true);
311
  e.target.disabled = true;
312
  if (note) note.textContent = '';
313
  return;
314
  }
315
 
316
+ // حالة خاطئة: لوّن أحمر فقط، ولا تعطل أي شيء — ليقدر يجرّب خيار آخر
317
+ chosenLabel.classList.add('err'); // اتركه أحمر
318
  if (badge){ badge.hidden=false; badge.className='q-badge err'; badge.textContent='Incorrect.'; }
319
  if (note) note.textContent = '';
320
+ // مهم: لا تعطّل الراديو ولا الزر
321
  });
322
 
323
  return 'wired-multi2';
324
  }
325
  """
326
 
327
+
328
+
329
  # ------------------ واجهة Gradio ------------------
330
  with gr.Blocks(title="Question Generator", css=CSS) as demo:
331
  gr.Markdown("<h2 class='top'>Question Generator</h2>")
332
 
333
+ # الصفحة 1: إدخال ثابت لا تتغير أبعاده
334
  page1 = gr.Group(visible=True, elem_classes=["input-panel"])
335
  with page1:
336
  gr.Markdown("اختر **أحد** الخيارين ثم اضغط الزر.", elem_classes=["small"])
 
352
  btn_build = gr.Button("generate quistion", elem_classes=["button-primary"])
353
  warn = gr.Markdown("", elem_classes=["small"])
354
 
355
+ # الصفحة 2: الأسئلة
356
  page2 = gr.Group(visible=False)
357
  with page2:
358
  quiz_html = gr.HTML("")
359
+ js_wired = gr.Textbox(visible=False) # Output مخفي لضمان تنفيذ JS
360
 
361
+ # بناء الامتحان + تبديل الصفحات + ربط الـJS
362
  btn_build.click(
363
  build_quiz,
364
  inputs=[text_area, file_comp, num_q, trocr_model, trocr_zoom],