Leen172 commited on
Commit
3034391
·
verified ·
1 Parent(s): 6b03645

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +582 -136
app.py CHANGED
@@ -1,10 +1,12 @@
1
  # -*- coding: utf-8 -*-
2
- # صفحتان ثابتتان + Submit لكل سؤال يعمل فعليًا + منع تغيّر أبعاد صفحة الإدخال
 
 
3
 
4
  import os, json, uuid, random, unicodedata
5
  from dataclasses import dataclass
6
  from pathlib import Path
7
- from typing import List, Tuple
8
 
9
  from PIL import Image
10
  from pypdf import PdfReader
@@ -13,21 +15,34 @@ import regex as re2
13
  import yake
14
  import gradio as gr
15
 
16
- # ------------------ إعدادات عامة ------------------
17
  random.seed(42)
18
  DEFAULT_NUM_QUESTIONS = 6
19
  DEFAULT_TROCR_MODEL = "microsoft/trocr-base-printed"
20
  DEFAULT_TROCR_ZOOM = 2.6
 
 
 
 
 
 
 
 
 
21
 
22
  # ------------------ OCR (تحميل كسول) ------------------
23
  _OCR = {}
24
  def get_ocr(model_id: str):
25
- from transformers import pipeline
26
- import torch
27
- dev = 0 if torch.cuda.is_available() else -1
28
- if model_id not in _OCR:
29
- _OCR[model_id] = pipeline("image-to-text", model=model_id, device=dev)
30
- return _OCR[model_id]
 
 
 
 
31
 
32
  # ------------------ PDF/TXT → نص ------------------
33
  def extract_text_with_pypdf(path: str) -> str:
@@ -105,7 +120,7 @@ def postprocess(raw:str)->str:
105
  t = re2.sub(r"\[\d+\]", " ", t)
106
  return norm_ar(t)
107
 
108
- # ------------------ توليد أسئلة (تحسينات كبيرة داخليًا فقط) ------------------
109
  SENT_SPLIT = re2.compile(r"(?<=[\.!؟\?])\s+")
110
  AR_STOP = set("""في على من إلى عن مع لدى ذلك هذه هذا الذين التي الذي أو أم إن أن كان تكون كانوا كانت كنت ثم قد لقد ربما بل لكن إلا سوى حتى حيث كما لما ما لماذا متى أين كيف أي هناك هنا هؤلاء أولئك نحن هو هي هم هن أنت أنتم أنتن""".split())
111
 
@@ -120,8 +135,8 @@ def split_sents(t:str)->List[str]:
120
  s=[x.strip() for x in SENT_SPLIT.split(t) if x.strip()]
121
  return [x for x in s if len(x)>=25]
122
 
123
- # ====== (1) عبارات مفتاحية أذكى: n=3 ثم 2 ثم 1، مع فلترة ======
124
- def yake_keywords(t: str, k: int = 160) -> List[str]:
125
  phrases = []
126
  seen = set()
127
  for n in [3, 2, 1]:
@@ -144,7 +159,56 @@ def yake_keywords(t: str, k: int = 160) -> List[str]:
144
  def good_kw(kw:str)->bool:
145
  return kw and len(kw)>=2 and kw not in AR_STOP and not re2.match(r"^[\p{P}\p{S}\d_]+$", kw)
146
 
147
- # ====== (2) جيران دلاليًا + (3) FILL-MASK كبديل ======
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
148
  _EMB = None
149
  def get_embedder():
150
  global _EMB
@@ -153,10 +217,10 @@ def get_embedder():
153
  from sentence_transformers import SentenceTransformer
154
  _EMB = SentenceTransformer("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
155
  except Exception:
156
- _EMB = False # تشير لتعطّل التحميل
157
  return _EMB
158
 
159
- def nearest_terms(target: str, pool: List[str], k: int = 12) -> List[Tuple[str, float]]:
160
  emb = get_embedder()
161
  if not emb:
162
  return []
@@ -166,7 +230,7 @@ def nearest_terms(target: str, pool: List[str], k: int = 12) -> List[Tuple[str,
166
  vecs = emb.encode([target] + cand, normalize_embeddings=True)
167
  t, C = vecs[0], vecs[1:]
168
  import numpy as np
169
- sims = (C @ t) # cosine لأن المتجهات مُطبّعة
170
  idx = np.argsort(-sims)[:k]
171
  return [(cand[i], float(sims[i])) for i in idx]
172
 
@@ -181,19 +245,19 @@ def get_masker():
181
  _MLM = False
182
  return _MLM
183
 
184
- def mlm_distractors(sentence_with_blank: str, correct: str, k: int = 8) -> List[str]:
185
  masker = get_masker()
186
  if not masker:
187
  return []
188
  masked = sentence_with_blank.replace("_____", masker.tokenizer.mask_token)
189
  try:
190
- outs = masker(masked, top_k=max(15, k+5))
191
  cands = []
192
  for o in outs:
193
  tok = o["token_str"].strip()
194
  if tok and tok != correct and len(tok) >= 2 and not re2.match(r"^[\p{P}\p{S}\d_]+$", tok):
195
  cands.append(tok)
196
- seen = set(); uniq = []
197
  for w in cands:
198
  if w not in seen:
199
  uniq.append(w); seen.add(w)
@@ -201,127 +265,487 @@ def mlm_distractors(sentence_with_blank: str, correct: str, k: int = 8) -> List[
201
  except Exception:
202
  return []
203
 
204
- def legacy_distractors(correct:str, pool:List[str], k:int=3)->List[str]:
205
- # النسخة القديمة كنسخة احتياط
206
- L=len(correct.strip()); cand=[]
207
- for w in pool:
208
- w=w.strip()
209
- if not w or w==correct or w in AR_STOP: continue
210
- if re2.match(r"^[\p{P}\p{S}\d_]+$", w): continue
211
- if abs(len(w)-L)<=3: cand.append(w)
212
- random.shuffle(cand)
213
- out=cand[:k]
214
- while len(out)<k: out.append("—")
215
- return out
216
-
217
- def smart_distractors(correct: str, phrase_pool: List[str], sentence: str, k: int = 3) -> List[str]:
218
- # 1) جيران دلاليًا
219
- neigh = nearest_terms(correct, phrase_pool, k=12)
220
- neigh = [w for w,sim in neigh if w != correct][:k+4]
221
-
222
- # 2) FILL-MASK على الجملة (بديل)
223
- if len(neigh) < k:
224
- mlm = mlm_distractors(sentence.replace(correct, "_____"), correct, k=10)
225
- for w in mlm:
226
- if w not in neigh and w != correct:
227
- neigh.append(w)
228
- if len(neigh) >= k+4:
229
- break
230
-
231
- # 3) فلترة خفيفة
232
- out = []
233
- L = len(correct)
234
- for w in neigh:
235
- if w in AR_STOP:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
236
  continue
237
- if abs(len(w) - L) > max(6, L//2):
238
  continue
239
- if norm_ar(w) == norm_ar(correct):
240
  continue
241
- out.append(w)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
242
  if len(out) >= k:
243
  break
244
 
245
- # 4) رجوع للخطة القديمة إذا ما كفى
246
  if len(out) < k:
247
- extra = legacy_distractors(correct, phrase_pool, k=k-len(out))
248
- out.extend(extra)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
249
 
250
- while len(out) < k:
251
- out.append("—")
252
- return out
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
253
 
254
- # ====== (4) مُولِّد أسئلة جديد بمحافظته على نفس الواجهة تمامًا ======
255
- def make_mcqs(text:str, n:int=6)->List[MCQ]:
256
- sents=split_sents(text)
257
- if not sents:
 
 
 
 
 
258
  raise ValueError("النص قصير أو غير صالح.")
259
 
260
- # عبارات مفتاحية 1–3 كلمات
261
- keyphrases = yake_keywords(text, k=160)
262
- keyphrases = [kp for kp in keyphrases if good_kw(kp) and 2 <= len(kp) <= 40]
263
 
264
- # ربط العبارة بجملة مناسبة (طول معقول ≥ 60) لضمان سياق واضح
265
- sent_for={}
266
  for s in sents:
267
- if len(s) < 60:
268
- continue
269
  for kp in keyphrases:
270
- if kp in sent_for:
271
  continue
272
  if re2.search(rf"(?<!\p{{L}}){re2.escape(kp)}(?!\p{{L}})", s):
273
- sent_for[kp]=s
274
-
275
- if not sent_for:
276
- # fallback: لو ما لقينا مطابقات جيدة، نرجع للمفردات العامة من النص
277
- tokens = [t for t in re2.findall(r"[\p{L}\p{N}_]+", text) if good_kw(t)]
278
- freq = [w for w,_ in sorted(((t, text.count(t)) for t in tokens), key=lambda x:-x[1])]
279
- keyphrases = freq[:120]
280
- for s in sents:
281
- if len(s) < 60:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
282
  continue
283
- for kp in keyphrases:
284
- if kp in sent_for:
285
- continue
286
- if re2.search(rf"(?<!\p{{L}}){re2.escape(kp)}(?!\p{{L}})", s):
287
- sent_for[kp]=s
288
- if len(sent_for)>=n*2:
289
- break
290
-
291
- if not sent_for:
292
- raise RuntimeError("تعذّر توليد أسئلة من هذا النص.")
293
-
294
- # نعطي أولوية للعبارات الأطول (أكثر إعلامية)
295
- items=[]; used_sents=set(); used_keys=set()
 
 
 
 
 
 
 
 
296
  for kp in sorted(sent_for.keys(), key=lambda x: (-len(x), x)):
297
- if len(items)>=n: break
298
- s=sent_for[kp]
299
- if s in used_sents or kp in used_keys:
300
  continue
 
301
 
302
- # ابنِ سؤال الفراغ
303
- q=re2.sub(rf"(?<!\p{{L}}){re2.escape(kp)}(?!\p{{L}})", "_____", s, count=1)
304
-
305
- # مشتتات أذكى (مع رجوع تلقائي لو النماذج مش متاحة)
306
- pool = [x for x in keyphrases if x != kp]
307
- ch = smart_distractors(kp, pool, s, k=3) + [kp]
308
- random.shuffle(ch); ans=ch.index(kp)
 
 
 
 
 
 
 
309
 
310
- items.append(MCQ(id=str(uuid.uuid4())[:8], question=q, choices=ch, answer_index=ans))
311
- used_sents.add(s); used_keys.add(kp)
 
 
 
 
312
 
313
- if not items:
314
  raise RuntimeError("تعذّر توليد أسئلة.")
315
- return items
 
 
 
 
 
 
 
 
 
 
316
 
317
  def to_records(items:List[MCQ])->List[dict]:
318
  recs=[]
319
  for it in items:
320
  opts=[]
 
321
  for i,lbl in enumerate(["A","B","C","D"]):
322
- txt=(it.choices[i] if i<len(it.choices) else "").strip()
323
- txt=txt.replace(",", "،").replace("?", "؟").replace(";", "؛")
324
- opts.append({"id":lbl,"text":txt or "—","is_correct":(i==it.answer_index)})
 
 
 
325
  recs.append({"id":it.id,"question":it.question.strip(),"options":opts})
326
  return recs
327
 
@@ -358,19 +782,32 @@ def render_quiz_html(records: List[dict]) -> str:
358
  """)
359
  return f"""<div id="quiz" class="quiz-wrap">{''.join(parts)}</div>"""
360
 
361
- # ------------------ توليد الامتحان وتبديل الصفحات ------------------
362
- def build_quiz(text_area, file_path, n, model_id, zoom):
363
  text_area = (text_area or "").strip()
364
  if not text_area and not file_path:
365
  return "", gr.update(visible=True), gr.update(visible=False), "🛈 الصق نصًا أو ارفع ملفًا أولًا."
366
- if text_area:
367
- raw = text_area
368
- else:
369
- raw, _ = file_to_text(file_path, model_id=model_id, zoom=float(zoom))
370
  cleaned = postprocess(raw)
371
- items = make_mcqs(cleaned, n=int(n))
372
- recs = to_records(items)
373
- return render_quiz_html(recs), gr.update(visible=False), gr.update(visible=True), ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
374
 
375
  # ------------------ CSS ------------------
376
  CSS = """
@@ -382,12 +819,12 @@ body{direction:rtl; font-family:system-ui,'Cairo','IBM Plex Arabic',sans-serif;
382
  .gradio-container{max-width:980px;margin:0 auto;padding:12px 12px 40px;}
383
  h2.top{color:#eaeaf2;margin:6px 0 16px}
384
 
385
- /* صفحة الإدخال ثابتة الارتفاع ولا تتغير بعد الرفع */
386
  .input-panel{background:var(--panel);border:1px solid var(--border);border-radius:14px;padding:16px;
387
  box-shadow:0 16px 38px rgba(0,0,0,.35); min-height:360px; display:flex; flex-direction:column; gap:12px;}
388
  .small{opacity:.9;color:#d9dee8}
389
 
390
- /* منع لوحة المعاينة الخاصة بالملفات التي تغيّر التخطيط */
391
  [data-testid="file"] .file-preview, [data-testid="file"] .file-preview * { display:none !important; }
392
  [data-testid="file"] .grid-wrap { display:block !important; }
393
  .upload-like{border:2px dashed #3b3f52;background:#121318;border-radius:12px;padding:12px;color:#cfd5e3;min-height:90px}
@@ -404,7 +841,7 @@ textarea{min-height:120px}
404
  .q-badge.ok{background:#083a2a;color:#b6f4db;border:1px solid #145b44}
405
  .q-badge.err{background:#3a0d14;color:#ffd1d6;border:1px solid #6a1e2b}
406
 
407
- .q-text{color:var(--text);font-size:1.06rem;line-height:1.8;margin:8px 0 12px}
408
  .opts{display:flex;flex-direction:column;gap:8px}
409
  .opt{display:flex;gap:10px;align-items:center;background:#14161c;border:1px solid #2a2d3a;border-radius:12px;padding:10px;transition:background .15s,border-color .15s}
410
  .opt input{accent-color:var(--accent2)}
@@ -415,17 +852,16 @@ textarea{min-height:120px}
415
 
416
  .q-actions{display:flex;gap:10px;align-items:center;margin-top:10px}
417
  .q-actions .q-submit{
418
- background:#2dd4bf;border:none;color:#0b0d10;font-weight:800;border-radius:10px;padding:8px 14px;cursor:pointer;
419
  }
420
  .q-actions .q-submit:disabled{opacity:.5;cursor:not-allowed}
421
  .q-note{color:#ffd1d6}
422
  .q-note.warn{color:#ffd1d6}
423
  """
424
 
425
- # ------------------ JS: ربط Submit بعد الرندر (مع Output مخفي لضمان التنفيذ) ------------------
426
  ATTACH_LISTENERS_JS = """
427
  () => {
428
- // اربط مرة واحدة فقط
429
  if (window.__q_submit_bound_multi2) { return 'already'; }
430
  window.__q_submit_bound_multi2 = true;
431
 
@@ -448,22 +884,30 @@ ATTACH_LISTENERS_JS = """
448
 
449
  const chosenLabel = chosen.closest('.opt');
450
 
451
- // حالة صحيحة: لوّن أخضر وأقفل السؤال كاملاً
452
  if (chosen.value === correct) {
453
  chosenLabel.classList.add('ok');
454
  if (badge){ badge.hidden=false; badge.className='q-badge ok'; badge.textContent='Correct!'; }
455
- // أقفل هذا السؤال فقط بعد الصح
456
  card.querySelectorAll('input[type="radio"]').forEach(i => i.disabled = true);
457
  e.target.disabled = true;
458
  if (note) note.textContent = '';
 
 
 
 
 
 
 
 
 
 
 
 
459
  return;
460
  }
461
 
462
- // حالة خاطئة: لوّن أحمر فقط، ولا تعطل أي شيء — ليقدر يجرّب خيار آخر
463
- chosenLabel.classList.add('err'); // اتركه أحمر
464
  if (badge){ badge.hidden=false; badge.className='q-badge err'; badge.textContent='Incorrect.'; }
465
  if (note) note.textContent = '';
466
- // مهم: لا تعطّل الراديو ولا الزر
467
  });
468
 
469
  return 'wired-multi2';
@@ -474,7 +918,6 @@ ATTACH_LISTENERS_JS = """
474
  with gr.Blocks(title="Question Generator", css=CSS) as demo:
475
  gr.Markdown("<h2 class='top'>Question Generator</h2>")
476
 
477
- # الصفحة 1: إدخال ثابت لا تتغير أبعاده
478
  page1 = gr.Group(visible=True, elem_classes=["input-panel"])
479
  with page1:
480
  gr.Markdown("اختر **أحد** الخيارين ثم اضغط الزر.", elem_classes=["small"])
@@ -482,6 +925,10 @@ with gr.Blocks(title="Question Generator", css=CSS) as demo:
482
  file_comp = gr.File(label="أو ارفع ملف (PDF / TXT)", file_count="single",
483
  file_types=[".pdf",".txt"], type="filepath", elem_classes=["upload-like"])
484
  num_q = gr.Slider(4, 20, value=DEFAULT_NUM_QUESTIONS, step=1, label="عدد الأسئلة")
 
 
 
 
485
  with gr.Accordion("خيارات PDF المصوّر (اختياري)", open=False):
486
  trocr_model = gr.Dropdown(
487
  choices=[
@@ -493,19 +940,18 @@ with gr.Blocks(title="Question Generator", css=CSS) as demo:
493
  value=DEFAULT_TROCR_MODEL, label="نموذج TrOCR"
494
  )
495
  trocr_zoom = gr.Slider(2.0, 3.5, value=DEFAULT_TROCR_ZOOM, step=0.1, label="Zoom OCR")
 
496
  btn_build = gr.Button("generate quistion", elem_classes=["button-primary"])
497
  warn = gr.Markdown("", elem_classes=["small"])
498
 
499
- # الصفحة 2: الأسئلة
500
  page2 = gr.Group(visible=False)
501
  with page2:
502
  quiz_html = gr.HTML("")
503
- js_wired = gr.Textbox(visible=False) # Output مخفي لضمان تنفيذ JS
504
 
505
- # بناء الامتحان + تبديل الصفحات + ربط الـJS
506
  btn_build.click(
507
  build_quiz,
508
- inputs=[text_area, file_comp, num_q, trocr_model, trocr_zoom],
509
  outputs=[quiz_html, page1, page2, warn]
510
  ).then(
511
  None, inputs=None, outputs=[js_wired], js=ATTACH_LISTENERS_JS
 
1
  # -*- coding: utf-8 -*-
2
+ # Question Generator Final Publishable Build (Lite/Full)
3
+ # صفحات ثابتة + Submit لكل سؤال فعليًا + منع تغيّر أبعاد صفحة الإدخال
4
+ # طور "فراغ" + طور "فهم مباشر" (mT5) مع fallbacks، صعوبة، BM25، فلترة قوية للمشتّتات، وتنويع على مستوى الفقرات.
5
 
6
  import os, json, uuid, random, unicodedata
7
  from dataclasses import dataclass
8
  from pathlib import Path
9
+ from typing import List, Tuple, Optional
10
 
11
  from PIL import Image
12
  from pypdf import PdfReader
 
15
  import yake
16
  import gradio as gr
17
 
18
+ # ------------------ إعدادات عامّة ------------------
19
  random.seed(42)
20
  DEFAULT_NUM_QUESTIONS = 6
21
  DEFAULT_TROCR_MODEL = "microsoft/trocr-base-printed"
22
  DEFAULT_TROCR_ZOOM = 2.6
23
+ QUESTION_MODES = ["فراغ", "فهم مباشر"]
24
+ DIFFICULTY_MODES = ["سهل", "متوسط", "صعب"]
25
+
26
+ # BM25 (اختياري)
27
+ try:
28
+ from rank_bm25 import BM25Okapi
29
+ _HAS_BM25 = True
30
+ except Exception:
31
+ _HAS_BM25 = False
32
 
33
  # ------------------ OCR (تحميل كسول) ------------------
34
  _OCR = {}
35
  def get_ocr(model_id: str):
36
+ try:
37
+ from transformers import pipeline
38
+ import torch
39
+ dev = 0 if torch.cuda.is_available() else -1
40
+ if model_id not in _OCR:
41
+ _OCR[model_id] = pipeline("image-to-text", model=model_id, device=dev)
42
+ return _OCR[model_id]
43
+ except Exception:
44
+ # بديل آمن: دالة تُعيد نصًا فارغًا
45
+ return lambda im: [{"generated_text": ""}]
46
 
47
  # ------------------ PDF/TXT → نص ------------------
48
  def extract_text_with_pypdf(path: str) -> str:
 
120
  t = re2.sub(r"\[\d+\]", " ", t)
121
  return norm_ar(t)
122
 
123
+ # ------------------ بنية السؤال ------------------
124
  SENT_SPLIT = re2.compile(r"(?<=[\.!؟\?])\s+")
125
  AR_STOP = set("""في على من إلى عن مع لدى ذلك هذه هذا الذين التي الذي أو أم إن أن كان تكون كانوا كانت كنت ثم قد لقد ربما بل لكن إلا سوى حتى حيث كما لما ما لماذا متى أين كيف أي هناك هنا هؤلاء أولئك نحن هو هي هم هن أنت أنتم أنتن""".split())
126
 
 
135
  s=[x.strip() for x in SENT_SPLIT.split(t) if x.strip()]
136
  return [x for x in s if len(x)>=25]
137
 
138
+ # ====== (1) عبارات مفتاحية (YAKE) ======
139
+ def yake_keywords(t: str, k: int = 260) -> List[str]:
140
  phrases = []
141
  seen = set()
142
  for n in [3, 2, 1]:
 
159
  def good_kw(kw:str)->bool:
160
  return kw and len(kw)>=2 and kw not in AR_STOP and not re2.match(r"^[\p{P}\p{S}\d_]+$", kw)
161
 
162
+ # ====== POS/NER اختياري ======
163
+ _HAS_CAMEL = False
164
+ try:
165
+ from camel_tools.morphology.analyzer import Analyzer
166
+ from camel_tools.ner import NERecognizer
167
+ _HAS_CAMEL = True
168
+ _AN = Analyzer.builtin_analyzer()
169
+ _NER = NERecognizer.pretrained()
170
+ except Exception:
171
+ _HAS_CAMEL = False
172
+
173
+ NER_TAGS = {"PER","LOC","ORG","MISC"}
174
+
175
+ def ar_pos(word: str) -> str:
176
+ if not _HAS_CAMEL:
177
+ if re2.match(r"^(في|على|الى|إلى|من|عن|حتى|ثم|بل|لكن|أو|و)$", word): return "PART"
178
+ if re2.match(r"^[\p{N}]+$", word): return "NUM"
179
+ if re2.search(r"(ة|ات|ون|ين|ان)$", word): return "NOUN"
180
+ return "X"
181
+ try:
182
+ ana = _AN.analyze(word)
183
+ if not ana: return "X"
184
+ from collections import Counter
185
+ pos_candidates = [a.get('pos','X') for a in ana]
186
+ return Counter(pos_candidates).most_common(1)[0][0] if pos_candidates else "X"
187
+ except Exception:
188
+ return "X"
189
+
190
+ def is_named_entity(token: str) -> bool:
191
+ if not _HAS_CAMEL:
192
+ return False
193
+ try:
194
+ tag = _NER.predict_sentence([token])[0]
195
+ return tag in NER_TAGS
196
+ except Exception:
197
+ return False
198
+
199
+ def is_clean_sentence(s: str) -> bool:
200
+ if not (60 <= len(s) <= 240): return False
201
+ if re2.search(r"https?://|www\.", s): return False
202
+ if re2.search(r"\d{2,}", s): return False
203
+ return True
204
+
205
+ def safe_keyword(k: str) -> bool:
206
+ if not good_kw(k): return False
207
+ if is_named_entity(k): return False
208
+ if ar_pos(k) in {"PRON","PART"}: return False
209
+ return True
210
+
211
+ # ====== Embeddings/Masking/Cross-Encoder (اختياري) ======
212
  _EMB = None
213
  def get_embedder():
214
  global _EMB
 
217
  from sentence_transformers import SentenceTransformer
218
  _EMB = SentenceTransformer("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
219
  except Exception:
220
+ _EMB = False
221
  return _EMB
222
 
223
+ def nearest_terms(target: str, pool: List[str], k: int = 24) -> List[Tuple[str, float]]:
224
  emb = get_embedder()
225
  if not emb:
226
  return []
 
230
  vecs = emb.encode([target] + cand, normalize_embeddings=True)
231
  t, C = vecs[0], vecs[1:]
232
  import numpy as np
233
+ sims = (C @ t)
234
  idx = np.argsort(-sims)[:k]
235
  return [(cand[i], float(sims[i])) for i in idx]
236
 
 
245
  _MLM = False
246
  return _MLM
247
 
248
+ def mlm_distractors(sentence_with_blank: str, correct: str, k: int = 18) -> List[str]:
249
  masker = get_masker()
250
  if not masker:
251
  return []
252
  masked = sentence_with_blank.replace("_____", masker.tokenizer.mask_token)
253
  try:
254
+ outs = masker(masked, top_k=max(25, k+7))
255
  cands = []
256
  for o in outs:
257
  tok = o["token_str"].strip()
258
  if tok and tok != correct and len(tok) >= 2 and not re2.match(r"^[\p{P}\p{S}\d_]+$", tok):
259
  cands.append(tok)
260
+ uniq, seen = [], set()
261
  for w in cands:
262
  if w not in seen:
263
  uniq.append(w); seen.add(w)
 
265
  except Exception:
266
  return []
267
 
268
+ _CE = None
269
+ def get_cross_encoder():
270
+ global _CE
271
+ if _CE is None:
272
+ try:
273
+ from sentence_transformers import CrossEncoder
274
+ _CE = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")
275
+ except Exception:
276
+ _CE = False
277
+ return _CE
278
+
279
+ def rank_by_ce(sentence_with_blank: str, candidates: List[str]) -> List[str]:
280
+ ce = get_cross_encoder()
281
+ if not ce or not candidates:
282
+ return candidates
283
+ pairs = [(sentence_with_blank.replace("_____", c), c) for c in candidates]
284
+ try:
285
+ scores = ce.predict([p[0] for p in pairs])
286
+ ranked = [c for _, c in sorted(zip(scores, [p[1] for p in pairs]), key=lambda x:-x[0])]
287
+ return ranked
288
+ except Exception:
289
+ return candidates
290
+
291
+ # --------- أدوات مساعدة للمشتّتات ---------
292
+ def word_tokens(s: str) -> List[str]:
293
+ s = norm_ar(s)
294
+ return re2.findall(r"\p{L}+", s)
295
+
296
+ def token_set(s: str) -> set:
297
+ return set([t for t in word_tokens(s) if t not in AR_STOP])
298
+
299
+ def jaccard(a: str, b: str) -> float:
300
+ A, B = token_set(a), token_set(b)
301
+ if not A or not B: return 0.0
302
+ return len(A & B) / max(1, len(A | B))
303
+
304
+ def is_sub_or_super(a: str, b: str) -> bool:
305
+ A, B = norm_ar(a), norm_ar(b)
306
+ return (A in B) or (B in A)
307
+
308
+ def appears_as_long_fragment_in_sentence(w: str, sentence: str) -> bool:
309
+ toks = word_tokens(w)
310
+ if len(toks) < 3:
311
+ return False
312
+ return re2.search(rf"(?<!\p{{L}}){re2.escape(norm_ar(w))}(?!\p{{L}})", norm_ar(sentence)) is not None
313
+
314
+ def choice_length_ok(w: str) -> bool:
315
+ n = len(word_tokens(w))
316
+ return 1 <= n <= 6
317
+
318
+ def paragraph_index_map(text: str, sentences: List[str]) -> dict:
319
+ paras = [norm_ar(p) for p in re2.split(r"\n{2,}", text) if p.strip()]
320
+ mapping = {}
321
+ for i, s in enumerate(sentences):
322
+ ns = norm_ar(s)
323
+ pid = None
324
+ for j, p in enumerate(paras):
325
+ if ns and ns in p:
326
+ pid = j; break
327
+ mapping[s] = pid if pid is not None else -1
328
+ return mapping
329
+
330
+ def looks_like_title_fragment(s: str) -> bool:
331
+ return ":" in s and s.index(":") < max(10, len(s)//6)
332
+
333
+ def is_nouny_phrase(w: str) -> bool:
334
+ toks = word_tokens(w)
335
+ if not (1 <= len(toks) <= 4): return False
336
+ if re2.search(r"(يفعل|تفعل|يشهد|تقوم|يمكن|قد|سوف)$", w): return False
337
+ return True
338
+
339
+ def best_keyword_in_sentence(sentence: str, global_text: str) -> Optional[str]:
340
+ if looks_like_title_fragment(sentence):
341
+ parts = sentence.split(":", 1)
342
+ sentence = parts[1] if len(parts) > 1 else sentence
343
+ try:
344
+ ex = yake.KeywordExtractor(lan='ar', n=3, top=24)
345
+ pairs = ex.extract_keywords(sentence)
346
+ except Exception:
347
+ pairs = []
348
+ cands = []
349
+ for w, _ in pairs:
350
+ w = re2.sub(r"\s+", " ", w.strip())
351
+ if not w or not good_kw(w) or not safe_keyword(w):
352
  continue
353
+ if not is_nouny_phrase(w):
354
  continue
355
+ if not re2.search(rf"(?<!\p{{L}}){re2.escape(w)}(?!\p{{L}})", sentence):
356
  continue
357
+ freq_weight = global_text.count(w)
358
+ cands.append((w, len(w) + 0.7*freq_weight))
359
+ if not cands:
360
+ toks = [t for t in re2.findall(r"\p{L}+", sentence) if good_kw(t) and safe_keyword(t)]
361
+ toks = [t for t in toks if is_nouny_phrase(t)]
362
+ toks.sort(key=len, reverse=True)
363
+ return toks[0] if toks else None
364
+ cands.sort(key=lambda x: -x[1])
365
+ return cands[0][0]
366
+
367
+ def similarity_caps(difficulty: str):
368
+ if difficulty == "سهل":
369
+ return 0.88
370
+ if difficulty == "صعب":
371
+ return 0.95
372
+ return 0.92
373
+
374
+ def tokenize_ar(s: str) -> List[str]:
375
+ s = norm_ar(s)
376
+ toks = re2.findall(r"\p{L}+", s)
377
+ return [t for t in toks if len(t) >= 2 and t not in AR_STOP]
378
+
379
+ def bm25_build(sentences: List[str]):
380
+ if not _HAS_BM25 or not sentences:
381
+ return None, []
382
+ corpus_tokens = [tokenize_ar(s) for s in sentences]
383
+ bm = BM25Okapi(corpus_tokens)
384
+ return bm, corpus_tokens
385
+
386
+ def bm25_candidates(correct: str, sentences: List[str], bm, corpus_tokens, top: int = 20) -> List[str]:
387
+ if not bm: return []
388
+ q = tokenize_ar(correct)
389
+ scores = bm.get_scores(q)
390
+ idxs = sorted(range(len(scores)), key=lambda i: -scores[i])[:min(top, len(scores))]
391
+ pool = set()
392
+ for i in idxs:
393
+ for tok in corpus_tokens[i]:
394
+ if tok != correct and good_kw(tok):
395
+ pool.add(tok)
396
+ return list(pool)
397
+
398
+ def typo_like_variants(answer: str, k: int = 4) -> List[str]:
399
+ """مشتّتات شكلية: تعريف/تنكير، ي/ى، ة/ه، حذف حرف."""
400
+ a = norm_ar(answer)
401
+ vars = set()
402
+ if a.startswith("ال"):
403
+ vars.add(a[2:])
404
+ else:
405
+ vars.add("ال" + a)
406
+ vars.add(a.replace("ي", "ى"))
407
+ vars.add(a.replace("ى", "ي"))
408
+ vars.add(a.replace("ة", "ه"))
409
+ vars.add(a.replace("ه", "ة"))
410
+ if len(a) > 5:
411
+ mid = len(a)//2
412
+ vars.add(a[:mid] + a[mid+1:])
413
+ out = [v for v in vars if v and norm_ar(v) != norm_ar(a)]
414
+ return out[:k]
415
+
416
+ # ====== مشتّتات ذكية ======
417
+ def pos_compatible(a: str, b: str) -> bool:
418
+ pa, pb = ar_pos(a), ar_pos(b)
419
+ if "X" in (pa, pb):
420
+ return True
421
+ return pa == pb
422
+
423
+ def length_close(a: str, b: str) -> bool:
424
+ return abs(len(a) - len(b)) <= max(6, len(b)//2)
425
+
426
+ def smart_distractors(correct: str, phrase_pool: List[str], sentence: str, k: int = 3,
427
+ all_sentences: Optional[List[str]] = None, difficulty: str = "متوسط") -> List[str]:
428
+ base: List[str] = []
429
+
430
+ # (0) مشتّتات شكلية أولاً
431
+ base.extend(typo_like_variants(correct, k=4))
432
+
433
+ # (أ) جيران دلاليين
434
+ base.extend([w for w,_ in nearest_terms(correct, phrase_pool, k=24)])
435
+
436
+ # (ب) FILL-MASK
437
+ for w in mlm_distractors(sentence.replace(correct, "_____"), correct, k=18):
438
+ if w not in base:
439
+ base.append(w)
440
+
441
+ # (ج) BM25
442
+ if all_sentences:
443
+ bm, corp = bm25_build(all_sentences)
444
+ for w in bm25_candidates(correct, all_sentences, bm, corp, top=18):
445
+ if w not in base:
446
+ base.append(w)
447
+
448
+ # فلترة صارمة
449
+ clean: List[str] = []
450
+ for w in base:
451
+ w = (w or "").strip()
452
+ if not w or w == correct:
453
+ continue
454
+ if not choice_length_ok(w):
455
+ continue
456
+ if appears_as_long_fragment_in_sentence(w, sentence):
457
+ continue
458
+ if is_named_entity(w):
459
+ continue
460
+ if not pos_compatible(w, correct):
461
+ continue
462
+ if not length_close(w, correct):
463
+ continue
464
+ if is_sub_or_super(w, correct):
465
+ continue
466
+ if jaccard(w, correct) >= 0.5:
467
+ continue
468
+ clean.append(w)
469
+
470
+ # ترتيب (اختياري) + فلتر قرب دلالي
471
+ clean = rank_by_ce(sentence.replace(correct, "_____"), clean)[:max(k*4, k)]
472
+ cap = similarity_caps(difficulty)
473
+ try:
474
+ emb = get_embedder()
475
+ if emb and clean:
476
+ vecs = emb.encode([correct] + clean, normalize_embeddings=True)
477
+ c, others = vecs[0], vecs[1:]
478
+ import numpy as np
479
+ sims = others @ c
480
+ filtered = [w for w, s in zip(clean, sims) if s < cap]
481
+ if len(filtered) >= k:
482
+ clean = filtered
483
+ except Exception:
484
+ pass
485
+
486
+ # تجميع أخير
487
+ out, seen = [], set()
488
+ for w in clean:
489
+ if w in seen:
490
+ continue
491
+ seen.add(w); out.append(w)
492
  if len(out) >= k:
493
  break
494
 
495
+ # تعويض إذا لزم
496
  if len(out) < k:
497
+ extras = [w for w in phrase_pool
498
+ if w not in out and w != correct and choice_length_ok(w)
499
+ and not appears_as_long_fragment_in_sentence(w, sentence)
500
+ and not is_sub_or_super(w, correct)
501
+ and jaccard(w, correct) < 0.5]
502
+ out.extend(extras[:(k-len(out))])
503
+ if len(out) < k:
504
+ out.extend([w for w in ["…"]*(k-len(out))]) # لن تُقبل لاحقًا إن لم نكمل 4 خيارات
505
+ return out[:k]
506
+
507
+ # ====== mT5 (اختياري) ======
508
+ _MT5 = {"tok": None, "model": None, "ok": False}
509
+ def get_mt5():
510
+ if _MT5["tok"] is not None or _MT5["model"] is not None or _MT5["ok"]:
511
+ return _MT5["tok"], _MT5["model"], _MT5["ok"]
512
+ try:
513
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
514
+ _MT5["tok"] = AutoTokenizer.from_pretrained("google/mt5-small")
515
+ _MT5["model"] = AutoModelForSeq2SeqLM.from_pretrained("google/mt5-small")
516
+ _MT5["ok"] = True
517
+ except Exception:
518
+ _MT5["tok"] = None; _MT5["model"] = None; _MT5["ok"] = False
519
+ return _MT5["tok"], _MT5["model"], _MT5["ok"]
520
 
521
+ def parse_json_block(s: str) -> Optional[dict]:
522
+ try:
523
+ return json.loads(s)
524
+ except Exception:
525
+ pass
526
+ m = re2.search(r"\{.*\}", s, flags=re2.DOTALL)
527
+ if m:
528
+ try:
529
+ return json.loads(m.group(0))
530
+ except Exception:
531
+ return None
532
+ return None
533
+
534
+ def comp_prompt(sentence: str) -> str:
535
+ return (
536
+ "أنت منشئ أسئلة متعددة الخيارات باللغة العربية.\n"
537
+ "من الجملة التالية، أنشئ سؤال فهم مباشر واحدًا مع أربع خيارات وإشارة للجواب الصحيح.\n"
538
+ "أعد فقط JSON بهذا الشكل:\n"
539
+ "{\n"
540
+ "\"question\": \"...\",\n"
541
+ "\"choices\": [\"...\",\"...\",\"...\",\"...\"],\n"
542
+ "\"answer_index\": 0\n"
543
+ "}\n\n"
544
+ f"الجملة: {sentence}"
545
+ )
546
+
547
+ def gen_one_comp_q(sentence: str, tok, model, max_new_tokens=128) -> Optional[MCQ]:
548
+ try:
549
+ import torch
550
+ device = "cuda" if torch.cuda.is_available() else "cpu"
551
+ model = model.to(device)
552
+ inp = tok(comp_prompt(sentence), return_tensors="pt").to(device)
553
+ out = model.generate(
554
+ **inp,
555
+ max_new_tokens=max_new_tokens,
556
+ do_sample=True,
557
+ temperature=0.8,
558
+ top_p=0.9,
559
+ num_return_sequences=1,
560
+ eos_token_id=tok.eos_token_id
561
+ )
562
+ text = tok.decode(out[0], skip_special_tokens=True)
563
+ data = parse_json_block(text) or {}
564
+ q = str(data.get("question","")).strip()
565
+ choices = data.get("choices", [])
566
+ ai = data.get("answer_index", 0)
567
+ if not q or not isinstance(choices, list) or len(choices) < 4:
568
+ return None
569
+ choices = [str(c).strip() for c in choices][:4]
570
+ ai = ai if isinstance(ai, int) and 0 <= ai < 4 else 0
571
+ return MCQ(id=str(uuid.uuid4())[:8], question=q, choices=choices, answer_index=ai)
572
+ except Exception:
573
+ return None
574
+
575
def make_comp_mcqs(text: str, n: int = 6, difficulty: str = "متوسط") -> List[MCQ]:
    """Generate reading-comprehension MCQs from Arabic text with the mT5 model.

    Falls back to the gap-fill generator ``make_mcqs`` whenever the model is
    unavailable, the text yields no usable sentences, or generation produces
    no questions at all.

    Args:
        text: Cleaned Arabic source text.
        n: Maximum number of questions to return.
        difficulty: Forwarded to the fallback generator.

    Returns:
        Up to ``n`` MCQ items.
    """
    tok, model, ok = get_mt5()
    if not ok:
        return make_mcqs(text, n, difficulty=difficulty)

    sents_all = split_sents(text)
    sents = [s for s in sents_all if is_clean_sentence(s)] or sents_all[:]
    if not sents:
        return make_mcqs(text, n, difficulty=difficulty)

    # Merge short adjacent sentences into longer passages so the model
    # sees enough context per prompt.
    def make_chunks(sents, max_len=220):
        chunks = []
        i = 0
        while i < len(sents):
            cur = sents[i]
            j = i + 1
            while j < len(sents) and len(cur) + 1 + len(sents[j]) <= max_len:
                cur = cur + " " + sents[j]
                j += 1
            chunks.append(cur)
            i = j
        return chunks

    candidates = sents[:] + make_chunks(sents, max_len=220)
    random.shuffle(candidates)

    items: List[MCQ] = []
    tried = 0
    for s in candidates:
        if len(items) >= n:
            break
        mcq = gen_one_comp_q(s, tok, model)
        tried += 1
        if mcq:
            q = re2.sub(r"\s+", " ", mcq.question).strip()
            if not (12 <= len(q) <= 220):
                continue
            choices = [re2.sub(r"\s+", " ", c).strip() for c in mcq.choices]
            # Remember the correct answer's TEXT before deduplication:
            # dedup shifts positions, so the raw index cannot be reused as-is.
            raw_ai = mcq.answer_index if isinstance(mcq.answer_index, int) and 0 <= mcq.answer_index < len(choices) else 0
            correct_text = choices[raw_ai] if choices else ""
            seen = set()
            clean = []
            for c in choices:
                if c and c not in seen:
                    seen.add(c)
                    clean.append(c)
            # Pad with *distinct* ellipsis placeholders so options stay unique.
            pad = 1
            while len(clean) < 4:
                filler = "…" * pad
                if filler not in seen:
                    seen.add(filler)
                    clean.append(filler)
                pad += 1
            clean = clean[:4]
            # Remap the answer index onto the deduplicated list; drop the
            # question if the correct answer was deduplicated/truncated away.
            if correct_text not in clean:
                continue
            ai = clean.index(correct_text)
            items.append(MCQ(id=str(uuid.uuid4())[:8], question=q, choices=clean, answer_index=ai))
        if tried >= n * 12:
            break

    if not items:
        return make_mcqs(text, n, difficulty=difficulty)
    return items[:n]
626
+
627
+ # ------------------ مُولّد أسئلة "فراغ" (نهائي) ------------------
628
def make_mcqs(text: str, n: int = 6, difficulty: str = "متوسط") -> List[MCQ]:
    """Generate fill-in-the-blank MCQs from Arabic text.

    Pipeline: extract keyphrases (YAKE), map each keyphrase to one sentence
    that contains it as a whole word, blank the keyphrase out of the sentence,
    and build three distractors per question — while capping how many
    questions may come from the same paragraph.

    Args:
        text: Cleaned Arabic source text.
        n: Maximum number of questions to return.
        difficulty: Forwarded to the distractor builder.

    Raises:
        ValueError: when the text yields no usable sentences.
        RuntimeError: when no question could be generated at all.
    """
    all_sents = split_sents(text)
    sents = [s for s in all_sents if is_clean_sentence(s)] or all_sents[:]
    if not sents:
        raise ValueError("النص قصير أو غير صالح.")

    # Candidate keyphrases, filtered to a sane length range.
    keyphrases = yake_keywords(text, k=260)
    keyphrases = [kp for kp in keyphrases if safe_keyword(kp) and 2 <= len(kp) <= 40]

    # Map each keyphrase to the FIRST sentence containing it as a whole word
    # (\p{L} lookarounds prevent matching inside a longer word).
    sent_for: dict = {}
    for s in sents:
        for kp in keyphrases:
            if kp in sent_for:
                continue
            if re2.search(rf"(?<!\p{{L}}){re2.escape(kp)}(?!\p{{L}})", s):
                sent_for[kp] = s
        # NOTE(review): stop scanning once we have plenty of candidate pairs.
        if len(sent_for) >= n * 5:
            break

    para_map = paragraph_index_map(text, sents)
    used_sentences: set = set()
    items: List[MCQ] = []

    # At most this many questions per paragraph, to diversify coverage.
    MAX_PER_PARA = 2
    para_count: dict = {}

    def add_item_from_pair(sentence: str, kp: str) -> bool:
        """Try to turn (sentence, keyphrase) into one MCQ; return success."""
        nonlocal items, used_sentences, para_count
        pid = para_map.get(sentence, -1)
        if para_count.get(pid, 0) >= MAX_PER_PARA:
            return False
        if not re2.search(rf"(?<!\p{{L}}){re2.escape(kp)}(?!\p{{L}})", sentence):
            return False
        # Blank out only the first whole-word occurrence of the keyphrase.
        q = re2.sub(rf"(?<!\p{{L}}){re2.escape(kp)}(?!\p{{L}})", "_____", sentence, count=1)
        pool = [x for x in keyphrases if x != kp] or keyphrases[:]
        ch = smart_distractors(kp, pool, sentence, k=3,
                               all_sentences=all_sents, difficulty=difficulty) + [kp]

        # Filter distractors: dedup, length check, not quoted verbatim in the
        # sentence, and not trivially similar to the correct answer.
        choices, seen = [], set()
        for c in ch:
            c = (c or "").strip()
            if not c or c in seen:
                continue
            if not choice_length_ok(c):
                continue
            if appears_as_long_fragment_in_sentence(c, sentence):
                continue
            if is_sub_or_super(c, kp) or jaccard(c, kp) >= 0.5:
                continue
            seen.add(c)
            choices.append(c)

        # The correct answer is filtered out above (jaccard==1.0 with itself),
        # so re-append it unconditionally.
        if kp not in choices:
            choices.append(kp)
            seen.add(kp)
        if len(choices) < 4:
            return False

        choices = choices[:4]
        random.shuffle(choices)
        ans = choices.index(kp)

        items.append(MCQ(id=str(uuid.uuid4())[:8], question=q, choices=choices, answer_index=ans))
        used_sentences.add(sentence)
        para_count[pid] = para_count.get(pid, 0) + 1
        return True

    # First pass: longest keyphrases first, diversifying across paragraphs.
    for kp in sorted(sent_for.keys(), key=lambda x: (-len(x), x)):
        if len(items) >= n:
            break
        s = sent_for[kp]
        if s in used_sentences:
            continue
        _ = add_item_from_pair(s, kp)

    def fill_from_sentences(candidates: List[str]):
        """Second-chance pass: pick a keyphrase per leftover sentence."""
        for s in candidates:
            if len(items) >= n:
                break
            if s in used_sentences:
                continue
            kp = None
            for kpp, ss in sent_for.items():
                if ss == s:
                    kp = kpp
                    break
            if kp is None:
                kp = best_keyword_in_sentence(s, text)
            if not kp:
                continue
            _ = add_item_from_pair(s, kp)

    # Top up: first from paragraphs still under quota, then from anything left.
    if len(items) < n:
        remaining_new_para = [s for s in sents if para_count.get(para_map.get(s, -1), 0) < MAX_PER_PARA]
        fill_from_sentences(remaining_new_para)
    if len(items) < n:
        leftovers = [s for s in sents if s not in used_sentences]
        fill_from_sentences(leftovers)

    if not items:
        raise RuntimeError("تعذّر توليد أسئلة.")
    return items[:n]
726
+
727
+ # ------------------ تحويل إلى سجلات العرض ------------------
728
def clean_option_text(t: str) -> str:
    """Normalize an answer-option string.

    Strips Arabic diacritics, collapses whitespace, trims surrounding
    punctuation/symbols, and caps the text at a word boundary. Returns an
    ellipsis placeholder when nothing remains.
    """
    txt = (t or "").strip()
    # Drop diacritics, then collapse whitespace runs to single spaces.
    txt = re2.sub(AR_DIAC, "", txt)
    txt = re2.sub(r"\s+", " ", txt)
    # Trim leading/trailing punctuation, symbols, underscores and dashes.
    txt = re2.sub(r"^[\p{P}\p{S}_-]+|[\p{P}\p{S}_-]+$", "", txt)
    # Cut to a reasonable length (≤60 chars, snapping to a word boundary).
    txt = re2.sub(r"^(.{,60})(?:\s.*)?$", r"\1", txt)
    return txt if txt else "…"
736
 
737
def to_records(items: List[MCQ]) -> List[dict]:
    """Convert MCQ objects into plain dicts for the HTML renderer.

    Each record carries the question text plus exactly four labelled options
    (A–D). Duplicate option texts are disambiguated with an invisible
    zero-width-non-joiner suffix so radio values stay unique.
    """
    records = []
    labels = ["A", "B", "C", "D"]
    for item in items:
        seen_texts = set()
        options = []
        for idx, label in enumerate(labels):
            raw = item.choices[idx] if idx < len(item.choices) else ""
            # Arabic-friendly punctuation, then the shared option cleanup.
            text = clean_option_text(raw.replace(",", "،").replace("?", "؟").replace(";", "؛"))
            if text in seen_texts:
                # Invisible marker + index keeps duplicate texts distinct.
                text = f"{text}‌{idx + 1}"
            seen_texts.add(text)
            options.append({"id": label, "text": text, "is_correct": (idx == item.answer_index)})
        records.append({"id": item.id, "question": item.question.strip(), "options": options})
    return records
751
 
 
782
  """)
783
  return f"""<div id="quiz" class="quiz-wrap">{''.join(parts)}</div>"""
784
 
785
+ # ------------------ بناء الامتحان وتبديل الصفحات ------------------
786
def build_quiz(text_area, file_path, n, model_id, zoom, mode, difficulty):
    """Build the quiz HTML from pasted text or an uploaded file, then switch
    the UI from the input page to the quiz page.

    Returns:
        (quiz_html, page1_visibility_update, page2_visibility_update,
        status_markdown).
    """
    source_text = (text_area or "").strip()
    if not source_text and not file_path:
        # Nothing to work with: stay on the input page and show a hint.
        return "", gr.update(visible=True), gr.update(visible=False), "🛈 الصق نصًا أو ارفع ملفًا أولًا."

    if source_text:
        raw = source_text
    else:
        raw = file_to_text(file_path, model_id=model_id, zoom=float(zoom))[0]

    cleaned = postprocess(raw)
    count = int(n)
    used_mode = mode

    def _fallback():
        # Gap-fill generation: the always-available question mode.
        return make_mcqs(cleaned, n=count, difficulty=difficulty)

    try:
        if mode != "فهم مباشر":
            items = _fallback()
        else:
            tok, model, ok = get_mt5()
            if ok:
                items = make_comp_mcqs(cleaned, n=count, difficulty=difficulty)
            else:
                items = _fallback()
                used_mode = "فراغ (fallback)"
    except Exception:
        # Any generation failure degrades gracefully to gap-fill questions.
        items = _fallback()
        used_mode = "فراغ (fallback)"

    recs = to_records(items)
    warn = f"نمط مُستخدَم: **{used_mode}** — عدد الأسئلة: {len(items)}"
    return render_quiz_html(recs), gr.update(visible=False), gr.update(visible=True), warn
811
 
812
  # ------------------ CSS ------------------
813
  CSS = """
 
819
  .gradio-container{max-width:980px;margin:0 auto;padding:12px 12px 40px;}
820
  h2.top{color:#eaeaf2;margin:6px 0 16px}
821
 
822
+ /* صفحة الإدخال ثابتة الارتفاع ولا تتغير أبعادها */
823
  .input-panel{background:var(--panel);border:1px solid var(--border);border-radius:14px;padding:16px;
824
  box-shadow:0 16px 38px rgba(0,0,0,.35); min-height:360px; display:flex; flex-direction:column; gap:12px;}
825
  .small{opacity:.9;color:#d9dee8}
826
 
827
+ /* إخفاء معاينة الملف */
828
  [data-testid="file"] .file-preview, [data-testid="file"] .file-preview * { display:none !important; }
829
  [data-testid="file"] .grid-wrap { display:block !important; }
830
  .upload-like{border:2px dashed #3b3f52;background:#121318;border-radius:12px;padding:12px;color:#cfd5e3;min-height:90px}
 
841
  .q-badge.ok{background:#083a2a;color:#b6f4db;border:1px solid #145b44}
842
  .q-badge.err{background:#3a0d14;color:#ffd1d6;border:1px solid #6a1e2b}
843
 
844
+ .q-text{color:#eaeaf2;font-size:1.06rem;line-height:1.8;margin:8px 0 12px}
845
  .opts{display:flex;flex-direction:column;gap:8px}
846
  .opt{display:flex;gap:10px;align-items:center;background:#14161c;border:1px solid #2a2d3a;border-radius:12px;padding:10px;transition:background .15s,border-color .15s}
847
  .opt input{accent-color:var(--accent2)}
 
852
 
853
  .q-actions{display:flex;gap:10px;align-items:center;margin-top:10px}
854
  .q-actions .q-submit{
855
+ background:#2dd4bf;border:none;color:#0b0d10;font-weight:800;border-radius:10px;padding:8px 14px;cursor:pointer;
856
  }
857
  .q-actions .q-submit:disabled{opacity:.5;cursor:not-allowed}
858
  .q-note{color:#ffd1d6}
859
  .q-note.warn{color:#ffd1d6}
860
  """
861
 
862
+ # ------------------ JS: ربط Submit + إبراز الصح ------------------
863
  ATTACH_LISTENERS_JS = """
864
  () => {
 
865
  if (window.__q_submit_bound_multi2) { return 'already'; }
866
  window.__q_submit_bound_multi2 = true;
867
 
 
884
 
885
  const chosenLabel = chosen.closest('.opt');
886
 
 
887
  if (chosen.value === correct) {
888
  chosenLabel.classList.add('ok');
889
  if (badge){ badge.hidden=false; badge.className='q-badge ok'; badge.textContent='Correct!'; }
 
890
  card.querySelectorAll('input[type="radio"]').forEach(i => i.disabled = true);
891
  e.target.disabled = true;
892
  if (note) note.textContent = '';
893
+
894
+ const qNode = card.querySelector('.q-text');
895
+ if (qNode){
896
+ const full = qNode.textContent || qNode.innerText || '';
897
+ const correctText = [...card.querySelectorAll('.opt')].find(o =>
898
+ o.querySelector('input').value === correct
899
+ )?.querySelector('.opt-text')?.textContent || '';
900
+ if (full && correctText && full.includes('_____')){
901
+ const highlighted = full.replace('_____', `<mark style="background:#2dd4bf22;border:1px solid #2dd4bf55;border-radius:6px;padding:0 4px">${correctText}</mark>`);
902
+ qNode.innerHTML = highlighted;
903
+ }
904
+ }
905
  return;
906
  }
907
 
908
+ chosenLabel.classList.add('err');
 
909
  if (badge){ badge.hidden=false; badge.className='q-badge err'; badge.textContent='Incorrect.'; }
910
  if (note) note.textContent = '';
 
911
  });
912
 
913
  return 'wired-multi2';
 
918
  with gr.Blocks(title="Question Generator", css=CSS) as demo:
919
  gr.Markdown("<h2 class='top'>Question Generator</h2>")
920
 
 
921
  page1 = gr.Group(visible=True, elem_classes=["input-panel"])
922
  with page1:
923
  gr.Markdown("اختر **أحد** الخيارين ثم اضغط الزر.", elem_classes=["small"])
 
925
  file_comp = gr.File(label="أو ارفع ملف (PDF / TXT)", file_count="single",
926
  file_types=[".pdf",".txt"], type="filepath", elem_classes=["upload-like"])
927
  num_q = gr.Slider(4, 20, value=DEFAULT_NUM_QUESTIONS, step=1, label="عدد الأسئلة")
928
+
929
+ mode_radio = gr.Radio(choices=QUESTION_MODES, value="فراغ", label="نوع السؤال")
930
+ difficulty_radio = gr.Radio(choices=DIFFICULTY_MODES, value="متوسط", label="درجة الصعوبة")
931
+
932
  with gr.Accordion("خيارات PDF المصوّر (اختياري)", open=False):
933
  trocr_model = gr.Dropdown(
934
  choices=[
 
940
  value=DEFAULT_TROCR_MODEL, label="نموذج TrOCR"
941
  )
942
  trocr_zoom = gr.Slider(2.0, 3.5, value=DEFAULT_TROCR_ZOOM, step=0.1, label="Zoom OCR")
943
+
944
  btn_build = gr.Button("generate question", elem_classes=["button-primary"])
945
  warn = gr.Markdown("", elem_classes=["small"])
946
 
 
947
  page2 = gr.Group(visible=False)
948
  with page2:
949
  quiz_html = gr.HTML("")
950
+ js_wired = gr.Textbox(visible=False)
951
 
 
952
  btn_build.click(
953
  build_quiz,
954
+ inputs=[text_area, file_comp, num_q, trocr_model, trocr_zoom, mode_radio, difficulty_radio],
955
  outputs=[quiz_html, page1, page2, warn]
956
  ).then(
957
  None, inputs=None, outputs=[js_wired], js=ATTACH_LISTENERS_JS