Leen172 committed on
Commit
282d730
·
verified ·
1 Parent(s): ecd3544

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -479
app.py CHANGED
@@ -1,10 +1,10 @@
1
  # -*- coding: utf-8 -*-
2
  # صفحتان ثابتتان + Submit لكل سؤال يعمل فعليًا + منع تغيّر أبعاد صفحة الإدخال
3
 
4
- import os, json, uuid, random, unicodedata, difflib, traceback
5
  from dataclasses import dataclass
6
  from pathlib import Path
7
- from typing import List, Tuple, Optional
8
 
9
  from PIL import Image
10
  from pypdf import PdfReader
@@ -105,7 +105,7 @@ def postprocess(raw:str)->str:
105
  t = re2.sub(r"\[\d+\]", " ", t)
106
  return norm_ar(t)
107
 
108
- # ------------------ أدوات ذكية داخلية ------------------
109
  SENT_SPLIT = re2.compile(r"(?<=[\.!؟\?])\s+")
110
  AR_STOP = set("""في على من إلى عن مع لدى ذلك هذه هذا الذين التي الذي أو أم إن أن كان تكون كانوا كانت كنت ثم قد لقد ربما بل لكن إلا سوى حتى حيث كما لما ما لماذا متى أين كيف أي هناك هنا هؤلاء أولئك نحن هو هي هم هن أنت أنتم أنتن""".split())
111
 
@@ -120,430 +120,52 @@ def split_sents(t:str)->List[str]:
120
  s=[x.strip() for x in SENT_SPLIT.split(t) if x.strip()]
121
  return [x for x in s if len(x)>=25]
122
 
123
- # --- (A) عبارات مفتاحية 1–3 كلمات + إزالة التداخل ---
124
- def yake_keyphrases(t: str, top_k: int = 180) -> List[str]:
125
- phrases = []
126
- seen = set()
127
- for n in [3, 2, 1]:
128
- try:
129
- ex = yake.KeywordExtractor(lan='ar', n=n, top=top_k)
130
- pairs = ex.extract_keywords(t)
131
- except Exception:
132
- pairs = []
133
- for w, _ in pairs:
134
- w = re2.sub(r"\s+", " ", w.strip())
135
- if not w or w in seen:
136
- continue
137
- if re2.match(r"^[\p{P}\p{S}\d_]+$", w):
138
- continue
139
- if 2 <= len(w) <= 42:
140
- phrases.append(w); seen.add(w)
141
- phrases_sorted = sorted(phrases, key=lambda x: (-len(x), x))
142
- kept=[]
143
- for p in phrases_sorted:
144
- if not any((p != q and p in q) for q in kept):
145
- kept.append(p)
146
- return kept
147
 
148
  def good_kw(kw:str)->bool:
149
- return kw and len(kw)>=2 and kw not in AR_STOP and not re2.match(r"^[\p{P}\p{S}\d_]+$", kw)
 
 
 
 
 
 
 
 
 
 
 
 
150
 
151
- # --- (B) تضمين جُمل/عبارات + كاش ---
152
- _EMB = None
153
- def get_embedder():
154
- global _EMB
155
- if _EMB is None:
156
- try:
157
- from sentence_transformers import SentenceTransformer
158
- _EMB = SentenceTransformer("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
159
- except Exception:
160
- _EMB = False
161
- return _EMB
162
-
163
- def embed_texts(texts: List[str]):
164
- emb = get_embedder()
165
- if not emb:
166
- return None
167
- return emb.encode(texts, normalize_embeddings=True)
168
-
169
- # --- (C) Fill-Mask عربي (AraBERT) + كاش ---
170
- _MLM = None
171
- def get_masker():
172
- global _MLM
173
- if _MLM is None:
174
- try:
175
- from transformers import pipeline
176
- _MLM = pipeline("fill-mask", model="aubmindlab/bert-base-arabertv02")
177
- except Exception:
178
- _MLM = False
179
- return _MLM
180
-
181
- def mlm_fill(sentence_with_blank: str, correct: str, k: int = 20) -> List[str]:
182
- masker = get_masker()
183
- if not masker:
184
- return []
185
- masked = sentence_with_blank.replace("_____", masker.tokenizer.mask_token)
186
- try:
187
- outs = masker(masked, top_k=max(25, k+10))
188
- cands = []
189
- for o in outs:
190
- tok = o["token_str"].strip()
191
- if tok and tok != correct and len(tok) >= 2 and not re2.match(r"^[\p{P}\p{S}\d_]+$", tok):
192
- cands.append(tok)
193
- seen=set(); uniq=[]
194
- for w in cands:
195
- if w not in seen:
196
- uniq.append(w); seen.add(w)
197
- return uniq[:k]
198
- except Exception:
199
- return []
200
-
201
- # --- (D) جيران دلاليًا لعبارة الهدف ---
202
- def nearest_terms(target: str, pool: List[str], k: int = 32) -> List[str]:
203
- emb = get_embedder()
204
- if not emb:
205
- return []
206
- cand = [w for w in pool if w != target and len(w) >= 2 and not re2.match(r"^[\p{P}\p{S}\d_]+$", w)]
207
- if not cand:
208
- return []
209
- vecs = emb.encode([target] + cand, normalize_embeddings=True)
210
- t, C = vecs[0], vecs[1:]
211
- import numpy as np
212
- sims = (C @ t)
213
- idx = np.argsort(-sims)[:k]
214
- return [cand[i] for i in idx]
215
-
216
- # --- (E) POS اختياري عبر Camel Tools ---
217
- _TAGGER = None
218
- def get_tagger():
219
- global _TAGGER
220
- if _TAGGER is None:
221
- try:
222
- from camel_tools.disambig.mle import MLEDisambiguator
223
- _TAGGER = MLEDisambiguator.pretrained()
224
- except Exception:
225
- _TAGGER = False
226
- return _TAGGER
227
-
228
- def phrase_pos(phrase: str) -> Optional[str]:
229
- tagger = get_tagger()
230
- if not tagger:
231
- return None
232
- try:
233
- toks = phrase.split()
234
- res = tagger.disambiguate(toks)
235
- return res[0].analyses[0].pos
236
- except Exception:
237
- return None
238
-
239
- def same_pos(a: str, b: str) -> bool:
240
- pa, pb = phrase_pos(a), phrase_pos(b)
241
- if pa is None or pb is None:
242
- return True
243
- return (pa == pb)
244
-
245
- # --- (F) تطبيع صرفي بسيط (الـ) ---
246
- def strip_al(s: str) -> str:
247
- return re2.sub(r"^\s*ال", "", s)
248
-
249
- def with_same_definiteness(ref: str, cand: str) -> str:
250
- ref_has_al = re2.match(r"^\s*ال", ref) is not None
251
- cand_has_al = re2.match(r"^\s*ال", cand) is not None
252
- if ref_has_al and not cand_has_al:
253
- return "ال" + cand
254
- if (not ref_has_al) and cand_has_al:
255
- return strip_al(cand)
256
- return cand
257
-
258
- # --- (G) تقييم الجملة للسؤال ---
259
- def sentence_score(s: str) -> float:
260
- L = len(s)
261
- base = 1.0 if (70 <= L <= 240) else -1.0
262
- punct = len(re2.findall(r"[^\p{L}\p{N}\s]", s))
263
- digits = len(re2.findall(r"\d", s))
264
- penalties = 0.0
265
- if punct > 10: penalties -= 0.5
266
- if digits > 6: penalties -= 0.5
267
- bonus = 0.2 if ("،" in s or ":" in s) else 0.0
268
- return base + bonus + penalties
269
-
270
- # ================== (NEW) جودة المشتِّتات والتطويل ==================
271
-
272
- global_full_text_cache = ""
273
- ref_phrase_cache = {}
274
-
275
- ADJ_WHITELIST = {"التعليمية","الذكية","الرقمية","الافتراضية","التكيفية","الحديثة","المتقدمة"}
276
- NOUN_PREFIXES = {"مجال","تقنيات","أنظمة","مفاهيم","نماذج","ممارسات","آليات","تطبيقات"}
277
-
278
- def is_arabic_word(w:str)->bool:
279
- return bool(re2.match(r"^[\p{Arabic}]+$", w))
280
-
281
- def clean_spaces(s:str)->str:
282
- s = re2.sub(r"\s+", " ", s).strip()
283
- s = re2.sub(r"\bال\s+ال\b", "ال", s)
284
- return s
285
-
286
- def bad_token(w:str)->bool:
287
- return (not is_arabic_word(w)) or (len(w) < 2 or len(w) > 18)
288
-
289
- def looks_weird(phrase:str)->bool:
290
- toks = [t for t in re2.split(r"\s+", phrase.strip()) if t]
291
- if len(toks) == 0: return True
292
- if any(bad_token(t) for t in toks): return True
293
- for i in range(1, len(toks)):
294
- if toks[i] == toks[i-1]:
295
- return True
296
- if len(set(toks)) <= len(toks) - 1:
297
- if any(toks.count(t) > 1 for t in toks):
298
- return True
299
- pos = [phrase_pos(t) or "" for t in toks]
300
- streak = 0
301
- for p in pos:
302
- if p.startswith("ADJ"):
303
- streak += 1
304
- if streak > 2: return True
305
- else:
306
- streak = 0
307
- return False
308
-
309
- def quality_score(phrase:str, sentence:str, full_text:str)->float:
310
- phrase = clean_spaces(phrase)
311
- if looks_weird(phrase):
312
- return 0.0
313
- hits = sum(1 for t in set(phrase.split()) if t in full_text)
314
- toks = phrase.split()
315
- pos0 = phrase_pos(toks[0]) or ""
316
- pos1 = phrase_pos(toks[1]) if len(toks)>1 else ""
317
- nominal_bonus = 0.2 if (pos0.startswith("N") and (not pos1 or pos1.startswith("ADJ"))) else 0.0
318
- return min(1.0, 0.3 + 0.1*hits + nominal_bonus)
319
-
320
- def word_len(s: str) -> int:
321
- return len([w for w in re2.split(r"\s+", s.strip()) if w])
322
-
323
- def within_ratio(cand: str, target_len: int, tol: float = 0.15) -> bool:
324
- L = word_len(cand)
325
- return (target_len*(1-tol) <= L <= target_len*(1+tol))
326
-
327
- def shape_phrase_like(ref: str, cand: str) -> str:
328
- return with_same_definiteness(ref, cand)
329
-
330
- def try_mlm_expand(cand: str, sentence_with_blank: str, target_len: int) -> Optional[str]:
331
- masker = get_masker()
332
- if not masker:
333
- return None
334
- trials = [
335
- sentence_with_blank.replace("_____", f"{masker.tokenizer.mask_token} {cand}"),
336
- sentence_with_blank.replace("_____", f"{cand} {masker.tokenizer.mask_token}")
337
- ]
338
- for masked in trials:
339
- try:
340
- outs = masker(masked, top_k=12)
341
- except Exception:
342
- continue
343
- for o in outs:
344
- tok = o["token_str"].strip()
345
- if not is_arabic_word(tok):
346
- continue
347
- if masked.startswith(masker.tokenizer.mask_token):
348
- if tok not in NOUN_PREFIXES:
349
- continue
350
- phrase = f"{tok} {cand}"
351
- else:
352
- if tok not in ADJ_WHITELIST:
353
- continue
354
- phrase = f"{cand} {tok}"
355
- phrase = clean_spaces(phrase)
356
- if within_ratio(phrase, target_len, tol=0.15) and norm_ar(phrase) != norm_ar(ref_phrase_cache.get("correct","")) and not looks_weird(phrase):
357
- return phrase
358
- return None
359
-
360
- def fallback_expand(cand: str, target_len: int) -> str:
361
- for p in NOUN_PREFIXES:
362
- phrase = f"{p} {cand}"
363
- if within_ratio(phrase, target_len, tol=0.15):
364
- return clean_spaces(phrase)
365
- for sfx in ADJ_WHITELIST:
366
- phrase = f"{cand} {sfx}"
367
- if within_ratio(phrase, target_len, tol=0.15):
368
- return clean_spaces(phrase)
369
- candidates = [f"{p} {cand}" for p in NOUN_PREFIXES] + [f"{cand} {sfx}" for sfx in ADJ_WHITELIST]
370
- candidates = sorted(candidates, key=lambda ph: abs(word_len(ph) - target_len))
371
- return clean_spaces(candidates[0])
372
-
373
- # --- (H*) ترتيب المرشّحات بالانسجام + الجودة + منع التشابه ---
374
- def rank_by_sentence_coherence(sentence_with_blank: str, correct: str, candidates: List[str], topk: int=3, full_text: str="") -> List[str]:
375
- emb = get_embedder()
376
- if not candidates:
377
- return []
378
- coherence = {}
379
- if emb:
380
- filled = [sentence_with_blank.replace("_____", c) for c in candidates]
381
- ref = sentence_with_blank.replace("_____", correct)
382
- vecs = embed_texts([ref] + filled)
383
- if vecs is not None:
384
- import numpy as np
385
- ref_vec = vecs[0]; cand_vecs = vecs[1:]
386
- sims = cand_vecs @ ref_vec
387
- for i, c in enumerate(candidates):
388
- coherence[c] = float(sims[i])
389
- qscore = {c: quality_score(c, sentence_with_blank, full_text) for c in candidates}
390
- def final_score(c):
391
- coh = coherence.get(c, 0.0)
392
- return 0.7*coh + 0.3*qscore.get(c, 0.0)
393
- ranked = sorted(candidates, key=lambda c: final_score(c), reverse=True)
394
-
395
- kept = []
396
- for c in ranked:
397
- if all(difflib.SequenceMatcher(None, c, x).ratio() < 0.90 for x in kept):
398
- kept.append(c)
399
- if len(kept) >= topk:
400
- break
401
- return kept[:topk]
402
-
403
- # --- (I) حصاد مصطلحات احتياطية عالية التكرار من النص كله ---
404
- def harvest_backup_terms(text: str, limit: int = 400) -> List[str]:
405
- toks = re2.findall(r"[\p{L}][\p{L}\p{N}_\-]{1,}", text)
406
- stats = {}
407
- for t in toks:
408
- tt = norm_ar(t)
409
- if not good_kw(tt):
410
- continue
411
- stats[tt] = stats.get(tt, 0) + 1
412
- top = [w for w,_ in sorted(stats.items(), key=lambda kv: -kv[1])]
413
- return top[:limit]
414
-
415
- # --- (J) مشتّتات ذكية تضمن دائمًا ≥3 خيارات فعلية + موازنة الطول ---
416
- def smart_distractors(correct: str, phrase_pool: List[str], sentence_with_blank: str, backup_terms: List[str], k: int = 3) -> List[str]:
417
- target = correct.strip()
418
- ref_phrase_cache["correct"] = target
419
-
420
- neigh = nearest_terms(target, phrase_pool, k=48)
421
- mlm = mlm_fill(sentence_with_blank, target, k=24)
422
-
423
- raw_pool = []
424
- seen=set()
425
- for w in neigh + mlm + phrase_pool:
426
- w = w.strip()
427
- if not w or norm_ar(w) == norm_ar(target):
428
- continue
429
- if w in AR_STOP or re2.match(r"^[\p{P}\p{S}\d_]+$", w):
430
- continue
431
- if w not in seen:
432
- seen.add(w); raw_pool.append(w)
433
-
434
- for w in backup_terms:
435
- if len(raw_pool) >= max(60, k*10): break
436
- if not w or norm_ar(w) == norm_ar(target):
437
- continue
438
- if w in AR_STOP or re2.match(r"^[\p{P}\p{S}\d_]+$", w):
439
- continue
440
- if w not in seen:
441
- seen.add(w); raw_pool.append(w)
442
-
443
- filtered = []
444
- for w in raw_pool:
445
- if same_pos(target, w):
446
- filtered.append(w)
447
- if len(filtered) >= max(24, k*6):
448
- break
449
- if not filtered:
450
- filtered = raw_pool[:max(24, k*6)]
451
-
452
- target_words = word_len(target)
453
- shaped = []
454
- for w in filtered:
455
- cand = shape_phrase_like(target, w)
456
- if within_ratio(cand, target_words, tol=0.15) and not looks_weird(cand):
457
- shaped.append(clean_spaces(cand))
458
- continue
459
- expanded = try_mlm_expand(cand, sentence_with_blank, target_words)
460
- if expanded and within_ratio(expanded, target_words, tol=0.15) and not looks_weird(expanded):
461
- shaped.append(clean_spaces(expanded))
462
- continue
463
- fb = fallback_expand(cand, target_words)
464
- if not looks_weird(fb):
465
- shaped.append(clean_spaces(fb))
466
-
467
- shaped = [s for s in shaped if norm_ar(s) != norm_ar(target)]
468
-
469
- ranked = rank_by_sentence_coherence(
470
- sentence_with_blank, target, shaped, topk=max(k, 12), full_text=global_full_text_cache
471
- )
472
-
473
- out = []
474
- for src in [ranked, shaped, filtered, raw_pool, backup_terms]:
475
- for w in src:
476
- if len(out) >= k: break
477
- if w and norm_ar(w) != norm_ar(target) and w not in out and not looks_weird(w):
478
- out.append(w)
479
- if len(out) >= k: break
480
-
481
- if len(out) < k:
482
- while len(out) < k and ranked:
483
- out.append(ranked[len(out) % len(ranked)])
484
-
485
- return out[:k]
486
-
487
- # ------------------ مُولِّد الأسئلة ------------------
488
  def make_mcqs(text:str, n:int=6)->List[MCQ]:
489
- global global_full_text_cache
490
- global_full_text_cache = text
491
-
492
- sents = split_sents(text)
493
- if not sents:
494
- raise ValueError("النص قصير أو غير صالح.")
495
-
496
- keyphrases = [kp for kp in yake_keyphrases(text, top_k=180) if good_kw(kp)]
497
- if not keyphrases:
498
- tokens = [t for t in re2.findall(r"[\p{L}\p{N}_]+", text) if good_kw(t)]
499
- freq = [w for w,_ in sorted(((t, text.count(t)) for t in tokens), key=lambda x:-x[1])]
500
- keyphrases = freq[:160]
501
-
502
- backup_terms = harvest_backup_terms(text, limit=400)
503
-
504
- kp2best_sent = {}
505
- for kp in keyphrases:
506
- best_s, best_sc = None, -9e9
507
- pat = re2.compile(rf"(?<!\p{{L}}){re2.escape(kp)}(?!\p{{L}})")
508
- for s in sents:
509
- if pat.search(s):
510
- sc = sentence_score(s)
511
- if sc > best_sc:
512
- best_s, best_sc = s, sc
513
- if best_s is not None:
514
- kp2best_sent[kp] = (best_s, best_sc)
515
-
516
- if not kp2best_sent:
517
- raise RuntimeError("تعذّر توليد أسئلة من هذا النص.")
518
-
519
- order = sorted(kp2best_sent.items(), key=lambda kv: (-len(kv[0]), -kv[1][1], kv[0]))
520
-
521
- items=[]; used_sents=set(); used_keys=set()
522
- for kp, (s, _) in order:
523
- if len(items) >= n:
524
- break
525
- if s in used_sents or kp in used_keys:
526
- continue
527
-
528
- q = re2.sub(rf"(?<!\p{{L}}){re2.escape(kp)}(?!\p{{L}})", "_____", s, count=1)
529
-
530
- pool = [x for x in keyphrases if x != kp]
531
- distracts = smart_distractors(kp, pool, q, backup_terms, k=3)
532
-
533
- ch = distracts + [kp]
534
-
535
- # ترتيب غير عشوائي: تدوير حتمي لموضع الصحيحة
536
- ch_sorted = sorted(ch, key=lambda c: c != kp)
537
- rot = (len(items) + (hash(kp) & 3)) % 4
538
- ch = ch_sorted[-rot:] + ch_sorted[:-rot]
539
-
540
- ans = ch.index(kp)
541
-
542
  items.append(MCQ(id=str(uuid.uuid4())[:8], question=q, choices=ch, answer_index=ans))
543
- used_sents.add(s); used_keys.add(kp)
544
-
545
- if not items:
546
- raise RuntimeError("تعذّر توليد أسئلة.")
547
  return items
548
 
549
  def to_records(items:List[MCQ])->List[dict]:
@@ -553,9 +175,7 @@ def to_records(items:List[MCQ])->List[dict]:
553
  for i,lbl in enumerate(["A","B","C","D"]):
554
  txt=(it.choices[i] if i<len(it.choices) else "—").strip()
555
  txt=txt.replace(",", "،").replace("?", "؟").replace(";", "؛")
556
- if txt == "—" or not txt:
557
- txt = "خيار"
558
- opts.append({"id":lbl,"text":txt or "خيار","is_correct":(i==it.answer_index)})
559
  recs.append({"id":it.id,"question":it.question.strip(),"options":opts})
560
  return recs
561
 
@@ -594,56 +214,17 @@ def render_quiz_html(records: List[dict]) -> str:
594
 
595
  # ------------------ توليد الامتحان وتبديل الصفحات ------------------
596
  def build_quiz(text_area, file_path, n, model_id, zoom):
597
- try:
598
- text_area = (text_area or "").strip()
599
- if not text_area and not file_path:
600
- return "", gr.update(visible=True), gr.update(visible=False), "🛈 الصق نصًا أو ارفع ملفًا أولًا."
601
-
602
- if text_area:
603
- raw = text_area
604
- else:
605
- if isinstance(file_path, (list, tuple)) and file_path:
606
- file_path = file_path[0]
607
- if not file_path or not os.path.exists(file_path):
608
- return "", gr.update(visible=True), gr.update(visible=False), "⚠️ تعذّر الوصول للملف المرفوع."
609
- raw, _ = file_to_text(str(file_path), model_id=model_id, zoom=float(zoom))
610
-
611
- cleaned = postprocess(raw)
612
-
613
- try:
614
- items = make_mcqs(cleaned, n=int(n))
615
- except Exception as inner_e:
616
- # Fallback بسيط يضمن توليد أسئلة حتى لو تعطل المسار الذكي
617
- sents = split_sents(cleaned)[:int(n)*2]
618
- if not sents:
619
- raise inner_e
620
- recs_items = []
621
- import itertools
622
- for s in sents:
623
- toks = [t for t in re2.findall(r"[\p{L}]{3,}", s) if t not in AR_STOP]
624
- if len(toks) < 4:
625
- continue
626
- kw = toks[len(toks)//3]
627
- q = re2.sub(rf"(?<!\p{{L}}){re2.escape(kw)}(?!\p{{L}})", "_____", s, count=1)
628
- pool = [w for w in toks if w != kw][:30]
629
- random.shuffle(pool)
630
- dis = list(dict.fromkeys(pool))[:3]
631
- while len(dis) < 3: dis.append("اختيار")
632
- ch = dis + [kw]; random.shuffle(ch)
633
- recs_items.append(MCQ(id=str(uuid.uuid4())[:8], question=q, choices=ch, answer_index=ch.index(kw)))
634
- if len(recs_items) >= int(n):
635
- break
636
- if not recs_items:
637
- raise inner_e
638
- items = recs_items
639
-
640
- recs = to_records(items)
641
- html = render_quiz_html(recs)
642
- return html, gr.update(visible=False), gr.update(visible=True), ""
643
- except Exception as e:
644
- err = f"❌ حدث خطأ أثناء التوليد:\n```\n{str(e)}\n```"
645
- traceback.print_exc()
646
- return "", gr.update(visible=True), gr.update(visible=False), err
647
 
648
  # ------------------ CSS ------------------
649
  CSS = """
@@ -651,7 +232,7 @@ CSS = """
651
  --bg:#0e0e11; --panel:#15161a; --card:#1a1b20; --muted:#a7b0be;
652
  --text:#f6f7fb; --accent:#6ee7b7; --accent2:#34d399; --danger:#ef4444; --border:#262833;
653
  }
654
- body{direction:rtl; font-family:system-ui,'Cairo','IBM Plex Arabic',sans-serif; background:#0e0e11;}
655
  .gradio-container{max-width:980px;margin:0 auto;padding:12px 12px 40px;}
656
  h2.top{color:#eaeaf2;margin:6px 0 16px}
657
 
@@ -674,7 +255,7 @@ textarea{min-height:120px}
674
  .q-header{display:flex;gap:10px;align-items:center;justify-content:space-between;margin-bottom:6px}
675
  .q-title{color:#eaeaf2;font-weight:800}
676
  .q-badge{padding:8px 12px;border-radius:10px;font-weight:700}
677
- .q-badge.ok{background:#0f2f22;color:#b6f4db;border:1px solid #145b44}
678
  .q-badge.err{background:#3a0d14;color:#ffd1d6;border:1px solid #6a1e2b}
679
 
680
  .q-text{color:var(--text);font-size:1.06rem;line-height:1.8;margin:8px 0 12px}
@@ -698,6 +279,7 @@ textarea{min-height:120px}
698
  # ------------------ JS: ربط Submit بعد الرندر (مع Output مخفي لضمان التنفيذ) ------------------
699
  ATTACH_LISTENERS_JS = """
700
  () => {
 
701
  if (window.__q_submit_bound_multi2) { return 'already'; }
702
  window.__q_submit_bound_multi2 = true;
703
 
@@ -720,28 +302,35 @@ ATTACH_LISTENERS_JS = """
720
 
721
  const chosenLabel = chosen.closest('.opt');
722
 
 
723
  if (chosen.value === correct) {
724
  chosenLabel.classList.add('ok');
725
  if (badge){ badge.hidden=false; badge.className='q-badge ok'; badge.textContent='Correct!'; }
 
726
  card.querySelectorAll('input[type="radio"]').forEach(i => i.disabled = true);
727
  e.target.disabled = true;
728
  if (note) note.textContent = '';
729
  return;
730
  }
731
 
732
- chosenLabel.classList.add('err');
 
733
  if (badge){ badge.hidden=false; badge.className='q-badge err'; badge.textContent='Incorrect.'; }
734
  if (note) note.textContent = '';
 
735
  });
736
 
737
  return 'wired-multi2';
738
  }
739
  """
740
 
 
 
741
  # ------------------ واجهة Gradio ------------------
742
  with gr.Blocks(title="Question Generator", css=CSS) as demo:
743
  gr.Markdown("<h2 class='top'>Question Generator</h2>")
744
 
 
745
  page1 = gr.Group(visible=True, elem_classes=["input-panel"])
746
  with page1:
747
  gr.Markdown("اختر **أحد** الخيارين ثم اضغط الزر.", elem_classes=["small"])
@@ -763,11 +352,13 @@ with gr.Blocks(title="Question Generator", css=CSS) as demo:
763
  btn_build = gr.Button("generate quistion", elem_classes=["button-primary"])
764
  warn = gr.Markdown("", elem_classes=["small"])
765
 
 
766
  page2 = gr.Group(visible=False)
767
  with page2:
768
  quiz_html = gr.HTML("")
769
- js_wired = gr.Textbox(visible=False)
770
 
 
771
  btn_build.click(
772
  build_quiz,
773
  inputs=[text_area, file_comp, num_q, trocr_model, trocr_zoom],
 
1
  # -*- coding: utf-8 -*-
2
  # صفحتان ثابتتان + Submit لكل سؤال يعمل فعليًا + منع تغيّر أبعاد صفحة الإدخال
3
 
4
+ import os, json, uuid, random, unicodedata
5
  from dataclasses import dataclass
6
  from pathlib import Path
7
+ from typing import List, Tuple
8
 
9
  from PIL import Image
10
  from pypdf import PdfReader
 
105
  t = re2.sub(r"\[\d+\]", " ", t)
106
  return norm_ar(t)
107
 
108
+ # ------------------ توليد أسئلة ------------------
109
  SENT_SPLIT = re2.compile(r"(?<=[\.!؟\?])\s+")
110
  AR_STOP = set("""في على من إلى عن مع لدى ذلك هذه هذا الذين التي الذي أو أم إن أن كان تكون كانوا كانت كنت ثم قد لقد ربما بل لكن إلا سوى حتى حيث كما لما ما لماذا متى أين كيف أي هناك هنا هؤلاء أولئك نحن هو هي هم هن أنت أنتم أنتن""".split())
111
 
 
120
  s=[x.strip() for x in SENT_SPLIT.split(t) if x.strip()]
121
  return [x for x in s if len(x)>=25]
122
 
123
+ def yake_keywords(t:str, k:int=160)->List[str]:
124
+ ex = yake.KeywordExtractor(lan='ar', n=1, top=k)
125
+ cands = [w for w,_ in ex.extract_keywords(t)]
126
+ out=[]; seen=set()
127
+ for k in cands:
128
+ k=k.strip()
129
+ if not k or k in seen or k in AR_STOP: continue
130
+ if len(k)<3 or re2.match(r"^[\p{P}\p{S}]+$",k): continue
131
+ seen.add(k); out.append(k)
132
+ return out
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
 
134
def good_kw(kw:str)->bool:
    """Return True if *kw* is usable as a quiz keyword.

    A good keyword is non-empty, at least 3 characters long, not an
    Arabic stopword, and not composed purely of punctuation, symbols,
    digits, or underscores.
    """
    # bool(...) so an empty string yields False rather than '' — the
    # annotation promises bool, and callers use this in boolean context.
    return bool(kw and len(kw) >= 3 and kw not in AR_STOP
                and not re2.match(r"^[\p{P}\p{S}\d_]+$", kw))
136
+
137
+ def distractors(correct:str, pool:List[str], k:int=3)->List[str]:
138
+ L=len(correct.strip()); cand=[]
139
+ for w in pool:
140
+ w=w.strip()
141
+ if not w or w==correct or w in AR_STOP: continue
142
+ if re2.match(r"^[\p{P}\p{S}\d_]+$", w): continue
143
+ if abs(len(w)-L)<=3: cand.append(w)
144
+ random.shuffle(cand)
145
+ out=cand[:k]
146
+ while len(out)<k: out.append("—")
147
+ return out
148
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
def make_mcqs(text:str, n:int=6)->List[MCQ]:
    """Build up to *n* fill-in-the-blank MCQs from Arabic *text*.

    Splits the text into sentences, extracts keywords (YAKE, with a
    frequency-based fallback), maps each keyword to the first sentence
    containing it as a whole word, blanks the keyword out, and attaches
    three distractors plus the correct answer in shuffled order.

    Raises:
        ValueError: if no usable sentences are found.
        RuntimeError: if no questions could be generated.
    """
    sents = split_sents(text)
    if not sents:
        raise ValueError("النص قصير أو غير صالح.")

    kws = yake_keywords(text)
    if not kws:
        # Fallback: rank *unique* tokens by frequency in a single pass.
        # (Previously this called text.count() once per duplicated token —
        # O(n^2) — and let duplicates waste slots in the top-80 slice.)
        freq = {}
        for tok in re2.findall(r"[\p{L}\p{N}_]+", text):
            freq[tok] = freq.get(tok, 0) + 1
        kws = [w for w, _ in sorted(freq.items(), key=lambda x: -x[1])][:80]

    # First sentence containing each keyword as a whole word.
    sent_for = {}
    for s in sents:
        for kw in kws:
            if kw in sent_for:
                continue
            if good_kw(kw) and re2.search(rf"(?<!\p{{L}}){re2.escape(kw)}(?!\p{{L}})", s):
                sent_for[kw] = s

    items = []
    used = set()  # one question per sentence
    for kw in (w for w in kws if w in sent_for):
        if len(items) >= n:
            break
        s = sent_for[kw]
        if s in used:
            continue
        # Blank out the first whole-word occurrence of the keyword.
        q = re2.sub(rf"(?<!\p{{L}}){re2.escape(kw)}(?!\p{{L}})", "_____", s, count=1)
        ch = distractors(kw, [x for x in kws if x != kw], 3) + [kw]
        random.shuffle(ch)
        ans = ch.index(kw)
        items.append(MCQ(id=str(uuid.uuid4())[:8], question=q, choices=ch, answer_index=ans))
        used.add(s)

    if not items:
        raise RuntimeError("تعذّر توليد أسئلة.")
    return items
170
 
171
  def to_records(items:List[MCQ])->List[dict]:
 
175
  for i,lbl in enumerate(["A","B","C","D"]):
176
  txt=(it.choices[i] if i<len(it.choices) else "—").strip()
177
  txt=txt.replace(",", "،").replace("?", "؟").replace(";", "؛")
178
+ opts.append({"id":lbl,"text":txt or "—","is_correct":(i==it.answer_index)})
 
 
179
  recs.append({"id":it.id,"question":it.question.strip(),"options":opts})
180
  return recs
181
 
 
214
 
215
  # ------------------ توليد الامتحان وتبديل الصفحات ------------------
216
def build_quiz(text_area, file_path, n, model_id, zoom):
    """Generate the quiz and flip the UI from the input page to the quiz page.

    Returns a 4-tuple: (quiz_html, page1 visibility update,
    page2 visibility update, warning markdown). Pasted text takes
    precedence over an uploaded file.
    """
    text_area = (text_area or "").strip()
    if not text_area and not file_path:
        return "", gr.update(visible=True), gr.update(visible=False), "🛈 الصق نصًا أو ارفع ملفًا أولًا."
    if text_area:
        raw = text_area
    else:
        # gr.File may hand back a list/tuple of paths — use the first one.
        if isinstance(file_path, (list, tuple)) and file_path:
            file_path = file_path[0]
        raw, _ = file_to_text(str(file_path), model_id=model_id, zoom=float(zoom))
    cleaned = postprocess(raw)
    items = make_mcqs(cleaned, n=int(n))
    recs = to_records(items)
    return render_quiz_html(recs), gr.update(visible=False), gr.update(visible=True), ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
228
 
229
  # ------------------ CSS ------------------
230
  CSS = """
 
232
  --bg:#0e0e11; --panel:#15161a; --card:#1a1b20; --muted:#a7b0be;
233
  --text:#f6f7fb; --accent:#6ee7b7; --accent2:#34d399; --danger:#ef4444; --border:#262833;
234
  }
235
+ body{direction:rtl; font-family:system-ui,'Cairo','IBM Plex Arabic',sans-serif; background:var(--bg);}
236
  .gradio-container{max-width:980px;margin:0 auto;padding:12px 12px 40px;}
237
  h2.top{color:#eaeaf2;margin:6px 0 16px}
238
 
 
255
  .q-header{display:flex;gap:10px;align-items:center;justify-content:space-between;margin-bottom:6px}
256
  .q-title{color:#eaeaf2;font-weight:800}
257
  .q-badge{padding:8px 12px;border-radius:10px;font-weight:700}
258
+ .q-badge.ok{background:#083a2a;color:#b6f4db;border:1px solid #145b44}
259
  .q-badge.err{background:#3a0d14;color:#ffd1d6;border:1px solid #6a1e2b}
260
 
261
  .q-text{color:var(--text);font-size:1.06rem;line-height:1.8;margin:8px 0 12px}
 
279
  # ------------------ JS: ربط Submit بعد الرندر (مع Output مخفي لضمان التنفيذ) ------------------
280
  ATTACH_LISTENERS_JS = """
281
  () => {
282
+ // اربط مرة واحدة فقط
283
  if (window.__q_submit_bound_multi2) { return 'already'; }
284
  window.__q_submit_bound_multi2 = true;
285
 
 
302
 
303
  const chosenLabel = chosen.closest('.opt');
304
 
305
+ // حالة صحيحة: لوّن أخضر وأقفل السؤال كاملاً
306
  if (chosen.value === correct) {
307
  chosenLabel.classList.add('ok');
308
  if (badge){ badge.hidden=false; badge.className='q-badge ok'; badge.textContent='Correct!'; }
309
+ // أقفل هذا السؤال فقط بعد الصح
310
  card.querySelectorAll('input[type="radio"]').forEach(i => i.disabled = true);
311
  e.target.disabled = true;
312
  if (note) note.textContent = '';
313
  return;
314
  }
315
 
316
+ // حالة خاطئة: لوّن أحمر فقط، ولا تعطل أي شيء — ليقدر يجرّب خيار آخر
317
+ chosenLabel.classList.add('err'); // اتركه أحمر
318
  if (badge){ badge.hidden=false; badge.className='q-badge err'; badge.textContent='Incorrect.'; }
319
  if (note) note.textContent = '';
320
+ // مهم: لا تعطّل الراديو ولا الزر
321
  });
322
 
323
  return 'wired-multi2';
324
  }
325
  """
326
 
327
+
328
+
329
  # ------------------ واجهة Gradio ------------------
330
  with gr.Blocks(title="Question Generator", css=CSS) as demo:
331
  gr.Markdown("<h2 class='top'>Question Generator</h2>")
332
 
333
+ # الصفحة 1: إدخال ثابت لا تتغير أبعاده
334
  page1 = gr.Group(visible=True, elem_classes=["input-panel"])
335
  with page1:
336
  gr.Markdown("اختر **أحد** الخيارين ثم اضغط الزر.", elem_classes=["small"])
 
352
  btn_build = gr.Button("generate quistion", elem_classes=["button-primary"])
353
  warn = gr.Markdown("", elem_classes=["small"])
354
 
355
+ # الصفحة 2: الأسئلة
356
  page2 = gr.Group(visible=False)
357
  with page2:
358
  quiz_html = gr.HTML("")
359
+ js_wired = gr.Textbox(visible=False) # Output مخفي لضمان تنفيذ JS
360
 
361
+ # بناء الامتحان + تبديل الصفحات + ربط الـJS
362
  btn_build.click(
363
  build_quiz,
364
  inputs=[text_area, file_comp, num_q, trocr_model, trocr_zoom],