Leen172 committed on
Commit
677ac21
·
verified ·
1 Parent(s): ca0d594

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +168 -0
  2. requirements.txt +5 -0
  3. styles.css +8 -0
app.py ADDED
@@ -0,0 +1,168 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import io, json, csv, uuid, random, re
3
+ import regex as re2, yake
4
+
5
# Fixed seed so distractor shuffling is reproducible across runs.
random.seed(42)

# ====== Core processing ======
# Arabic function/stop words filtered out of keyword candidates.
AR_STOP = set("ููŠ ุนู„ู‰ ู…ู† ุฅู„ู‰ ุนู† ู…ุน ู„ุฏู‰ ุฐู„ูƒ ู‡ุฐู‡ ู‡ุฐุง ุงู„ุฐูŠู† ุงู„ุชูŠ ุงู„ุฐูŠ ุงู„ู„ูˆุงุชูŠ ุงู„ู„ูˆุงุชูŠุง ุฃูˆ ุฃู… ุฅู† ุฃู† ูƒุงู† ุชูƒูˆู† ูƒุงู†ูˆุง ูƒุงู†ุช ูƒู†ุช ูƒู†ุง ูƒุงู†ุง ูƒุงู†ุชู ุซู… ู‚ุฏ ู„ู‚ุฏ ุฑุจู…ุง ุจู„ ู„ูƒู† ู„ูƒู†ู‘ูŽ ุฅู„ุง ุณูˆู‰ ุญุชู‰ ุญูŠุซ ูƒู…ุง ู„ู…ุง ู„ู…ุงู‘ ู„ู…ุงู‘ูŽ ู„ู…ุงู‹ ู…ุง ู…ุงุฐุง ู„ู…ุงุฐุง ู…ุชู‰ ุฃูŠู† ูƒูŠู ุฃูŠ ุฃูŠู‘ ุฃูŠูู‘ ู‡ู†ุงูƒ ู‡ู†ุง ู‡ู†ุงูƒูŽ ุชู„ูƒ ุฐู„ูƒู… ุฐู„ูƒู† ุฃูˆู„ุฆูƒ ู‡ุคู„ุงุก ู‡ู…ุง ู‡ู† ู‡ู… ุฃู†ุชู ุฃู†ุชูŽ ุฃู†ุชู…ุง ุฃู†ุชู† ุฃู†ุชู… ุฃู†ุง ู†ุญู† ู‡ูŠ ู‡ูˆ ู‡ู†ู‘ูŽ ู‡ู…ู‘ูŽ".split())
# Sentence boundary: split after ., !, ? or the Arabic question mark.
# Compiled with the third-party `regex` module (re2), same syntax as stdlib here.
SENT_SPLIT = re2.compile(r"(?<=[\.!ุŸ\?])\s+")
10
+
11
def clean_text_basic(txt: str) -> str:
    """Normalize whitespace and punctuation spacing in raw extracted text."""
    # Carriage returns and tabs become ordinary spaces.
    normalized = txt.replace('\r', ' ').replace('\t', ' ')
    # Drop invisible bidi/BOM control characters left over from PDF extraction.
    normalized = re.sub(r"\u200f|\u200e|\ufeff", " ", normalized)
    # Collapse any run of whitespace to a single space.
    normalized = re.sub(r"\s+", " ", normalized)
    # Exactly one space after punctuation, none before it.
    normalized = re.sub(r"\s*([\.\!\?ุŸุŒ,:;ุ›])\s*", r"\1 ", normalized)
    return normalized.strip()
17
+
18
def extract_text_pdfminer(data: bytes) -> str:
    """Extract text from PDF bytes via pdfminer; empty string on any failure."""
    # Import lazily so the app still loads when pdfminer is absent.
    try:
        import pdfminer.high_level
    except Exception:
        return ""
    try:
        extracted = pdfminer.high_level.extract_text(io.BytesIO(data))
        return extracted or ""
    except Exception:
        # Best-effort: the caller falls back to pypdf on "".
        return ""
24
+
25
def extract_text_pypdf(data: bytes) -> str:
    """Fallback PDF text extraction using pypdf; empty string on any failure."""
    try:
        from pypdf import PdfReader
        reader = PdfReader(io.BytesIO(data))
        # Pages with no extractable text contribute an empty string.
        pages = [page.extract_text() or "" for page in reader.pages]
        return "\n".join(pages)
    except Exception:
        # Best-effort, mirroring extract_text_pdfminer.
        return ""
34
+
35
def extract_text_from_pdf(data: bytes) -> str:
    """Extract and normalize PDF text, preferring pdfminer over pypdf."""
    primary = extract_text_pdfminer(data)
    # Under 10 meaningful characters counts as a failed extraction.
    if not primary or len(primary.strip()) < 10:
        primary = extract_text_pypdf(data)
    return clean_text_basic(primary)
40
+
41
def split_sentences(text: str):
    """Split *text* into sentences, keeping only those of >= 25 characters."""
    stripped = (chunk.strip() for chunk in SENT_SPLIT.split(text))
    # The length filter also discards empty fragments.
    return [sentence for sentence in stripped if len(sentence) >= 25]
43
+
44
def top_keywords_yake(text: str, max_k=120, lan='ar'):
    """Rank single-word keywords with YAKE, dropping stop words and duplicates.

    Returns at most *max_k* keywords in YAKE's ranking order.
    """
    extractor = yake.KeywordExtractor(lan=lan, n=1, top=max_k)
    ranked = [kw for kw, _ in extractor.extract_keywords(text)]
    unique, seen = [], set()
    for raw in ranked:
        word = raw.strip()
        if not word or word in seen:
            continue
        # Arabic stop words make useless blanks/distractors.
        if lan == "ar" and word in AR_STOP:
            continue
        if len(word) < 2:
            continue
        seen.add(word)
        unique.append(word)
    return unique
54
+
55
def build_distractors(correct, pool, k=3):
    """Pick up to *k* distinct wrong options for *correct* from *pool*.

    When the pool is too small the result is padded with dash placeholders.
    Fix: the original could emit duplicate options — duplicates from the pool
    were not filtered, and `random.choice(fillers)` could pick the same filler
    twice, so one question could show two identical answers.
    """
    target = correct.strip()
    candidates = [w for w in pool if w != correct and len(w) > 1]
    random.shuffle(candidates)
    out = []
    for w in candidates:
        if len(out) == k:
            break
        w2 = w.strip()
        # `w2 not in out` guarantees distinct options (bug fix).
        if w2 and w2 != target and w2 not in out:
            out.append(w2)
    fillers = ["โ€”", "-", "โ€”-"]
    # Pad with distinct fillers first (bug fix: no random duplicate fillers).
    for filler in fillers:
        if len(out) >= k:
            break
        if filler not in out:
            out.append(filler)
    # Degenerate case k > len(fillers) with an empty pool: repeat the dash.
    while len(out) < k:
        out.append(fillers[0])
    return out
66
+
67
def make_mcqs_from_text(text: str, n=8, lang='ar'):
    """Generate up to *n* fill-in-the-blank MCQ dicts from raw *text*.

    Each item carries: id, question (sentence with the keyword blanked out),
    choices (4 options), answer_index, explanation (source-sentence excerpt).
    Raises ValueError when no usable sentences exist, RuntimeError when no
    question could be assembled.
    """
    text = clean_text_basic(text)
    sents = split_sentences(text)
    if not sents: raise ValueError("ุงู„ู†ุต ู‚ุตูŠุฑ ุฌุฏู‹ุง.")
    keywords = top_keywords_yake(text, 120, lang)
    if not keywords:
        # Frequency-ranked fallback when YAKE yields nothing.
        # Fix: \p{L}/\p{N} property classes are only supported by the
        # third-party `regex` module (imported as re2); stdlib `re` raises
        # "bad escape \p" here, so this branch used to crash.
        toks = re2.findall(r"[\p{L}\p{N}_]+", text)
        toks = [t for t in toks if not (lang=="ar" and t in AR_STOP)]
        from collections import Counter
        keywords=[w for w,_ in Counter(toks).most_common(80)]
    # Map each keyword to the first sentence that contains it.
    sent_for_kw={}
    for s in sents:
        for kw in keywords:
            if kw in s and kw not in sent_for_kw:
                sent_for_kw[kw]=s
    items=[]; used=set()
    pool=[kw for kw in keywords if kw in sent_for_kw]
    for kw in pool:
        if len(items)>=n: break
        s=sent_for_kw[kw]
        if s in used: continue  # at most one question per sentence
        blanked=s.replace(kw,"_____",1)
        # Three wrong options plus the correct keyword, in random order.
        choices=build_distractors(kw,[x for x in keywords if x!=kw],3)+[kw]
        random.shuffle(choices)
        ans=choices.index(kw)
        exp=f"ู…ู‚ุชุจุณ ู…ู† ุงู„ุฌู…ู„ุฉ: {s[:220]}" + ("..." if len(s)>220 else "")
        items.append({
            "id": str(uuid.uuid4())[:8],
            "question": blanked,
            "choices": choices,
            "answer_index": ans,
            "explanation": exp
        })
        used.add(s)
    if not items: raise RuntimeError("ุชุนุฐุฑ ุงู„ุชูˆู„ูŠุฏ.")
    return items
103
+
104
def render_cards(items):
    """Render MCQ dicts as HTML cards with collapsible answer sections."""
    letters = ["A", "B", "C", "D"]
    cards = []
    for num, item in enumerate(items, 1):
        options = "".join(f"<li>{choice}</li>" for choice in item["choices"])
        letter = letters[item["answer_index"]]
        cards.append(f"""
<article class="card">
<header><span class="badge">ุณ {num}</span><h3>{item['question']}</h3></header>
<ol type="A" class="choices">{options}</ol>
<details><summary>ุงู„ุฅุฌุงุจุฉ</summary>
<div class="answer"><b>ุงู„ุฅุฌุงุจุฉ:</b> {letter}</div></details>
</article>""")
    return "\n".join(cards)
117
+
118
def to_files(items):
    """Serialize MCQ dicts into in-memory JSON and CSV download files."""
    payload = json.dumps(items, ensure_ascii=False, indent=2).encode("utf-8")
    json_bytes = io.BytesIO(payload)
    json_bytes.name = "mcqs.json"

    buffer = io.StringIO()
    writer = csv.writer(buffer)
    writer.writerow(["id","question","A","B","C","D","answer_index","explanation"])
    for item in items:
        choices = item["choices"]
        # Pad to exactly four option columns.
        padded = choices + [''] * (4 - len(choices))
        writer.writerow([item["id"], item["question"], *padded,
                         item["answer_index"], item["explanation"]])
    csv_bytes = io.BytesIO(buffer.getvalue().encode("utf-8"))
    csv_bytes.name = "mcqs.csv"
    return json_bytes, csv_bytes
127
+
128
def pipeline(text, file, n, lang):
    """Gradio handler: build MCQs from pasted text or an uploaded PDF/TXT.

    Returns (html, json_file, csv_file); on bad input returns a warning
    string and (None, None) for the two download slots.

    Fix: modern Gradio's gr.File (type="filepath", the default since v4)
    hands the handler a plain str path, so the original `file.read()`
    crashed with AttributeError. Accept a path string, a file-like object,
    or an object exposing only `.name`.
    """
    src = (text or "").strip()
    if file is not None:
        if isinstance(file, str):
            # gradio >= 4 default: a temp-file path string.
            name = file.lower()
            with open(file, "rb") as fh:
                b = fh.read()
        elif hasattr(file, "read"):
            # Legacy file-like upload object.
            name = file.name.lower()
            b = file.read()
        else:
            # Wrapper exposing only the path via .name.
            name = file.name.lower()
            with open(file.name, "rb") as fh:
                b = fh.read()
        if name.endswith(".pdf"):
            src = extract_text_from_pdf(b)
        elif name.endswith(".txt"):
            src = clean_text_basic(b.decode("utf-8", "ignore"))
        else:
            return "โš ๏ธ ุงุฑูุนูŠ PDF ุฃูˆ TXT ูู‚ุท.", None, None
    if not src:
        return "โš ๏ธ ุฃุฏุฎู„ูŠ ู†ุตู‹ุง ุฃูˆ ู…ู„ูู‹ุง.", None, None
    # Slider values arrive as floats; step=1 makes int() lossless.
    items = make_mcqs_from_text(src, int(n), lang)
    html = render_cards(items)
    j, c = to_files(items)
    return html, j, c
144
+
145
# ====== Gradio UI ======
# Soft indigo theme; corner radius set through the theme builder.
theme = gr.themes.Soft(primary_hue="indigo").set(radius_size="10px")

# Card styling shipped alongside the app (see styles.css).
with open("styles.css","r",encoding="utf-8") as f:
    css = f.read()

# NOTE(review): confirm `fill_body` exists in the pinned gradio version —
# gr.Blocks commonly exposes `fill_height`/`fill_width` instead.
with gr.Blocks(theme=theme, css=css, fill_body=True) as demo:
    # Force right-to-left layout for the Arabic interface.
    gr.HTML("<style>body{direction:rtl}</style>")
    gr.Markdown("## ๐Ÿง  ู…ูˆู„ู‘ุฏ ุฃุณุฆู„ุฉ ุงุฎุชูŠุงุฑ ู…ู† ู…ุชุนุฏุฏ (PDF / TXT / ู†ุต)")
    with gr.Row():
        with gr.Column(scale=1):
            # Inputs: free text, optional file, question count, language.
            t=gr.Textbox(label="ุงู„ู†ุต",lines=8,placeholder="ุฃู„ุตู‚ูŠ ุงู„ู†ุต ู‡ู†ุง ุฃูˆ ุงุฑูุนูŠ ู…ู„ู")
            f=gr.File(label="ู…ู„ู PDF ุฃูˆ TXT",file_types=[".pdf",".txt"])
            n=gr.Slider(1,50,value=10,step=1,label="ุนุฏุฏ ุงู„ุฃุณุฆู„ุฉ")
            lang=gr.Dropdown(["ar","en"],value="ar",label="ุงู„ู„ุบุฉ")
            b=gr.Button("ุชูˆู„ูŠุฏ")
            # Download slots populated by `pipeline`.
            j=gr.File(label="ุชุญู…ูŠู„ JSON")
            c=gr.File(label="ุชุญู…ูŠู„ CSV")
        with gr.Column(scale=2):
            # Rendered question cards.
            out=gr.HTML(label="ุงู„ู†ุชุงุฆุฌ")
    # Wire the button to the processing pipeline.
    b.click(pipeline,[t,f,n,lang],[out,j,c])

if __name__=="__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ gradio
2
+ pdfminer.six
3
+ pypdf
4
+ regex
5
+ yake
styles.css ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
/* Page base: light background; Cairo is a common Arabic-friendly font. */
body { background: #fafafa; font-family: 'Cairo', sans-serif; }
/* Container for one generated question. */
.card {
background:#fff; border:1px solid #eaeaea; border-radius:14px;
padding:14px; box-shadow: 0 4px 12px rgba(0,0,0,.05); margin-bottom:10px;
}
/* Small "question number" pill in each card header. */
.badge { background:#eef2ff; color:#3730a3; padding:3px 9px; border-radius:999px; font-size:12px; }
/* Answer-option list; logical padding keeps RTL layouts correct. */
.choices { padding-inline-start:20px; }
/* Revealed-answer box inside the <details> element. */
.answer { margin-top:8px; background:#f9fafb; padding:8px; border-radius:10px; border:1px dashed #d1d5db; }